This snippet finds the commonly used words in a body of text.
<?php
/**
 * Count the most frequently used words in a body of text.
 *
 * The text is normalised (whitespace runs collapsed to one space, every
 * character other than letters, digits, spaces and dashes removed, then
 * lower-cased). Each purely alphabetic run that is immediately followed by a
 * space or by the end of the text is tallied. Words shorter than three
 * characters and words found in the stop-word file are ignored.
 *
 * @param string   $string Text to analyse.
 * @param int|null $max    Maximum number of words to return. Null (or any
 *                         value loosely equal to null, such as 0) returns
 *                         every counted word.
 * @param string   $file   Path to a stop-word list, one word per line.
 *
 * @return array Map of word => occurrence count, ordered by descending count.
 *               When no word survives filtering, the normalised text split on
 *               single spaces is returned instead (a plain list of strings).
 */
function commonWords($string, $max = null, $file = 'stopwords.txt')
{
    // Load the stop words into a lookup map keyed by the lower-cased word.
    // file() copes with empty files, and trim() strips "\r" from CRLF lists.
    $stopWords = array();
    $lines = (is_file($file) && is_readable($file))
        ? file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
        : false;
    if ($lines === false) {
        // Keep going without stop words rather than aborting the analysis.
        trigger_error(
            "commonWords(): unable to read stop-word file '{$file}'; continuing without stop words",
            E_USER_WARNING
        );
        $lines = array();
    }
    foreach ($lines as $line) {
        $stopWord = strtolower(trim($line));
        if ($stopWord !== '') {
            $stopWords[$stopWord] = true;
        }
    }

    // Normalise the text. Whitespace is collapsed to a single space FIRST so
    // that newlines and tabs still separate words once punctuation is removed.
    $string = preg_replace('/\s+/', ' ', (string) $string);
    $string = trim($string);
    // Only keep alphanumeric characters, spaces and dashes.
    $string = preg_replace('/[^a-zA-Z0-9 -]/', '', $string);
    $string = strtolower($string);

    // A word is a run of letters ending at a space or at the end of the text;
    // the end-of-text alternative makes sure the final word is counted too.
    preg_match_all('/[a-z]+(?= |\z)/', $string, $matches);

    $wordCountArr = array();
    foreach ($matches[0] as $word) {
        if (strlen($word) < 3 || isset($stopWords[$word])) {
            continue;
        }
        if (isset($wordCountArr[$word])) {
            $wordCountArr[$word]++;
        } else {
            $wordCountArr[$word] = 1;
        }
    }

    // Most frequent first; arsort() keeps the word keys attached to the counts.
    arsort($wordCountArr);

    $final = ($max != null) ? array_slice($wordCountArr, 0, $max) : $wordCountArr;

    // Nothing countable: fall back to the raw tokens of the normalised text.
    if (count($final) == 0) {
        $final = explode(' ', $string);
    }

    return $final;
}
// Demo: analyse a sample paragraph, first with no limit, then limited to ten words.
$sampleText = 'This is a string it has some words and some words are written more than one time. Words are a combination of letters and spaces to make readable text, these form to make sentences, paragraphs, and full bodies of text.';

$everyWord = commonWords($sampleText, null);
print_r($everyWord);

// Two line breaks between the two dumps.
echo '
';
echo '
';

$topTen = commonWords($sampleText, 10);
print_r($topTen);
?>