corz.org uses cookies to remember that you've seen this notice explaining that corz.org uses cookies, okay!
<?php
/*
function:strip_stuff()

strips stuff from a string, leaving only its significant words:

	1. HTML/PHP tags are removed (strip_tags).
	2. punctuation, symbols, line-breaks and tabs are replaced by spaces.
	3. the text is lower-cased (strtolower).
	4. stop-words, and any words of two characters or less, are dropped.
	   length is measured with strlen(), i.e. in bytes.

parameter:	$string		the text to clean up.
returns:	the remaining words, in their original order, separated by
			single spaces; an empty string if nothing survives.

(c) copyright corz.org 2000->today
*/
function strip_stuff($string) {

	$string = strip_tags($string);

	// stuff: every one of these is swapped for a single space.
	// NOTE(review): the ' ' entry is a no-op if it really is a plain space; it
	// may have been a non-breaking space in the original file - confirm.
	$stoppers = array
		('.',',',':','|','"','\\','/','?','*','~','#','%','$','(',')','[',']','{','}','-','_','=','+'
		,'*','£','@','\\'.'$',"Â","\r\n","\r","\n","\t",'»','«',' ','©','®','&'
		,'>','<','™','•','°',"!"
		);

	// str_replace() accepts the whole array and applies the replacements in
	// order, so this matches replacing them one at a time (and does not rely
	// on each(), which no longer exists in PHP 8).
	$string = str_replace($stoppers, ' ', $string);
	$string = strtolower($string);

	// stop-words. these are matched *after* lower-casing, so only lower-case
	// entries can ever match. entries shorter than three characters are already
	// covered by the length check below; they are kept in case that ever changes.
	$stop_words = array (''
		,' ',';o','a','all','and','are','at','be','but','by','can','do','don\'t'
		,'for','got','have','he','here','in','if','is','it','like','me','my','n','no'
		,'o','of','on','one','or','out','she','so','t','than','then','that','that\'s'
		,'the','there','there\'s','these','this','to','too','was','we','with','you'
		);

	// flip the list so that each membership test is a single key lookup.
	$is_stop_word = array_flip($stop_words);

	// runs of spaces produce empty tokens here; the length check discards them,
	// so no separate whitespace-collapsing pass is needed.
	$kept = array();
	foreach (explode(' ', $string) as $word) {
		if (strlen($word) < 3 or isset($is_stop_word[$word])) {
			continue;
		}
		$kept[] = $word;
	}

	return implode(' ', $kept);
}
/*
end function strip_stuff()
*/
?>