<?php /* --- ۞---> text { encoding:utf-8;bom:no;linebreaks:unix;tabs:4sp; } */ /* include-only guard: when this file is itself the script being requested (rather than being included), stop immediately with a message */ if (realpath ($_SERVER['SCRIPT_FILENAME']) == realpath (__FILE__)) { die ( 'to err is human, human!' ); } /* Text Functions v0.3.2
Some useful text functions This script is usually included from /inc/init.php
;o)
(c) copyright 2003->tomorrow! cor + corz.org */
/*
	function get_rand_word()

	grab a random word from a blog file.

	The file is split into entries on the '<!--*end*-->' marker; whatever
	follows the final marker is not treated as an entry. One entry is picked
	at random, split into fields on '<!--*g*-->', and its third field is run
	through strip_stops(). One of the distinct remaining words is returned.

	$filename	path of the blog file to read.

	returns		a single word (string), or 'unknown' when the file cannot be
				read, holds no entries, the chosen entry has no third field,
				or no words survive strip_stops().
*/
function get_rand_word ($filename) {

	$fallback = 'unknown';

	if (!is_readable($filename)) {
		return $fallback;
	}

	$file_contents = file_get_contents($filename);
	if ($file_contents === false) {
		return $fallback;
	}

	// every entry is terminated by the end marker, so the last chunk is
	// trailing data. With no marker at all there are no entries to pick from
	// (previously this produced a random index of -1).
	$entries = array_slice(explode('<!--*end*-->', $file_contents), 0, -1);
	if (empty($entries)) {
		return $fallback;
	}

	$fields = explode('<!--*g*-->', $entries[array_rand($entries)]);
	if (!isset($fields[2])) {
		return $fallback;
	}

	// de-duplicate, so that each distinct word is equally likely..
	$words = array();
	foreach (explode(' ', strip_stops($fields[2])) as $word) {
		if ($word !== '') {
			$words[$word] = $word;
		}
	}
	if (empty($words)) {
		return $fallback;
	}

	return $words[array_rand($words)];
}
/*
	function: strip_stops()

	remove unwanted ('stop') characters and words from an input..

	HTML tags are stripped, every stop character becomes a space, the text
	is lower-cased, and then any word that is shorter than four bytes or that
	appears in the stop-word list is dropped.

	$string		the text to clean.

	returns		the surviving words, lower-cased, joined by single spaces
				(an empty string when nothing survives).
*/
function strip_stops($string) {

	$string = strip_tags((string) $string);

	// stuff
	// NOTE(review): the original list also held several empty-string entries,
	// which str_replace() ignores. They look like characters lost to an
	// encoding mishap; restore them here if the intended characters are known.
	$stoppers = array (
		'.', ',', ':', '|', '"', '\\', '/', '?', '*', '~', '#', '%', '$',
		'(', ')', '[', ']', '{', '}', '-', '_', '=', '+', '@', '!',
		'&', '>', '<', '©', '®', '™',
		"\r\n", "\r", "\n", "\t",
	);
	$string = strtolower(str_replace($stoppers, ' ', $string));

	// stop-words
	// some of these are probably not necessary: anything under four bytes is
	// dropped regardless, and the capitalised entries cannot match text that
	// has already been lower-cased.
	$stop_words = array (''
		,';o','a','A','all','and','are','at','be','but','by','can','do','don\'t'
		,'for','got','have','he','here','I','in','if','is','it','like','me','my','n','no'
		,'o','of','on','one','or','out','she','so','t','than','then','that','that\'s'
		,'the','The','there','there\'s','these','this','to','too','was','we','with','you'
	);

	// (this used to walk the arrays with each(), which was removed in PHP 8.0)
	$keepers = array();
	foreach (explode(' ', $string) as $word) {
		if (strlen($word) < 4 or in_array($word, $stop_words, true)) {
			continue;
		}
		$keepers[] = $word;
	}

	return implode(' ', $keepers);

}/* end function strip_stops() */
/*
	XSS Clean

	Slightly improved version of xss_sponge().

	Decodes the input (URL encoding, hex escapes, HTML entities) and then
	strips or defuses markup that can run script: on*/xmlns attributes,
	javascript:/vbscript:/-moz-binding values, scripted style attributes,
	namespaced elements and a list of unwanted tags.

	$data		the string to clean.

	returns		the cleaned string, or false when $data is not a string.
				NOTE(review): several patterns use the /u modifier, and PHP's
				preg functions return null for subjects that are not valid
				UTF-8, so such input most likely comes back as null.

	Changes from the previous version:

		- the magic-quotes branch is gone. get_magic_quotes_gpc() was removed
		  in PHP 8.0, and the branch only ever touched an undefined $string,
		  so it never altered $data in the first place.
		- the hex conversion uses preg_replace_callback() (the /e modifier no
		  longer exists) and emits UTF-8 rather than a raw byte for values
		  above 0x7F, which would otherwise break the /u patterns below.
		- the zero-padded entity pattern had a garbled character where "&#0"
		  belongs.
*/
function xss_clean($data) {

	// skip any null or non string values
	if (is_null($data) || !is_string($data)) { return false; }

	// fix &entity\n;
	$data = str_replace(array('&','<','>'), array('&amp;','&lt;','&gt;'), $data);

	// URL decode
	$data = urldecode($data);

	// convert Hexadecimals
	$data = preg_replace_callback('!(&#|\\\\)[xX]([0-9a-fA-F]+);?!', function ($found) {
		$code = hexdec($found[2]);
		// hexdec() hands back a float once the value outgrows an int; that
		// is no character, so leave the text exactly as it was.
		if (!is_int($code) || $code > 0x10FFFF) {
			return $found[0];
		}
		if ($code < 0x80) {
			return chr($code);
		}
		// beyond ASCII, let html_entity_decode() build the UTF-8 sequence
		// (code points it will not decode simply stay as a numeric entity).
		return html_entity_decode('&#'.$code.';', ENT_QUOTES, 'UTF-8');
	}, $data);

	// clean up entities
	// NOTE(review): "&#0" is reconstructed from a garbled character; the
	// trailing ;* keeps an existing semicolon from being doubled.
	$data = preg_replace('!(&#0+[0-9]+);*!','$1;',$data);
	$data = html_entity_decode($data, ENT_NOQUOTES, 'UTF-8');
	$data = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $data);
	$data = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $data);
	$data = html_entity_decode($data, ENT_COMPAT, 'UTF-8');

	// remove any attribute starting with "on" or xmlns
	$data = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $data);

	// remove javascript: and vbscript: protocols
	$data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data);
	$data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data);
	$data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data);

	// only works in IE: <span style="width: expression(alert('Ping!'));"></span>
	// NOTE(review): the second pattern looks for "behaviour(", whereas the IE
	// property is spelled "behavior" - confirm what this is meant to catch.
	$data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
	$data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
	$data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data);

	// remove namespaced elements (we do not need them)
	$data = preg_replace('#</*\w+:\w[^>]*+>#i', '', $data);

	// remove really unwanted tags; repeat until nothing changes, because
	// removing one tag can splice the text around it into another.
	do {
		$old_data = $data;
		$data = preg_replace('#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i', '', $data);
	} while ($old_data !== $data);

	// we are done...
	return $data;
}
/*
	integers to words.

	converts 1145432 into
	"one million, one hundred and forty five thousand, four hundred and thirty two"
	fairly groovy.

	Requires bignumbers (below)

	$number		a whole number, given as an int or as a string of digits.
				Anything below 1 is treated as 1.

	returns		the number in English words. The result normally carries a
				trailing space, because every word is appended together with
				its separator.

	NOTE(review): an all-zero group above the thousands is not tidied up by
	the clean-up step; tracing 1000000000 through the loop ends in
	"one billion million  ".
*/
function int2eng($number) {

	$output = '';
	if ($number < 1) $number = 1;
	// the loop below peels digits off the front with substr()..
	$number = (string) $number;

	$units = array(' ','one ','two ','three ','four ','five ','six ','seven ','eight ','nine ');
	$teens = array('ten ', 'eleven ','twelve ','thirteen ','fourteen ','fifteen ','sixteen ',
					'seventeen ','eighteen ','nineteen ');
	$tenners = array('', '','twenty ','thirty ','forty ','fifty ','sixty ','seventy ','eighty ',
					'ninety ');

	$lint = strlen($number);

	// $x is the number of digits still to be spoken, current one included, so
	// $x % 3 says where we are in a group: 0 = hundreds, 2 = tens, 1 = units.
	for ($x = $lint ; $x >= 1 ; $x--) {

		// the four characters before the final one of what we have so far;
		// 'sand' means the output currently ends in "thousand" plus a space.
		$last = substr($output, -5, 4);
		$digit = substr($number, 0, 1);
		$number = substr($number, 1);

		if ($x % 3 == 2) {

			if ($digit == 1) { // 10-19..
				// a teen uses up the units digit as well
				$digit = substr($number, 0, 1);
				$number = substr($number, 1);
				$x--;
				if ($last == 'sand') { $output .= 'and '; }
				$output .= $teens[$digit];

			} else { // 20-99..

				if ($last == 'sand') { $output .= 'and '; }
				$output .= $tenners[$digit];
			}

		} else {
			if (($x % 3 != 1) and ($digit > 0) and (!empty($output))) { $output .= ', '; }
			$output .= $units[$digit];
		}

		// a whole number of three-digit groups left: name the group just ended
		if ((strlen($number) % 3) == 0) {
			$bignum = bignumbers((int) (strlen($number) / 3));
			if (($last == 'dred') and ($bignum != 'thousand')) { $output .= 'and '; }
			$output .= $bignum;
		}

		// two digits left in this group: the digit just spoken was hundreds
		if ((strlen($number) % 3) == 2 and $digit > 0) { $output .= 'hundred and '; }
	}

	// clean up the output..
	$output = str_replace('  ', ' ', $output);
	$output = str_replace('red and thou', 'red thou', $output);
	$output = str_replace('red and mill', 'red mill', $output);
	$output = str_replace('red and bill', 'red bill', $output);
	$output = str_replace('red and tril', 'red tril', $output);
	$output = str_replace('lion thousand', 'lion ', $output);
	if (substr($output, -5) == ' and ') { $output = substr($output, 0, -5).' '; }

	return $output;
}

/*
	it just looks better, okay!

	Names a group of three digits: 0 = (nothing), 1 = thousand, 2 = million,
	3 = billion, 4 = trillion. Any other value is handed back unchanged.
*/
function bignumbers($test) {
	switch ($test) {
		case 0:
			$test = "";
			break;
		case 1:
			$test = "thousand";
			break;
		case 2:
			$test = "million";
			break;
		case 3:
			$test = "billion";	// 10^9 (this used to say "trillion")
			break;
		case 4:
			$test = "trillion";	// <- that's a lot of comments!
			break;
	}
	return $test;
}
/*
	case-insensitive array search..

	$needle		the value to look for (a string, usually).
	$haystack	the array to search; any keys will do, the array does not
				need to be a 0..n list.

	returns		true when a scalar element of $haystack equals $needle once
				both are lower-cased, otherwise false. Also false when
				$haystack is not an array, or $needle is empty, false or not
				a scalar. A needle of '0' is searched for like any other.
*/
function in_arrayi($needle, $haystack) {

	if (!is_array($haystack) or !is_scalar($needle) or $needle === '' or $needle === false) {
		return false;
	}

	$wanted = strtolower((string) $needle);

	// walk the values rather than indexes 0..count-1, which went wrong for
	// associative and sparse arrays.
	foreach ($haystack as $candidate) {
		if (is_scalar($candidate) and strtolower((string) $candidate) === $wanted) {
			return true;
		}
	}

	return false;
}
/*
	fuzzy array search!

	Finds the entry of $words_array with the smallest Levenshtein distance
	from $input.

	$words_array	the candidate words.
	$input			the word to match.
	$sensitivity	the largest distance that still counts as a match.

	returns			the closest word when its distance is no greater than
					$sensitivity, otherwise 0. An exact match wins straight
					away; between equally distant candidates the later one
					wins. An empty (or non-array) $words_array gives 0.
*/
function fuzzy_array_match($words_array, $input, $sensitivity){

	// with nothing to compare there is no closest word, and the -1 "unset"
	// distance below would otherwise pass the sensitivity test.
	if (!is_array($words_array) or empty($words_array)) {
		return 0;
	}

	$closest = null;
	$shortest = -1;	// -1 = no candidate seen yet

	foreach ($words_array as $word) {

		$lev = levenshtein($input, $word);

		if ($lev == 0) {
			$closest = $word;
			$shortest = 0;
			break;
		}

		if ($lev <= $shortest || $shortest < 0) {
			$closest = $word;
			$shortest = $lev;
		}
	}

	if ($closest !== null and $shortest <= $sensitivity) {
		return $closest;
	}

	return 0;
}
/* function mail_mash() v0.3
a cuter way to foil the spam-bots
mail_mash will transform email@address.com into a randomly mixed string of real "o" and encoded "&#111;" characters. It's different each time the page loads, but always presents a valid mailto:email@address.com for a human clicker
note: the "mailto:" part is also prepended, mixed in to the randomness, so you don't need to provide that in your html, just <a href="',mail_mash($email_address),'"> from inside a php echo, or put a whole php echo statement inside the href if you are inside plain html.. <a href="<?php echo mail_mash($email_address); ?>">
your@address.com
would output *something like*..
&#109;a&#105;lt&#111;:y&#111;ur&#64;a&#100;dre&#115;s&#46;c&#111;m
have fun!
;o)
ps. these days I prefer GD-verify protected mail forms!
*/
/*
	function mail_mash()

	$addy		an email address, without the "mailto:" part.

	returns		'mailto:' plus the address, with a random selection of its
				characters swapped for decimal HTML entities. It decodes back
				to the same mailto: link every time.

	Characters that are special in HTML (& < > " ') are always encoded, so
	the result is safe to drop into an href="" attribute. Bytes outside ASCII
	are never encoded, because each is only a fragment of a multi-byte UTF-8
	character and would decode to the wrong thing on its own.
*/
if (!function_exists('mail_mash')) {
	function mail_mash($addy) {

		$addy = 'mailto:'.$addy;
		$mashed_email_addy = '';

		// (this used to walk the letters with each(), removed in PHP 8.0)
		$length = strlen($addy);
		for ($i = 0 ; $i < $length ; $i++) {

			$letter = $addy[$i];
			$code = ord($letter);

			if ($code < 0x80) {
				$special = in_array($letter, array('&', '<', '>', '"', "'"), true);
				// rand(0,20) > 9 encodes roughly half of the characters
				if ($special or rand(0,20) > 9) {
					$letter = '&#'.$code.';';
				}
			}

			$mashed_email_addy .= $letter;
		}

		return $mashed_email_addy;
	}
}/* end function mail_mash() */
/*
	make a valid HTML id..

	this function exists in the main corzblog functions, but cbparser goes
	out on its own, so...

	This is from cbparser..

	$title		the text to build an id from (it may contain HTML tags).

	returns		$title with its tags stripped, spaces turned into hyphens,
				everything except a-z, A-Z, 0-9 and "-" removed, leading
				digits and hyphens dropped, and runs of hyphens squeezed down
				to one. This can be an empty string.
*/
function make_valid_id_string($title) {

	$title = str_replace(' ', '-', strip_tags($title));
	$id_title = preg_replace("/[^a-z0-9-]+/i", '', $title);

	// drop any leading digits and hyphens
	$id_title = ltrim($id_title, '0123456789-');

	// squeeze whole runs of hyphens; a single pass replacing "--" with "-"
	// still leaves "--" behind for three or more in a row.
	return trim(preg_replace('/-{2,}/', '-', $id_title));
}
/*
	Minify a string

	Originally for minifying style sheets, also handy for other things!

	Removes line breaks and tabs outright, and squeezes every run of two or
	more spaces down to one space. Single spaces are kept, so neighbouring
	tokens ("0 auto", "1px solid") are never glued together.

	NOTE(review): the previous space handling listed the same one-space
	string three times, which removes every space in the input. That looks
	like whitespace-collapsed source, so the exact original rule is a guess.

	$string		the text to minify.

	returns		the minified text.
*/
function minify_string($string) {

	// comment stripping is currently switched off..
	// $string = preg_replace('!/\*[^*]*\*+([^/][^*]*\*+)*/!', '', $string); // remove CSS/php comments

	$string = str_replace(array("\r\n", "\r", "\n", "\t"), '', $string);
	$string = preg_replace('/ {2,}/', ' ', $string);

	return $string;
}
?>
|