corz.org uses cookies to remember that you've seen this notice explaining that corz.org uses cookies, okay!
<?php /* --- ۞---> text { encoding:utf-8;bom:no;linebreaks:unix;tabs:4sp; } */
if (realpath ($_SERVER['SCRIPT_FILENAME']) == realpath (__FILE__)) {
die ( 'to err is human, human!' ); }
/*
Text Functions v0.3.2
Some useful text functions
This script is usually included from /inc/init.php
;o)
(c) copyright 2003->tomorrow! cor + corz.org
*/
/*
function get_rand_word()
grab a random word from a blog file.
*/
function get_rand_word ($filename) {
if (!is_readable($filename)) { return 'unknown'; }
$file_contents = implode('', file($filename));
$data = explode('<!--*end*-->', $file_contents);
$this_data = $data[rand(0, (count($data)-2))];
$data_entry = explode('<!--*g*-->', $this_data);
$datatext = strip_stops($data_entry[2]);
$data_words = explode(' ', $datatext);
$words = array_unique($data_words);
sort($words);
shuffle($words);
$my_random_word = $words[rand(0,(count($words)-1))];
return $my_random_word;
}
/*
function:strip_stops()
remove unwanted ('stop') characters from an input..
*/
function strip_stops($string) {
$string = strip_tags($string);
// stuff
$stoppers = array
('.',',',':','|','"','\\','/','?','*','~','#','%','$','(',')','[',']','{','}','-','_','=','+'
,'*','','@','\\'.'$',"","\r\n","\r","\n","\t",'','',' ','©','®','&'
,'>','<','™','','',"!"
);
while (list($key, $val) = each($stoppers)) {
$string = str_replace($val, ' ', $string);
}
$string = str_replace("\t",' ',$string);
$string = str_replace(' ',' ',$string);
$string = str_replace(' ',' ',$string);
$string = strtolower($string);
// stop-words
$stop_words = array ('' // some of these are probably not necessary
,';o','a','A','all','and','are','at','be','but','by','can','do','don\'t'
,'for','got','have','he','here','I','in','if','is','it','like','me','my','n','no'
,'o','of','on','one','or','out','she','so','t','than','then','that','that\'s'
,'the','The','there','there\'s','these','this','to','too','was','we','with','you'
);
$words = explode (' ', $string);
$qs = count($stop_words);
reset($words);
while (list($key, $val) = each($words)) {
for ($i=0;$i<$qs;$i++) {
if (($words[$key] == $stop_words[$i]) or (strlen($words[$key]) < 4)){
unset($words[$key]);
break 1;
}
}
}
$string = implode(' ', $words);
return $string;
}/*
end function strip_stuff() */
/*
XSS Clean
Slightly improved version of xss_sponge().
*/
function xss_clean($data) {
// skip any null or non string values
if (is_null($data) || !is_string($data)) {
return false;
}
if (get_magic_quotes_gpc()) {
$string = stripslashes($string);
}
// fix &entity\n;
$data = str_replace(array('&','<','>'), array('&amp;','&lt;','&gt;'), $data);
// URL decode
$data = urldecode($data);
// convert Hexadecimals
$data = preg_replace('!(&#|\\\)[xX]([0-9a-fA-F]+);?!e','chr(hexdec("$2"))', $data);
// clean up entities
$data = preg_replace('!(�+[0-9]+)!','$1;',$data);
$data = html_entity_decode($data, ENT_NOQUOTES, 'UTF-8');
$data = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $data);
$data = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $data);
$data = html_entity_decode($data, ENT_COMPAT, 'UTF-8');
// remove any attribute starting with "on" or xmlns
$data = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $data);
// remove javascript: and vbscript: protocols
$data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data);
$data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data);
$data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data);
// only works in IE: <span style="width: expression(alert('Ping!'));"></span>
$data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
$data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
$data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data);
// remove namespaced elements (we do not need them)
$data = preg_replace('#</*\w+:\w[^>]*+>#i', '', $data);
do {
// remove really unwanted tags
$old_data = $data;
$data = preg_replace('#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i', '', $data);
}
while ($old_data !== $data);
// we are done...
return $data;
}
/*
integers to words.
converts 1145432 into
"one million, one hundred and forty five thousand, four hundred and thirty two"
fairly groovy.
Requires bignumbers(below)
*/
function int2eng($number) {
$output = '';
if ($number < 1) $number = 1;
$units = array(' ','one ','two ','three ','four ','five ','six ','seven ','eight ','nine ');
$teens = array('ten ', 'eleven ','twelve ','thirteen ','fourteen ','fifteen ','sixteen ', 'seventeen ','eighteen ','nineteen ');
$tenners = array('', '','twenty ','thirty ','fourty ','fifty ','sixty ','seventy ','eighty ', 'ninety ');
$lint = strlen($number);
if ($lint > 2) $bigger = true;
for ($x = $lint ; $x >= 1 ; $x--) {
$last = substr($output, -5,4);
$digit = substr($number, 0, 1);
$number = substr($number, 1);
if ($x % 3 == 2) {
if ($digit == 1) { // 10-19..
$digit = substr($number, 0, 1);
$number = substr($number, 1);
$x--;
if ($last == 'sand') { $output .= 'and '; }
$output .= $teens[$digit];
} else { // 20-99..
if (($last == 'sand') ) { $output .= 'and '; }
$output .= $tenners[$digit];
}
} else {
if (($x % 3 != 1) and ($digit > 0) and (!empty($output))) { $output .= ', '; }
$output .= $units[$digit];
}
if ((strlen($number) % 3) == 0) {
$bignum = bignumbers(strlen($number) / 3);
if (($last == 'dred') and ($bignum != 'thousand')) { $output .= 'and '; }
$output .= $bignum;
}
if ((strlen($number) % 3) == 2 and $digit > 0) {
$output .= 'hundred and ';
}
}
// clean up the output..
$output = str_replace(' ', ' ', $output);
$output = str_replace('red and thou', 'red thou', $output);
$output = str_replace('red and mill', 'red mill', $output);
$output = str_replace('lion thousand', 'lion ', $output);
if (substr($output, -5) == ' and ') { $output = substr($output, 0, -5).' '; }
return $output;
}
/*
it just looks better, okay! */
function bignumbers($test) {
switch ($test) {
case 0:
$test = "";
break;
case 1:
$test = "thousand";
break;
case 2:
$test = "million";
break;
case 3:
$test = "trillion"; // <- that's a lot of comments!
break;
}
return $test;
}
// case-insensitive array search..
function in_arrayi($needle, $haystack) {
if (!$needle or !is_array($haystack)) {
return false;
}
for($h = 0 ; $h < count($haystack) ; $h++) {
$haystack[$h] = strtolower($haystack[$h]);
}
return in_array(strtolower($needle),$haystack);
}
// fuzzy array search!
function fuzzy_array_match($words_array, $input, $sensitivity){
$shortest = -1;
foreach ($words_array as $word) {
$lev = levenshtein($input, $word);
if ($lev == 0) {
$closest = $word;
$shortest = 0;
break;
}
if ($lev <= $shortest || $shortest < 0) {
$closest = $word;
$shortest = $lev;
}
}
if ($shortest <= $sensitivity){
return $closest;
} else {
return 0;
}
}
/*
function mail_mash() v0.3
a cuter way to foil the spam-bots
mail_mash will transform email@address.com into a randomly mixed string of real
"o" and encoded "o" characters. it's different each time the page loads,
but always presents a valid mailto:email@address.com for a human clicker
note: the "mailto:" part is also prepended, mixed in to the randomness, so you
don't need to provide that in your html, just <a href="',mail_mash($email_address),'">
from inside a php echo, or put a whole php echo statement inside the href if you
are inside plain html.. <a href="<?php echo mail_mash($email_address); ?>">
your@address.com
would output *something like*..
mailto:your@address.com
have fun!
;o)
ps. these days I prefer GD-verify protected mail forms!
*/
/*
function mail_mash() */
if (!function_exists('mail_mash')) {
function mail_mash($addy) {
$addy = 'mailto:'.$addy;
for ($i = 0 ; $i < strlen($addy) ; $i++){ $letters[] = $addy[$i]; }
while (list($key, $val) = each($letters)) {
$r = rand(0,20);
if ($r > 9) { $letters[$key] = '&#'.ord($letters[$key]).';'; }
}
$mashed_email_addy = implode('', $letters);
return $mashed_email_addy;
}
}/*
end function mail_mash() */
/*
make a valid HTML id..
this function exists in the main corzblog functions,
but cbparser goes out on its own, so...
This is from cbparser..
*/
function make_valid_id_string($title) {
$title = str_replace(' ', '-', strip_tags($title));
$id_title = preg_replace("/[^a-z0-9-]*/i", '', $title);
while (is_numeric((substr($id_title, 0, 1))) or substr($id_title, 0, 1) == '-') {
$id_title = substr($id_title, 1);
}
return trim(str_replace('--', '-',$id_title));
}
/*
Minify a string
Originally for minifying style sheets, also handy for other things!
*/
function minify_string($string) {
// $string = preg_replace('!/\*[^*]*\*+([^/][^*]*\*+)*/!', '', $string); // remove CSS/php comments
$string = str_replace(array("\r\n", "\r", "\n", "\t", ' ', ' ', ' '), '', $string);
return $string;
}
?>