<?php

/*	v0.7.3
		    :!: utf-8 (no bom); unix linefeeds; monaco 9pt, 4 spaces/tab :!:
		
		  :!: by using this software you agree to the terms of the license :!:
							(if there's one at the bottom!)
					 
							© corz.org 2004 ->

				personal density analyser..  reads words on web pages.
				
	
		I've seen tools like this online, but they want cash for them. the only
		freely downloadable one I could find was near useless. this may help
		fill the void.

		pda analyses a web page, with an eye on SEO (search engine optimisation)
		and returns useful information, density of words, headers, meta-tags,
		that sort of thing, in a hopefully meaningful way

		no need for speed here (ahhhh), I might even use some regex.

		I'll try and design with linearity and ease of customisation in mind,
		I figure you'll want to tweak things, add bits. hopefully everything
		should be clear enough. loads could be done to improve performance,
		I'll probably not bother with much of that.

		note: this script makes no attempt to interpret the results, or consider
		the ever-shifting limits and algorithms of the various search-engines,
		merely presents the data as-is. you make up your own mind which bits
		are important. most folks quote a target of 3-5% prime keyword density

		as well as a useful SEO tool, pda has uncovered a few errors in my own
		site's mark-up, stuff I'd missed, handy. other sites too!

		have fun!

		;o)
		(or



		p.d.a..
		--------

		0.7
		analysis of each different aspect now, alt tags, anchor titles, body, 
		etc. top ranking keywords are sorted and displayed for each section, and 
		there's a grand totals section at the end, too.


		0.5
		we're grabbing the other important words now, alt tags, anchor title 
		tags, displaying them, and calculating total percentages.

		the colours are configurable now, too.


		0.3
		starting to do more meaningful things with the words, the list is ranked 
		and percentage density is displayed, along with another list of the body 
		text, alphabetically sorted, bonus extra.

		0.2
		got a big list of words, and just slurging out our sorted array.

		the body words are now listed in a separate box, by textstream 
		order, which gives us a better idea of how a robot "sees" the page.

		0.1
		basic loading of web page, throws up meta-tags, title, etc in progress: 
		there's no zip available yet


		I was really wrecked when I did most of this, and it shows in the code, 
		but it works!


*/


/*	basic preferences..

	everything between here and "..end preferences" is a plain global
	variable read by the script further down. edit the values, not the names.
*/

/*
splurge out all the words for each section,
create interesting sentences, use bandwidth.

(placeholder switch: nothing in the visible part of the script reads it yet)
*/
$show_words = true;	// this does nothing, for now.


/* 	you think the background colours suck?
	change them..

	$head_background is used as the bgcolor of the heading cells and of the
	bordered result tables, $info_background as the bgcolor of the cells
	that hold the actual data.

	blues..	*/
$head_background = 	'#6698cc';
$info_background =	'#99ccff';

/*	greens	(alternative colour scheme; swap it with the block above to use it)
$head_background = 	"#33cc88";
$info_background =	"#ccffdd";
*/


// keep a history..
// when true, do_history() is called with every analysed URL
$keep_history = true;


// name of the history file 
// using ".ht_something" prevents file being loaded by browsers
// NOTE(review): that relies on the web server denying ".ht*" files (the usual
// Apache default) - confirm for your server. presumably consumed by
// do_history(), which is defined outside this part of the file.
$hitfile = '.ht_pda_hits';


// an occasionally useful thing..
// when true, a "raw browser headers" table is printed from $headers.
// NOTE(review): the only visible code that would fill $headers is commented
// out in grab_page(), so check that before switching this on.
$show_browser_headers = false;

/*	..end preferences
*/


do_header();

/*
work out which page (if any) we were asked to analyse.

only a plain string is a usable URL; a request like ?q[]=x arrives as an
array and would blow up the string functions in grab_page(), so anything
else is treated as "no page given".
*/
$page = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

if ($page != '') {

	// if we grab this now we can insert the correct URL in the input field
	// (grab_page() writes the normalised URL back into the global $page)
	$contents = grab_page($page);

	// $page comes straight from the query string, so it must be escaped
	// before it goes anywhere near the markup
	echo '<title>personal density analyser (analyzer) view of: '
		,htmlspecialchars((string) $page, ENT_QUOTES),'</title>';
} else { 
	echo '<title>personal density analyser (analyzer) </title>';
}

// inline stylesheet, end of <head>, start of <body>
echo '
<style type="text/css"><!-- 

body {
	font: normal 100% Tahoma, Helvetica, Lucida Grande, Verdana, sans-serif;
	color: #000022;
	background: #FFF;
	}
	
a:link 		{ color:#223322;  text-decoration:none; }
a:active 	{ color:#ff00ff;  text-decoration:none; }
a:visited 	{ color:#336699;  text-decoration:none; }
a:hover		{ color:#3399ff;  text-decoration:none; }
	
.rtd { padding: 1px 4px 1px 4px; }	
.pda {
	font-size: 90%;
	color: #003300;
	font-family: Lucida Grande, Trebuchet, Tahoma, Helvetica, Verdana, sans-serif;
}
//--></style>
</head>
<body>';

//@include ($_SERVER['DOCUMENT_ROOT'].'/inc/header.php');

/*
the input form. the URL typed by the visitor is echoed back into the text
field, so it is escaped first: unescaped, a value containing a double quote
could close the attribute and inject markup.
*/
$form_action = htmlspecialchars((string) $_SERVER['SCRIPT_NAME'], ENT_QUOTES);
$form_value = (isset($page) && is_string($page)) ? htmlspecialchars($page, ENT_QUOTES) : '';

echo '
<form method="get" action="',$form_action,'">
<table  width="80%" align="center" cellspacing=0 cellpadding=0>
<!-- personal density analyser (analyzer) ® corz.org 2004 -->
	<tr><td class="rtd" height=20></td></tr>
	<tr>
		<td class="rtd">
		<table  width="100%" align="center" cellspacing=1 cellpadding=3 bgcolor="',$head_background,'">
		<tr>
			<td class="rtd" bgcolor="',$head_background,'" height=33>
			<span class="pda">
			<big><strong>pda: personal density analyser</strong></big>
			<small>&nbsp;&nbsp;enter a URL to analyse..</small>
			</span>
			</td>
		</tr>
		<tr>
			<td class="rtd" bgcolor="',$info_background,'" height=33 valign=middle>&nbsp;
			<input type="text" name="q" size="54" maxlength="333" value="',$form_value,'">
			&nbsp;<input type="submit" value="analyse!">
			</td>
		</tr>
		</table>
		</td>
	</tr>
</table>';

/*
start timer.. we like to do this

microtime() hands back "fraction seconds" as one string; the two halves are
glued back together as "seconds.fraction" so the footer can subtract it later
*/
$search_time = explode(' ', microtime());
$start_time = sprintf('%s%s', $search_time[1], substr($search_time[0], 1));


/*
analyse the grabbed page and splurge out the report.

reads:	$page, $contents, $meta_tags (both set up via grab_page()),
		the colour and history preferences.
uses:	do_history(), get_title(), get_body(), strip_html(), strip_stuff(),
		strip_stops(), grab_alt_tags(), grab_anchor_titles() from this file,
		plus the pda_*() display helpers defined directly below.
*/
if ($page != '') {

	// do our past hits
	if ($keep_history) do_history($page);

	$contents = isset($contents) ? (string) $contents : '';

	// grab the <title>
	$title = get_title($contents);

	// grab_page() grabs the meta-tags, but a page need not have a description
	// or a keywords tag, so never assume those keys exist
	if (!isset($meta_tags) || !is_array($meta_tags)) $meta_tags = array();
	$meta_description = isset($meta_tags['description']) ? $meta_tags['description'] : '';

	// convert comma to comma+space :wrapping:
	$meta_tags['keywords'] = isset($meta_tags['keywords'])
		? str_replace(',', ', ', $meta_tags['keywords'])
		: '';

	// do meta-tags
	$tag_words = ' '.$title.' '.$meta_description.' '.$meta_tags['keywords'].' ';
	$scoring_tag_words = strip_stops(strip_stuff($tag_words));

	// remove numericals from raw page..
	$raw_text = preg_replace('/\d/', '', $contents);

	// get <body> text
	$raw_body_text = get_body($raw_text);

	// strip html from <body> text (strip_tags() is problematic here)
	$stripped_body = strip_stuff(strip_html($raw_body_text));

	// every "word" in the body, stop-words and all..
	$stripped_body_words = pda_split_words($stripped_body);
	$body_word_count = count($stripped_body_words);

	// ..then the keywords: word => number of hits, most hits first
	$clean_body_words = strip_stops($stripped_body);
	$body_words = array_count_values(pda_split_words($clean_body_words));
	arsort($body_words);
	$unique_body_word_count = count($body_words);


	// grab alt tags & fix em up..
	$alt_tags = grab_alt_tags($raw_body_text);
	$alt_tag_words = pda_split_words($alt_tags);
	$alt_tags_count = count($alt_tag_words);

	$clean_alt_tags = strip_stops(strip_stuff($alt_tags));
	$clean_alt_tag_words = pda_split_words($clean_alt_tags);

	$alt_tag_unique_words = array_count_values($clean_alt_tag_words);
	arsort($alt_tag_unique_words);
	$alt_tag_unique_count = count($alt_tag_unique_words);


	// same with anchor title tags..
	$anchor_titles = grab_anchor_titles($raw_body_text);
	$anchor_titles_words = pda_split_words($anchor_titles);
	$anchor_titles_count = count($anchor_titles_words);

	$clean_anchor_titles = strip_stops(strip_stuff($anchor_titles));
	$clean_anchor_titles_words = pda_split_words($clean_anchor_titles);

	$anchor_titles_unique_words = array_count_values($clean_anchor_titles_words);
	arsort($anchor_titles_unique_words);
	$anchor_titles_unique_count = count($anchor_titles_unique_words);


	// you can plug other things in here


	// now the whole lot combined..
	// (the sections are joined with a space so the last word of one cannot
	// fuse with the first word of the next)
	$total_words = $tag_words.' '.$stripped_body.' '.$alt_tags.' '.$anchor_titles;
	$all_words = pda_split_words($total_words);
	$all_words_count = count($all_words);

	$clean_all = strip_stops(strip_stuff($total_words));
	$clean_all_words = pda_split_words($clean_all);

	$all_unique_words = array_count_values($clean_all_words);
	arsort($all_unique_words);
	$all_unique_words_count = count($all_unique_words);


	/*
	splurge out results..
	*/

	// document title
	pda_box_open('title');
	echo '
		<table  width="100%" align="center" cellspacing=0 cellpadding=2>
		<tr>
			<td class="rtd" bgcolor="',$head_background,'" width="20%">
			<small><span class="pda"><strong><big>document title:&nbsp;&nbsp;&nbsp;</big></strong></span></small>
			</td>
			<td class="rtd" valign=top bgcolor="',$head_background,'">
			<small><span class="pda">"',pda_h($title),'"</span></small>
			</td>
		</tr>
		</table>';
	pda_box_close();

	// raw headers
	if ($show_browser_headers) {
		$raw_headers = (isset($headers) && is_array($headers)) ? $headers : array();
		pda_box_open('raw headers');
		pda_heading($head_background, array('<strong>raw browser headers:</strong>'));
		pda_pairs_table($raw_headers, $head_background, $info_background);
		pda_box_close();
	}

	// meta tags
	pda_box_open('meta tags');
	pda_heading($head_background, array(
		'<strong>meta tags:</strong>',
		'<small>(commas in keywords have a space inserted after them to allow easy wrapping)</small>',
	));
	pda_pairs_table($meta_tags, $head_background, $info_background);
	pda_box_close();

	// the "hot" keywords from titles and meta-data
	pda_text_panel('meta words',
		'<strong>scoring meta keywords..</strong> <small>(title + meta data)</small>',
		$scoring_tag_words, $head_background, $info_background);

	// alt tags
	pda_banner('alt tags', '&lt;alt&gt; tag text..', '&lt;alt&gt;&nbsp;', $head_background);
	pda_text_panel('alt tag words', '<strong>alt tag words..</strong>',
		$alt_tags, $head_background, $info_background, $alt_tags_count, $all_words_count);
	pda_analysis_panel(array(
		'heading'		=> 'alt tag word analysis:',
		'total'			=> 'total alt tag word count: ',
		'unique'		=> 'unique alt tag word count: ',
		'unique_note'	=> ' (without stop-words or dupliactes)',
		'ranking'		=> 'top ranking keywords..',
		'density'		=> 'density in alt tag text:',
	), $alt_tag_unique_words, $alt_tags_count, $head_background, $info_background);

	// anchor titles
	pda_banner('anchor titles', '&lt;a title&gt; anchor text..', '&lt;a title&gt;&nbsp;', $head_background);
	pda_text_panel('anchor title words', '<strong>anchor titles..  </strong>',
		$anchor_titles, $head_background, $info_background, $anchor_titles_count, $all_words_count);
	pda_analysis_panel(array(
		'heading'		=> 'anchor title word analysis:',
		'total'			=> 'total anchor titles word count: ',
		'unique'		=> 'unique anchor titles word count: ',
		'unique_note'	=> ' (no stop-words or dupliactes)',
		'ranking'		=> 'top ranking keywords..',
		'density'		=> 'density in anchor title text:',
	), $anchor_titles_unique_words, $anchor_titles_count, $head_background, $info_background);

	// body words
	/*	we could order them by density, too..
		implode(' ', array_keys($body_words))	*/
	pda_banner('body words', '&lt;body&gt; text..', '&lt;body&gt;&nbsp;', $head_background);
	pda_text_panel('body words',
		'<strong>&lt;body&gt; words.. <small>(in textstream read order)</small></strong>',
		$stripped_body, $head_background, $info_background);
	pda_analysis_panel(array(
		'heading'		=> 'body word analysis:',
		'total'			=> 'total body word count: ',
		'unique'		=> 'unique body word count: ',
		'unique_note'	=> ' (no stop-words or dupliactes)',
		'ranking'		=> 'top ranking keywords..',
		'density'		=> 'density in body text:',
	), $body_words, $body_word_count, $head_background, $info_background);

	// totals (density is measured against the total word count, not the body's)
	pda_banner('totals', 'total page text..', 'totals&nbsp;', $head_background);
	pda_analysis_panel(array(
		'heading'		=> 'total word analysis:',
		'total'			=> 'total word count: ',
		'unique'		=> 'total unique word count: ',
		'unique_note'	=> ' (no stop-words or dupliactes)',
		'ranking'		=> 'top ranking words..',
		'density'		=> 'density in total text:',
	), $all_unique_words, $all_words_count, $head_background, $info_background);

}


/*
function:pda_h()

	escape text that came from the analysed page (or the request) before it
	is echoed. ISO-8859-1 is named on purpose: every byte is valid in it, so
	pages in any encoding pass through untouched apart from & < > " and ',
	and the last argument stops existing entities being encoded twice.
*/
function pda_h($text) {
	return htmlspecialchars((string) $text, ENT_QUOTES, 'ISO-8859-1', false);
}/*
end function pda_h()
*/


/*
function:pda_split_words()

	split a string into words on any run of white space. empty strings are
	dropped, so doubled spaces do not get counted as words.
*/
function pda_split_words($text) {
	$words = preg_split('/\s+/', (string) $text, -1, PREG_SPLIT_NO_EMPTY);
	return is_array($words) ? $words : array();
}/*
end function pda_split_words()
*/


/*
function:pda_density()

	$part as a percentage of $whole, to two places. an empty section has a
	$whole of 0, which gives 0 rather than a division by zero.
*/
function pda_density($part, $whole) {
	return ($whole > 0) ? round($part * (100 / $whole), 2) : 0;
}/*
end function pda_density()
*/


/*
function:pda_box_open() / pda_box_close()

	the 80% wide outer table (with its 10px spacer row) that wraps every
	section of the report. $comment becomes an html comment above the box.
*/
function pda_box_open($comment = '') {
	if ($comment != '') echo '
<!-- ',$comment,' -->';
	echo '
<table  width="80%" align="center" cellspacing=0 cellpadding=0>
	<tr><td class="rtd" height=10></td></tr>
	<tr>
		<td class="rtd">';
}

function pda_box_close() {
	echo '
		</td>
	</tr>
</table>';
}/*
end functions pda_box_open() / pda_box_close()
*/


/*
function:pda_heading()

	one row of heading cells. $cells is a list of ready-made html snippets,
	$aligns an optional list of align values for the matching cells.
*/
function pda_heading($head_background, $cells, $aligns = array()) {
	echo '
		<table  width="100%" align="center" cellspacing=0 cellpadding=2>
			<tr>';
	foreach ($cells as $i => $cell_html) {
		$align = isset($aligns[$i]) ? ' align="'.$aligns[$i].'"' : '';
		echo '
				<td class="rtd" bgcolor="',$head_background,'"',$align,'>
				<span class="pda">',$cell_html,'</span>
				</td>';
	}
	echo '
			</tr>
		</table>';
}/*
end function pda_heading()
*/


/*
function:pda_banner()

	the big two-cell banner that introduces a group of sections.
	$left and $right are html snippets.
*/
function pda_banner($comment, $left, $right, $head_background) {
	pda_box_open($comment);
	pda_heading($head_background, array(
		'<strong><big>'.$left.'</big></strong>',
		'<strong><big>'.$right.'</big></strong>',
	), array('left', 'right'));
	pda_box_close();
}/*
end function pda_banner()
*/


/*
function:pda_pairs_table()

	a bordered "name : value" table, one row per array element.
	used for the meta tags and the raw headers.
*/
function pda_pairs_table($pairs, $head_background, $info_background) {
	echo '
		<table width="100%" align="center" cellspacing=1 cellpadding=2 bgcolor="',$head_background,'">';
	foreach ($pairs as $key => $val) {
		echo '
			<tr>
				<td class="rtd" valign=top align="right" width="20%" bgcolor="',$info_background,'">
				<small><span class="pda"><strong>',pda_h($key),' : </strong></span></small>
				</td>
				<td class="rtd" valign=top bgcolor="',$info_background,'">
				<small><span class="pda">',pda_h($val),'</span></small>
				</td>
			</tr>';
	}
	echo '
		</table>';
}/*
end function pda_pairs_table()
*/


/*
function:pda_text_panel()

	a heading plus one box of running text. when $word_count is given, a
	totals row is added showing the count and its share of $all_words_count.
	$heading_html is trusted markup, $text is escaped.
*/
function pda_text_panel($comment, $heading_html, $text, $head_background, $info_background, $word_count = null, $all_words_count = 0) {
	pda_box_open($comment);
	pda_heading($head_background, array($heading_html));
	echo '
		<table width="100%" align="center" cellspacing=1 cellpadding=2 bgcolor="',$head_background,'">
			<tr>
				<td class="rtd" valign=top bgcolor="',$info_background,'" colspan=2>
				<small><span class="pda">',pda_h($text),'</span></small>
				</td>
			</tr>';
	if ($word_count !== null) {
		echo '
			<tr>
				<td class="rtd" bgcolor="',$head_background,'"><small><span class="pda">total: '
				,$word_count,' words</span></small>
				</td>
				<td class="rtd" bgcolor="',$head_background,'"><small><span class="pda">'
				,pda_density($word_count, $all_words_count),
				'% of total word count</span></small>
				</td>
			</tr>';
	}
	echo '
		</table>';
	pda_box_close();
}/*
end function pda_text_panel()
*/


/*
function:pda_word_list_rows()

	the rows for one alphabetical word list inside an analysis table.
*/
function pda_word_list_rows($caption, $words) {
	echo '
					<tr><td class="rtd" height=20></td></tr>
					<tr>
						<td class="rtd" colspan=4><small><span class="pda"><strong>',$caption,'  
						<small>(alphabetical list)</small></strong><br /></span></small></td>
					</tr>
					<tr><td class="rtd" height=10></td></tr>
					<tr>
						<td class="rtd" colspan=4><small><span class="pda">',pda_h(implode(' ', $words)),'</span></small></td>
					</tr>';
}/*
end function pda_word_list_rows()
*/


/*
function:pda_analysis_panel()

	the word analysis for one section: the two counts, a ranking of every
	word with more than two hits, then the twice-only and once-only words as
	alphabetical lists.

	$labels			the section's texts: heading, total, unique, unique_note,
					ranking and density
	$word_counts	word => hits, already sorted most-hits-first
	$total_count	every word in the section, stop-words included; also the
					base the density percentages are worked out against
*/
function pda_analysis_panel($labels, $word_counts, $total_count, $head_background, $info_background) {

	// one pass over the words sorts them into the three groups
	$ranked = array();
	$twice = array();
	$once = array();
	foreach ($word_counts as $word => $hits) {
		if ($hits > 2) {
			$ranked[$word] = $hits;
		} elseif ($hits == 2) {
			$twice[] = $word;
		} else {
			$once[] = $word;
		}
	}
	sort($twice);
	sort($once);

	pda_box_open();
	echo '
		<table  width="100%" align="center" cellspacing=0 cellpadding=2>
			<tr>
				<td class="rtd" bgcolor="',$head_background,'">
				<span class="pda"><strong>',$labels['heading'],'</strong></span>
				</td>
			</tr>
			<tr>
				<td class="rtd" bgcolor="',$info_background,'">
				<table  width="100%" align="justify" cellspacing=0 cellpadding=0>
					<tr>
						<td class="rtd" colspan=4><small><span class="pda"><strong>',$labels['total']
						,$total_count,'</strong></span></small></td>
					</tr>
					<tr>
						<td class="rtd" colspan=4><small><span class="pda"><strong>',$labels['unique']
						,count($word_counts),$labels['unique_note'],'</strong></span></small></td>
					</tr>
					<tr><td class="rtd" height=10></td></tr>
					<tr>
						<td class="rtd"  colspan=4>
						<small><span class="pda">
						<strong>',$labels['ranking'],'
						(sorted by density/textstream read order)</strong></span></small>
						</td>
					</tr>
					<tr><td class="rtd" height=10></td></tr>
					<tr>
						<td class="rtd" width="20%"><small><span class="pda"><strong>word:</strong></span></small></td>
						<td class="rtd"><small><span class="pda"><strong>occurences:</strong></span></small></td>
						<td class="rtd"><small><span class="pda"><strong>',$labels['density'],'</strong></span></small></td>
					</tr>
					<tr><td class="rtd" height=2></td></tr>';

	foreach ($ranked as $word => $hits) {
		echo '
					<tr>
						<td class="rtd"><small><span class="pda">&nbsp;',pda_h($word),'</span></small></td>
						<td class="rtd"><small><span class="pda">&nbsp;',$hits,'</span></small></td>
						<td class="rtd"><small><span class="pda">&nbsp;'
						,pda_density($hits, $total_count)
						,' %</span></small></td>
					</tr>';
	}

	pda_word_list_rows('words that occur twice..', $twice);
	pda_word_list_rows('words that occur only once..', $once);

	echo '
				</table>
				</td>
			</tr>
		</table>';
	pda_box_close();
}/*
end function pda_analysis_panel()
*/


// STOP the clock!
$search_time = explode(' ',microtime());
$total_time = ($search_time[1].substr($search_time[0],1)) - $start_time;

// format the duration as a number. chopping the float's string form with
// substr() gives nonsense such as "1.0E" for very small values, and a clock
// adjustment mid-request could even make it negative.
$shown_time = number_format(max(0, $total_time), 2);



/*
copyright notice - please leave this, ta
*/

	echo '<!-- credits due -->
<table  width="80%" align="center" cellspacing=0 cellpadding=0>
	<tr><td class="rtd" height=10></td></tr>
	<tr>
		<td class="rtd">
		<table  width="100%" align="center" cellspacing=0 cellpadding=2>
			<tr>
				<td class="rtd" bgcolor="',$head_background,'">
				<span class="pda"><small>&nbsp;pda personal density analyser.. </small></span>
				</td>
				<td class="rtd" bgcolor="',$head_background,'">
				<a href="http://corz.org/engine?download=menu&amp;section=seo%20scripts"
				title="get the source for pda.. ABSOLUTELY FREE!">
				<span class="pda"><small>&nbsp;get source </small></span></a>
				</td>
				<td class="rtd" bgcolor="',$head_background,'">';
		// the timing is only worth showing when a page was actually requested
		if (!empty($_GET['q'])) {
			echo '
				<span class="pda"><small>page processed in ',$shown_time,
				' seconds</small></span>';
		}
		echo '
				</td>
				<td class="rtd" bgcolor="',$head_background,'" align="right">
				<span class="pda"><small>&copy; corz.org 2004&nbsp;</small></span>
				</td>
			</tr>
		</table>
		</td>
	</tr>
</table>
</body>
</html>
';


/*
function:grab_page()	// we'll get meta-tags too
*/
/**
 * Fetch a web page and hand back its source, lower-cased. Meta-tags are
 * collected on the way.
 *
 * NOTE: $page is also declared `global` below. In PHP that re-binds the local
 * name to the global variable, so the argument passed in is ignored and the
 * global $page is what is read - and what receives the normalised address.
 * Callers are expected to pass that same global, i.e. grab_page($page).
 * This is kept as-is so existing callers see no change.
 *
 * Side effects:
 *   - global $page is trimmed, and given an 'http://' prefix when it does not
 *     already START with http:// or https://
 *   - global $meta_tags is set from get_meta_tags(); when nothing could be
 *     read it becomes array('description' => '', 'keywords' => '')
 *
 * @param  string $page  address of the page (see NOTE)
 * @return string|null   lower-cased source, '' when the fetch failed,
 *                       null when there is no page to fetch
 */
function grab_page($page) {
global $headers, $meta_tags, $page;

	if (!$page) return;

	$page = trim($page);

	// the scheme must be at the very start. looking for 'http://' anywhere
	// in the string turned https:// addresses into 'http://https://..' and
	// let through anything that merely mentioned a URL further along.
	if (!preg_match('#^https?://#i', $page)) $page = 'http://'.$page;

	// get the data.. best effort: a failed fetch is an empty page, not an error
	$filedata = @file_get_contents($page);
	if ($filedata === false) $filedata = '';

	$meta_tags = @get_meta_tags($page);
	if (!is_array($meta_tags)) $meta_tags = array('description' => '', 'keywords' => '');
	//$headers = getallheaders($page);
	return strtolower($filedata);
}/*
end function grab_page()
*/


/*
function:get_title()

	grabs the text of the page's <title>, in AnY CaSe, and on multiple
	lines, too. a greedy, single-line "<title>(.*)</title>" match is not
	reliable enough for that, so we anchor on the opening tag and take
	whatever text follows, up to the next tag.

	- the opening tag may carry attributes (<title lang="en">)
	- at most 1024 bytes of title text are returned
	- a title that is never closed still gives back its text
	- no title at all gives back an empty string
	- the text is not trimmed

	@param  string $string  page source
	@return string          the title text, or ''
*/
function get_title($string) {
	if (preg_match('/<title(?:\s[^>]*)?>([^<]{0,1024})/i', $string, $found)) {
		return $found[1];
	}
	return '';
}/*
end function get_title()
*/



/*
function:get_body()
*/
/**
 * Return whatever sits between the opening <body ..> tag and the first
 * </body> after it, trimmed.
 *
 * The end of the opening tag is located properly, so text that comes before
 * the first inner tag is kept (a bare "<body>" used to lose it). When there
 * is no <body> tag the text is taken from the start of the string; when there
 * is no </body> it runs to the end of the string.
 *
 * @param  string $string  page source
 * @return string          the body mark-up, trimmed
 */
function get_body($string) {
	$start = 0;
	if (preg_match('/<body(?:\s[^>]*)?>/i', $string, $open, PREG_OFFSET_CAPTURE)) {
		// [0][1] is the byte offset of the tag, [0][0] the tag itself
		$start = $open[0][1] + strlen($open[0][0]);
	}

	$end = stripos($string, '</body>', $start);
	if ($end === false) $end = strlen($string);

	return trim((string) substr($string, $start, $end - $start));
}/*
end function get_body()
*/




/*
function:strip_html()
*/
/**
 * Remove mark-up from a chunk of HTML, leaving the readable text.
 *
 * In order: <script> blocks, <style> blocks and <!-- comments --> go
 * complete with their contents (otherwise javascript, css rules and
 * commented-out mark-up end up being counted as words), then every
 * remaining tag, then the white space that follows a line break.
 *
 * Tags are replaced by nothing, not by a space.
 *
 * @param  string $text  HTML
 * @return string        the text with the mark-up removed
 */
function strip_html($text) {
	$search = array(
		'#<script[^>]*?>.*?</script>#si',	// strip javascript
		'#<style[^>]*?>.*?</style>#si',		// strip style sheets
		'#<!--.*?-->#s',					// strip comments (they may contain ">")
		'#<[/!]*?[^<>]*?>#si',				// strip HTML tags
		'#([\r\n]|[\r]|[\n])[\s]+#',		// strip white space after a line break
	);
	$replace = array('', '', '', '', '\\1');

	$stripped = preg_replace($search, $replace, $text);

	// preg_replace() gives null when PCRE gives up (e.g. backtrack limit on a
	// huge page); fall back to the plain tag stripper rather than return nothing
	if ($stripped === null) return strip_tags($text);

	return $stripped;
}/*
end function strip_html()
*/



/*
function:strip_stuff()
*/
/**
 * Turn punctuation, line breaks, tabs and a handful of HTML entities into
 * spaces, squeeze every run of spaces down to a single space, and lower-case
 * the result. Leading and trailing spaces are left alone.
 *
 * @param  string $string  text to clean
 * @return string          cleaned, lower-cased text
 */
function strip_stuff($string) {

	// replaced in this order - the entities must still be intact when their
	// turn comes, so neither '&' nor ';' may appear on their own before them
	$stoppers = array
	('.',',',':','|','"','\\','/','?','*','~','#','%','$','(',')','[',']','{','}','-','_','=','+'
	,'£','@',"Â","\r\n","\r","\n","\t",'»','«','&nbsp;','&copy;','&reg;','&amp;'
	,'&gt;','&lt;','&trade;','•','°'
	);

	// str_replace() walks an array of needles in order, one after the other
	// (each() is gone as of PHP 8, so no more while/list/each loops)
	$string = str_replace($stoppers, ' ', $string);

	// collapse runs of any length; two fixed str_replace() passes left
	// doubled spaces behind whenever a run was five or more long
	$string = preg_replace('/ {2,}/', ' ', $string);

	return strtolower($string);
}/*
end function strip_stuff()
*/

/*
	function:strip_stops()
	accepts $string, returns $string minus any "stop-words"
*/
/**
 * Remove "stop-words" from a space-separated string.
 *
 * A word is dropped when it is shorter than two bytes, or when it matches a
 * stop-word either exactly or with its first letter capitalised ("the" and
 * "The" both go, "THE" stays). The remaining words keep their order and are
 * joined back together with single spaces.
 *
 * @param  string $string  space-separated words
 * @return string          the same words minus the stop-words
 */
function strip_stops($string) {

	$stop_words = array	(
	  ' ', ';o', '&nbsp;', 'a', 'all', 'am', 'an', 'and', 'are', 'as'
	, 'at', 'be' ,'but', 'by', 'can', 'do', 'don\'t', 'even', 'for', 'get', 'got'
	, 'has', 'have' ,'he', 'here', 'I', 'if', 'in', 'is', 'it', 'it\'s', 'just'
	, 'like', 'me' ,'my', 'n', 'no', 'not', 'o', 'of', 'on', 'one', 'or', 'our'
	, 'out', 'pm' ,'see', 'she', 'so', 't', 'than', 'that', 'that\'s', 'the'
	, 'them', 'then' ,'there', 'there\'s', 'these', 'they', 'this', 'those', 'to'
	, 'too', 'was' ,'we', 'when', 'with', 'won\'t', 'you', 'your'
	);

	// one lookup table holding both the plain and the Capitalised spelling,
	// so each word costs a single isset() instead of a walk along the list
	$is_stop = array();
	foreach ($stop_words as $stop) {
		$is_stop[$stop] = true;
		$is_stop[ucfirst($stop)] = true;
	}

	$keepers = array();
	foreach (explode(' ', $string) as $word) {
		// empty strings (from doubled spaces) and single characters go too
		if (strlen($word) < 2 || isset($is_stop[$word])) continue;
		$keepers[] = $word;
	}

	return implode(' ', $keepers);
}/*
	end function:strip_stops()
*/


/*
function:grab_alt_tags()
*/
/**
 * Collect the value of every quoted alt="" attribute in a chunk of HTML.
 *
 * The value may sit in single or double quotes, and runs up to the matching
 * closing quote - so an apostrophe inside a double-quoted value is fine, and
 * it no longer matters whether the other kind of quote turns up later in the
 * page. White space around the "=" is allowed, the attribute name is matched
 * in any case, and longer names that merely end in "alt" (data-alt, salt) are
 * ignored. Unquoted values are skipped.
 *
 * @param  string $text  HTML
 * @return string        the values, each one preceded by a single space
 *                       ('' when there are none)
 */
function grab_alt_tags($text) {
	$alt_tags = '';

	// group 1 is the opening quote, \1 insists on the same one to close
	$pattern = '/(?<![\w-])alt\s*=\s*(["\'])(.*?)\1/is';

	if (preg_match_all($pattern, $text, $found, PREG_SET_ORDER)) {
		foreach ($found as $hit) {
			$alt_tags .= ' '.$hit[2];
		}
	}
	return $alt_tags;
}/*
end function grab_alt_tags()
*/


/*
function:grab_anchor_titles()
*/
/**
 * Collect the value of every quoted title="" attribute in a chunk of HTML.
 *
 * Despite the name this is not limited to anchors: a title attribute on any
 * element is picked up. The attribute name is matched in any case (it could
 * even be "TITLE"), the value may sit in single or double quotes and runs up
 * to the matching closing quote. Longer names that merely end in "title"
 * (data-title, subtitle) are ignored, as are unquoted values.
 *
 * Every title in the text is found; the search carries on from the end of
 * each match, not from the next alt= attribute.
 *
 * @param  string $text  HTML
 * @return string        the values, each one preceded by a single space
 *                       ('' when there are none)
 */
function grab_anchor_titles($text) {
	$a_titles = '';

	// group 1 is the opening quote, \1 insists on the same one to close
	$pattern = '/(?<![\w-])title\s*=\s*(["\'])(.*?)\1/is';

	if (preg_match_all($pattern, $text, $found, PREG_SET_ORDER)) {
		foreach ($found as $hit) {
			$a_titles .= ' '.$hit[2];
		}
	}
	return $a_titles;
}/*
end function grab_anchor_titles()
*/


/*
function do_history()
*/

/**
 * Add a URL to the top of the history file (global $hitfile), one per line,
 * newest first.
 *
 * Nothing happens when $hitfile is not set or the file does not exist - the
 * file is never created here. Failures to open, lock or read the file are
 * silently skipped; the history is a nicety, not worth breaking the page for.
 *
 * The read-and-rewrite is done on one handle under an exclusive lock, so two
 * requests arriving together cannot wipe each other's entries. Line breaks
 * are removed from the URL, as a raw one would forge extra entries.
 *
 * @param  string $url  the address to record
 * @return void
 */
function do_history($url) {
global $hitfile;

	if (empty($hitfile) || !file_exists($hitfile)) return;

	// the entry itself..
	$entry = str_replace(array("\r", "\n"), '', (string) $url);

	// 'r+' opens for read/write without truncating and without creating
	$file_pointer = @fopen($hitfile, 'r+');
	if (!$file_pointer) return;

	if (flock($file_pointer, LOCK_EX)) {
		$old = stream_get_contents($file_pointer);
		// only rewrite when the old entries were actually read
		if ($old !== false) {
			rewind($file_pointer);
			ftruncate($file_pointer, 0);
			fwrite($file_pointer, $entry ."\n". $old);
			fflush($file_pointer);
		}
		flock($file_pointer, LOCK_UN);
	}
	fclose($file_pointer);
}/*
end function do_history()
*/


/*
function do_header()
*/
/**
 * Send the top of the HTML document: doctype, <html><head>, the content-type
 * and description meta-tags, then whatever the site-wide /inc/metadata.php
 * (under the document root) prints - quietly skipped when it cannot be
 * included - and finally the keywords meta-tag plus the style-sheet imports.
 *
 * <head> is left open for the caller to finish off.
 */
function do_header() {
echo <<<'PDA_HEAD'

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
		"http://www.w3.org/TR/html4/loose.dtd"><html><head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="description" content="pda, personal density analyser, pda, free tools for webmasters, word density analysis, tag conversion. more.">
PDA_HEAD;

@include ($_SERVER['DOCUMENT_ROOT'].'/inc/metadata.php');

echo <<<'PDA_HEAD'
<meta name="keywords" content="pda,personal density analyser, analyzer,density analyzer,personal density analyzer,word density analyser,tools,free,free tools,word,density,analysis,word density analysis,free download,downloads,density analysis script"><style type="text/css">/*<![CDATA[*/ @import "/inc/css/main.css"; @import "/inc/css/site.css"; @import "/inc/css/footer.css"; /*]]>*/</style>
PDA_HEAD;
}/*
end function do_header()
*/

?>
