<?php /* --- ۞---> text { encoding:utf-8;bom:no;linebreaks:unix;tabs:4sp; } */
/* Current Anti-Hammer version. Direct access to this prefs file is blocked just below. */
$anti_hammer_version = '0.9.8.1';
// If this file is requested directly (rather than include()d by the main script), die.
// NOTE(review): direct() is presumably defined later in this file — confirm; even if
// undefined, the resulting fatal error still terminates the direct request.
// Strict comparison (===): realpath() returns string|false, and both operands here are
// produced the same way, so identity comparison is the safe, idiomatic form.
if (realpath($_SERVER['SCRIPT_FILENAME']) === realpath(__FILE__)) { die(direct()); }
/*
Anti-Hammer
NOTE: This software needs an update. Feel free...
Automatically set temporary bans for web site hammering.
Interrogate Referers and protect your site against Referer Spam.
Deny h4x0r requests to sensitive resources.
Protect your valuable server resources for genuine clients.
Full details here..
https://corz.org/server/tools/anti-hammer/
Have fun!
;o)
(c) 2007->tomorrow! cor + corz.org ;o)
NOTE: This needs php >= v5.1 to work.
*/
//*> prefs..
/*
File Types
[default: $anti_hammer['types'] = 'php,html';]
Which file types (extensions) to protect with the anti-hammer?
We only want to count hits on the main pages, not associated files, css,
Javascript includes, and such (if they are generated by php), as many of
these will normally be requested within milliseconds of the initial page hit.
If we run the anti-hammer indiscriminately, such files would automatically
count towards hammering, and folk would probably be penalized on their first
visit. If you don't use php to generate other (non - .php) files, the
anti-hammer won't be running anyway - it only runs before php scripts, as
it's designed to protect server resources, not bandwidth; basic requests get
spat out without any real processing power or memory usage.
These list items match the extension of the *actual* physical script file,
regardless of the requested URI, so for example, these..
http://mysite.com/ (assuming something.php is your default index)
http://mysite.com/index.php
http://mysite.com/foo.php?page=bar.htm
http://mysite.com/genny.php?image=img1.jpg
.. would *all* match 'php'. Other extensions are fine, so long as they are
parsed by php on your setup. Separate entries with commas, and put the whole
thing in quotes..
Extensionless files are not supported.
*/
$anti_hammer['types'] = 'php,html'; // comma-separated extensions of the *physical* script file to protect
/*
Generated Extensions
[default: $anti_hammer['gen_types'] = 'jpg,png';]
This is a list of (usually) image extensions which you serve via php. As
there may be many of these on a single page, we want to skip these, too.
These list items match the extension of the *request*, regardless of the
physical script file generating the output. Links such as..
http://mysite.com/gen.php?image=foo.jpg
http://mysite.com/png-pusher.html/foo.jpg
.. would match "jpg".
Separate entries with commas and put the whole thing in quotes..
*/
$anti_hammer['gen_types'] = 'jpg,png'; // comma-separated extensions of the *request* (php-generated files) to skip
/*
NOTES:
The file type *generating* these url's MUST be included in your
$anti_hammer['types'] array (above), presumably. 'php'.
You could also use the above preference array to skip other non-
image generated types, if you have such things onsite.
*/
/*
Skip certain files and folders..
aka, basic "Ignore"..
[default: $anti_hammer['skip'] = '/chat/,/foobar/members,rdf.php,/blog/rss.php';]
A list of areas/folders and specific files you DON'T want the
anti-hammer to cover. Enter the full path (from site root) to each
file/folder.
You can also skip ALL the instances of "rss.php", etc. on your entire
site by using only the file name, e.g..
$anti_hammer['skip'] = 'rdf.php,rss.php';
This also works for folders. Using the full path enables you to target
specific files and folders, using only the name gives you blanket
coverage. Your call.
Basically, if your string is contained anywhere within the requested
URI (including the query string), the script returns control to your
page immediately, bypassing Anti-Hammer.
Do put commas *between* entries.
*/
$anti_hammer['skip'] = '/chat/,/foobar/members,rdf.php,/blog/rss.php'; // URI substrings that bypass Anti-Hammer entirely
// similarly, you can set some POST variables that, when encountered, will cause
// Anti-Hammer to skip processing.
//
$anti_hammer['skip_post'] = 'ajax-post'; // POST variable name(s) that bypass Anti-Hammer
/*
RSS feeds are a good example of a file to skip (assuming they are
php-generated). Firefox, for example, will often grab all the feeds on a
page at-once, quickly notching up a user's hammer count.
*/
/*
Hammer Time!
[default: $anti_hammer['hammer_time'] = 99;] (just under one second)
If they make two requests within this time, the counter increases by one.
The faster and more capable your server, the lower this setting can be.
The higher you set this, the more likely they are to get a warning.
100 is a reasonable setting for a fast server, enabling one-hit-per-second
spidering, but penalizing anything faster. If your site has a lot of "power
users", perhaps 75, even 50.
Enter an integer, representing 100th/s..
*/
$anti_hammer['hammer_time'] = 50; // in 100ths of a second; two requests within this window = one violation
/*
Trigger levels.
[default: $anti_hammer['trigger_levels'] = '5,10,20,30';]
Enter the number of violations that will trigger each of the four levels..
i.e. At the default settings, they get their first warning after five
violations (with a ban time of three seconds, set below). The time penalty
increases after ten and twenty violations, up to the maximum level of 30
violations (which imposes the maximum ban time of 20 seconds). You can set
the actual times in the next preference, "waiting_times".
Specify four integer values for the four triggers, separated by commas, put
the whole thing in quotes.
*/
$anti_hammer['trigger_levels'] = '5,10,20,30'; // violation counts that trigger each of the four penalty levels
/*
Ban Times.
[default: $anti_hammer['waiting_times'] = '3,5,10,20';]
This list sets the individual times that offenders will be 'banned' for.
They will have to wait *this* long before they can try again.
Each of the four settings corresponds to one of the above trigger_levels.
Specify four integer values, separated by commas, whole thing in quotes.
*/
$anti_hammer['waiting_times'] = '3,5,10,20'; // ban (wait) time in seconds for each trigger level above
/*
Rolling Trigger Times
[default: $anti_hammer['rolling_trigger'] = false;]
This increases the ban time automatically with EACH hammer.
<hit>
You must wait three seconds..
<hit>
You must wait four seconds..
<hit>
You must wait five seconds..
And so on.
*/
$anti_hammer['rolling_trigger'] = false; // true = ban time grows with EACH new hammer, instead of preset levels
/*
Cut-Off
[default: $anti_hammer['cut_off'] = '']
You can also set an absolute cut-off point.
Anyone receiving this many hammer violations is simply dropped, and from
that point onward, their pages die before it even begins - blank - and
the client is sent a "503 Service Temporarily Unavailable" response.
This works with both preset and rolling triggers.
Leave blank to disable the cut-off.
*/
$anti_hammer['cut_off'] = ''; // total violations before a hard 503 ban; leave '' to disable
/*
Bye Bye! Message.
[default: $anti_hammer['cut_off_msg'] = '<h1>Bye Now!</h1>';]
A final word from our sponsor?
This is the final message they see before it all goes blank.
It should be HTML. No other text is presented.
*/
$anti_hammer['cut_off_msg'] = '<h1>Bye Now!</h1>'; // final HTML shown at cut-off (nothing else is sent)
/*
Ban Time
[default: $anti_hammer['ban_time'] = '12';]
And for how many hours will the above cut-off (ban) last?
*/
$anti_hammer['ban_time'] = '12'; // cut-off ban duration, in hours (keep 'GC_age', below, larger than this)
// NOTE: If you set your Garbage Collection age to any less than this, you
// effectively reset all bans older than THAT figure.
//
// In other words, ensure your garbage collection age ('GC_age', below)
// is larger than your 'ban_time' setting here, probably x2.
// Think: if GC happened one minute after someone was banned, and their
// session ID file was >= GC_age, it would be cleaned up! Then no ban!
//
// Also Note: Humans are daily creatures; a 12h ban is effectively 24h!
/*
SEO-aware indexes
When a user asks for /index.php, they get redirected to /
immediately, bypassing anti-hammer for the initial request.
If you prefer to perform this sort of magic elsewhere (in your header, perhaps)
remember to add a sleep for anti-hammer, at least as long as your hammer time.
NOTE: this redirection only occurs when no data is POST-ed, so as not to mess
with any forms you might have running. Obviously search engine spiders would
not be making these sorts of requests (POSTing forms), anyway.
*/
$anti_hammer['seo_indexes'] = true; // redirect /index.php -> / (only when nothing is POST-ed)
/*
Log File Location
[default: $anti_hammer['log'] = '.ht_hammers';]
We will log each banned hit, for reference.
EITHER:
enter a simple file name, e.g..
$anti_hammer['log'] = '.ht_hammers';
in which case Anti-Hammer will log hammers to your "lists" folder
OR..
enter FULL path to the log location, e.g..
$anti_hammer['log'] = $_SERVER['DOCUMENT_ROOT'].'/logs/.ht_hammers'
NOTE: If you enter a custom full path location and the parent directory does
not exist, Anti-Hammer will not attempt to create it, and you will get no
logging.
*/
$anti_hammer['log'] = '.ht_hammers'; // bare filename = log inside the "lists" folder; or give a FULL path
//
// NOTE: It is recommended you watch this log very carefully for the first
// few minutes/days after installation, in case of unexpected side-
// effects. And in that case, please do mail me about it!
/*
Kill Message.
[default: $anti_hammer['kill_msg'] = 'Please do not hammer this site.<br />';]
When a request is killed - send this message (before the other text).
You can use any valid HTML in here, header tags, or whatever you like..
*/
$anti_hammer['kill_msg'] = '<h1>Please do not hammer '.$_SERVER['HTTP_HOST'].'!</h1>'; // HTML shown on a killed request
/* NOTE: No <br /> is placed after this text.
If you aren't using <h> tags, and want a break, add it yourself. */
/*
Page Title.
[default: $anti_hammer['page_title'] = 'Please do not hammer this site!';]
This is what is displayed in the title bar of their browser.
Keep this one plain text.
*/
$anti_hammer['page_title'] = 'Please do not hammer this site!'; // plain-text <title> of the kill page
/*
WebMaster's Name
[default: $anti_hammer['webmaster'] = 'the webmaster';]
Name of the webmaster, will be included in the kill page.
e.g. "If you believe this is in error, please mail <Insert Name> about it!"
*/
$anti_hammer['webmaster'] = 'the webmaster'; // name used in the kill page's contact line
/*
Admin Bypass
[default: $anti_hammer['admin_agent_string'] = 'MyCrazyUserAgentString';]
If you insert this exact string into your web browser's user-agent string
(just tag it onto the end), you can bypass the hammer altogether.
Very handy for busy webmasters.
*/
$anti_hammer['admin_agent_string'] = 'MyCrazyUniqueUserAgentString'; // append to your browser's UA string to bypass the hammer
// NOTE: It's not advisable to go messing with the main body of your
// browser's user agent string. Lots of web designers rely on this
// information to serve you beautiful, functional web pages.
/*
WebMaster email address (string).
[default: $anti_hammer['error_mail'] = 'bugs at mydomain dot com';]
The usual text format of so-and-so at such-and-such dot com works well.
This is tagged on to the end of the message inside <> angle brackets,
to look like an address.
*/
$anti_hammer['error_mail'] = 'bugs at mydomain dot com'; // obfuscated contact address, shown in <> on the kill page
/*
Client Data Set
[default: $anti_hammer['client_data'] = 'user_agent,accepts,language,encoding,charset,remote_ip';]
You can specify which parts of the client's data set will be used to create
the client's unique ID. The more elements you use, the less chance you have
of colliding two client ID's. By default, Anti-Hammer uses them all.
But your server might be under stress or attack, or some other scenario,
and you would prefer to identify clients by, for example, only their IP
Address. That way, *any* request from that IP would be assigned the same
Client ID, regardless of what other information they may or may not
present.
This is simply a list of client properties. Choose from; 'user_agent', 'accepts',
'language', 'encoding', 'charset', and 'remote_ip', and create a simple list,
separated by commas.
*/
$anti_hammer['client_data'] = 'user_agent,accepts,language,encoding,charset,remote_ip'; // properties combined into the unique client ID
/*
Lookup Failures.
When an event worth logging occurs, we can lookup the host name of the
client to add to our logs. This takes a moment, but only occurs while
logging bad clients, and can be useful in quickly identifying abusers
(or good bots using bad user agent string - to come)
*/
$anti_hammer['lookup_failures'] = true; // look up the client's host name when logging bad clients
/*
Allow known bots?
[default: $anti_hammer['allow_bots'] = false;]
We can allow certain bots to bypass the Anti-Hammer.
To do this, specify the expected user agent strings in..
path-to/anti-hammer/exemptions/exemptions.ini
and then supply an IP-mask file where said user agent is expected to be
making requests FROM, one ip per line, in the standard Spider IP list format
as found here..
http://www.iplists.com/
http://www.iplists.com/nw/ <- updated, reorganised, with msnbot+more
A blog URI is listed there, where list updates are posted.
(this doesn't happen a lot, maybe 2-3 times a year)
NOTE: User agent string matches are CaSe SenSiTivE! If you want to match
"msnbot" and "MSNBOT", you need two entries. (a case-insensitive
test is roughly five times slower than case-sensitive; so testing
two separate entries is much faster)
NOTE: If cooking up your own anti-hammer.ini, you probably do not want to
include the generic user agent strings (e.g. Yahoo's "Mozilla/4.0"),
which would create a lot of processing overhead, as ALL browsers
send that. Doh! (More notes within that file.)
You can set this to "true" (no quotes), in which case, all specified bots
are simply allowed to bypass the hammer. You can also set it to an integer,
e.g..
$anti_hammer['allow_bots'] = 50;
..that integer representing the hammer_time that will apply to the specified
clients. "50" would enable 2 hits-per-second spidering, but nothing faster,
which is half the normal hammer_time of One Second (hammer_time=100).
*/
$anti_hammer['allow_bots'] = false; // false / true (full bypass for listed bots) / integer (custom hammer_time for them)
/*
The following two preferences control Anti-Hammer's built-in Client session
Garbage collection routines..
*/
/*
Garbage Collection Limit
[default: $anti_hammer['GC_limit'] = 10000;]
To prevent your server's hard drive filling up with stale client sessions,
we run a periodic garbage collection routine to sweep up the old files.
How periodically, is up to you. By default, Anti-Hammer will check for
garbage every 10,000 hits. I'm thinking this would be around a 2-daily hit
rate for a small site (@ 5000 hits per day).
Obviously, you can change this number to anything you like, depending on how
busy your site is, and how much space you have on the disks.
If you don't want Anti-Hammer to clean up its garbage, set this to 0. And
ensure you have some other mechanism to handle it.
Remember to ensure that this limit falls well outside your longest ban time,
probably at least 2x that.
*/
$anti_hammer['GC_limit'] = 10000; // check for stale session garbage every this-many hits; 0 = never
/*
GarbAge!
[default: $anti_hammer['GC_age'] = 24;]
How old, in hours, is considered "stale"?
Any ID files older than this will be swept away (deleted).
*/
$anti_hammer['GC_age'] = 24; // ID files older than this many hours are deleted
/*
NOTE: The previous two preferences have no effect if you set the following
preference ('use_php_sessions') to true. They are only for Anti-Hammer's
built-in client session files.
*/
/*
Use php sessions..
[default: $anti_hammer['use_php_sessions'] = false;]
You would think it might be a nice idea to detect if the client has cookies
enabled, and if so, use php sessions, only falling-back to some other method
when they have not. However, it is not possible to detect whether or not a
client has cookies enabled, with a single request. You need Two. Clearly,
that isn't a lot of use for a protection mechanism designed to be able to
operate before they have even had one. So you need to choose now..
By default, Anti-Hammer will use its own session mechanism, writing client-
unique data to files in a directory of your choosing, irrespective of their
ability to accept cookies. As it is an independent system, it in no way
interferes with any session magic you may have running on your site, and in
most scenarios is just as fast as php's own session handling.
However, you may wish to use that, instead; particularly if you have
millions of hits a day, and your web server stores the php sessions in a
some uberfast /tmp space you can't otherwise get to, where the difference
might be worth it. Or if in-website space is extremely limited. At any rate,
you have a choice.
NOTE: if you enable this, you will ALWAYS start a php session with each
request. This usually presents no problems, but you and your server may know
better. Testing is always advised! I tested it this way for many months on
corz.org, with no issues whatsoever, and I use php sessions all over the
site. If you use proper names in your session, everything should work fine.
ALSO NOTE: With this enabled, if the client/spider/script kiddie/etc. has
cookies disabled in their web browser, they bypass Anti-Hammer protection!
** This is why, by default, Anti-Hammer uses its *own* session mechanism. **
There should be no performance concerns; Anti-Hammer writes the data in the
same way as a php session; a simple serialized array in a flat file.
*/
$anti_hammer['use_php_sessions'] = false; // true = use php's own (cookie-reliant!) sessions instead of flat files
/*
Referer Spam protection
You can ask Anti-Hammer to consult black and white lists (available online)
of known bad and (optionally) good referers.
Additionally, you can have Anti-Hammer /create/ these lists dynamically,
by direct interrogation of the referring URL, black-listing referers that
have no actual link back to your site.
Welcome to..
Responsive Adaptive Dynamic Anti-Referer (RADAR) Spam Protection! (TM) heh
*/
/*
Validate Referers? (yes, there's one 'r'!)
This is the master switch for Referer Protection.
*/
$anti_hammer['validate_referers'] = true; // true/false - master switch for ALL Referer Protection below
/* location of white and black lists..
These are inside the "lists" directory, which is inside the data_dir directory,
set above.
*/
$anti_hammer['white_list'] = 'white-list.txt'; // known-good referers (inside the lists directory)
$anti_hammer['black_list'] = 'black-list.txt'; // known-bad referers (inside the lists directory)
/*
"Auxiliary" black-list. [aka, "The BANNED Referers List"]
An extra black-list. This list is read-only (it will not be updated by
Anti-Hammer) most useful if you wish to add a second bad referers list which
you have compiled by hand, or downloaded.
It would also be a handy place to store your "Permanently Banned" referers.
Essentially, this is a list of KNOWN bad referers, certified, if you like.
It should be noted that the longer your list, the longer it will take to
check. To give you an idea, a list 432KB in length, containing 25,635 items
takes approx 0.05 seconds to check, compared to less than 0.001 seconds for
a short list with under 1000 items. I'll allow it!
*/
$anti_hammer['banned_referers_list'] = 'banned_referers.txt'; // read-only, hand-maintained extra black-list
/*
If you ONLY want to use a black-list..
You don't have to use a white-list. Instead you can simply allow /all/
referers that are NOT listed in your black-list. In other words, any referer
found in your black-list gets the 'bad_referer_msg', everyone else can pass
straight through. To achieve this, comment out the white_list location preference,
like so..
//$anti_hammer['white_list'] = 'white-list.txt';
From then on, Anti-Hammer will consult only your black-list for bad
referers, allowing all others to pass.
If you don't want Anti-Hammer to interrogate and add new bad referers to the
black-list, set:
$anti_hammer['interrogate_referers']= false;
Remember to set the location of your black-list!
Obviously, using ONLY a white-list is pointless, without a black-list for
bad referers, all requests pass straight through (or must be re-fetched, if
interrogate_referers=true).
*/
/*
List (and check) ONLY domain names?
Rather than record the entire URL in the black and white lists, we can list
only the domain name. This saves time checking the lists in future, and
makes for smaller lists.
Set to true or false, you can still have partial matches in your lists, so
"www.google" matches "www.google.com", "www.google.co.uk", and so on.
The difference is, when set to true, Anti-Hammer would expect the match to
happen right from the start of the entry, e.g. "google.com" would NOT match
against a referring hit from..
http://www.google.com/search?q=foo+bar.
Of course, "www.google.com" would match the hit.
When this is set to false (use *full* URL's), matches can be anywhere in the
string, e.g. "google.com" WOULD match against a referring hit from..
http://www.google.com/search?q=foo+bar
You can have a mixed black-list, with domains-only (perhaps from an online
source) and append full URL's to it - Anti-Hammer will work with both.
However..
If you have a list of full URL's, and use Anti-Hammer in domains_only mode
(true), it's unlikely Anti-Hammer will find any matches in the full URL
entries of your list.
If you want to move from full URL's to a domain-only list, it's best to
either start with a new list, or else do some fairly simple regexp search &
replace on the full URL list to create a domain-only list. HOWEVER..
See the next preference.
*/
$anti_hammer['domains_only'] = true; // true = record/match domain names only (matches anchored at the start)
/*
The scheme is the "http" part of the referring URL. When working with full
URL's ('domains_only' = false) we normally record this, e.g.
http://www.badspammer.com/buymyshit.html
But you can disable recording the scheme, so instead we record..
www.badspammer.com/buymyshit.html
..which will probably also disable your text editor's fancy text-to-URL
magic, but does have one important advantage..
With this set to false, you can use mixed lists with both domain-only and
full-URL entries and Anti-Hammer will be able to match entries either way
around. This could be handy if you like to record full URL's for your own
generated bad-referer list but add other (domain-only) sources to your list
from time to time, perhaps from a web site that distributes such things.
If this is something that you intend to do, see the 'banned_list' preference
(above), which provides a convenient way to use an auxiliary black-list.
*/
$anti_hammer['record_scheme'] = false; // false = omit "http://" so domain-only and full-URL entries can mix
/*
What message to send to these clients?
You can use HTML in this message.
If you wish to insert more into your feck-off page, maybe ads and stuff,
search this document for "die_spammer"
*/
// HTML message sent to clients arriving via a black-listed referer..
$anti_hammer['bad_referer_msg'] = '<h1>Bad vibes from referring page!</h1>
<h3>If you believe this is an error, please <a href="https://corz.org/corz/contact.php" title="My contact page">email the webmaster</a>.</h3>';
// If the page dies from bad ju-ju (Referer Spam Protection), you can include a file on that page.
// It's a fine place to stick an advert!
$anti_hammer['die_insert']= $_SERVER['DOCUMENT_ROOT'].'/ads/bad-ju-ju.htm'; // full path to the include file
// basically, this enables you to include a larger chunk of HTML than would
// be comfortable to insert into the 'bad_referer_msg' (above)
// And a title for the page..
$anti_hammer['title'] = 'Bad JuJu from Referring Page'; // plain-text <title> for the bad-referer page
// some string you want to remove from inward links, if necessary..
$anti_hammer['chop'] = '?page=all'; // substring removed from inward (referring) links
/*
WARNING!!!!!!
You may want to keep your eye on the black and white lists for a few
days to ensure that there are no false-positives or false-negatives,
as well as pare down links to well-known sites, to prevent bloating your
lists, as well as speeding up the process (it's pretty fast, but still,
no point in grabbing the same site over and over). A good example would
be search engines. A single entry in your white-list, like so..
www.google.
..will prevent Anti-Hammer from having to fetch each and every hit from
/any/ of google's many international search results pages.
ALSO NOTE: "empty" referers always pass through this validation (typed
links and bots), as do internally referred pages (links from your own
pages).
*/
/*
Log Bad Referers?
The whole point of this (well, the main point) is to prevent bad referers
showing up in your referer logs, cut the noise (as well as save on server
resources, of course). However, you may still wish to manually check out
some URL's, especially if you are generating a black-list via interrogation,
to check for false-negatives.
Comment out this preference to NOT log all new bad referers.
*/
$anti_hammer['black_log'] = 'black-log.txt'; // log of newly discovered bad referers; comment out to disable
/*
One would hope you are logging your /real/ traffic sources elsewhere
*/
/*
NOTE: you can also disable the lists and simply log the bad referers, if you
wish. However, this would force checking of each and every referer, which
would be wasteful of server resources & bandwidth. It would also mean
multiple entries for each bad referer, as this list is not /checked/
/against/, it is merely a log.
*/
/*
Interrogate Referers?
[default: $anti_hammer['interrogate_referers'] = true;]
To prevent referer spam, we can instruct anti-hammer to validate unknown
referers by direct interrogation. This involves actually loading the HTML of
the referring page and looking for links back to our page.
If a link is found, the site is added to the white-list and from then on,
allowed to access our resources directly. If a link back to our site is NOT
found, Anti-Hammer adds the page to the black-list and denies referred
access to our resources, permanently.
Of course, external pages are only checked ONE time, thereafter they are in
either the black or white list.
All this can make a HUGE difference to the noise in your logs! At one point
I was getting 1000+ visits/day from referer spammers. *ouch*
NOTE: You will need to enable url fopen wrappers for this to work. e.g..
(in .htaccess):
php_flag allow_url_fopen on
php_flag allow_url_include on
Also note, a link to the requested page is also inserted, just in case anti-
hammer has flagged "inaccessible" resources, such as member-only sites, so
valid web users can still get to your page.
If you don't want anti-hammer to do any fetching and interrogating of
referring pages, but DO want it to check some pre-populated black (or white)
list, set this parameter to false. Only do this if you already have your own
pre-populated black (and white) list.
NOTE: If interrogate_referers is set to true, and no list file(s) exist(s),
Anti-Hammer will attempt to create them.
*/
$anti_hammer['interrogate_referers'] = true; // fetch unknown referring pages and check for a link back to us
/*
Link-Checking Accuracy
Anti-Hammer can have a quick scan through the referring page for our page's
URL (level 1), or it can explode all the <a href= tags on a page and check
our page is in there somewhere (level 2) or else it can scan the document
thoroughly using complex Regular Expressions to ensure a valid link exists
on the referring page (level 3).
Level one is the quickest and least accurate method, level three the slowest
and most accurate. The following min/max times were performed on an old
(slow) laptop Apache server. The test file is a 1MB HTML page with just over
9000 links on the page, iterated Ten times, for more accurate times; the
final times listed here being 1/10th of that. Expect at least x10 speeds on
a /real/ server.
Level 1 [0.017 - 0.032s]
Level 2 [0.056 - 0.069s]
Level 3 [0.108 - 0.114s]
Unless referer spammers get *very* clever on us, level 1 will probably be
fine.
*/
$anti_hammer['link_check_accuracy'] = 1; // 1 = quick scan, 2 = explode <a href= tags, 3 = thorough regexp check
/*
Interrogation GET Time-Out
By default, php will time-out a request after 60 seconds.
You may not want Anti-Hammer to wait that long.
Think about it; they came FROM that page, so we would expect that page to be
available right now.
*/
$anti_hammer['time_out'] = 10; // seconds to wait when fetching a referring page (php's default is 60)
/*
Referer GET Limit.
We set the maximum amount of web page data fetched, mainly this is to avoid
a potential server DOS Exploit.
That exploit would be..
A script that loads your web pages from a sequence of randomly generated
referer URI's of pages of HUGE size, potentially Terabytes. One every
second or less until your server falls over, grabbing each page in its
entirety, checking for back-links.
<deep voice> ... And so the Server Wars began...
Mind you, one would hope that your server would have other security features
to deal with this sort of onslaught, doesn't php itself? And of course,
/they/ would need to have a superior server at their end, perhaps a
distributed model of some kind.. Rally-cry on a forum, the code of a fairly
simple exploit page, but a download away.. if some were so inclined.
But security isn't the only deciding factor here. Anti-Hammer isn't pulling
in all the page resources, images, video and such, just the raw HTML. I
don't have a page onsite generating over 100KB of raw source (Ctrl+U in
Firefox, et-al), though loading ALL the comments on a few of my pages does
get you a Multi-Megabyte document. How much of that would a human be willing to
read, anyway? Should *your* site be so far down a page? It's probably an
auto-scraping-mass-marketing page of some sort. That alone deserves "bad
vibes".
A quick search of my personal collection of saved web pages from the last
couple of decades reveals nothing valuable over 1MB.
Enter the maximum number of BYTES of data you are prepared to accept.
1024 = 1KB. It's okay to use multipliers here, e.g. 1024*1024 which equals
1MB, 1024*100 is 100KB, etc., to set max_referer_get to 10MB you could use:
$anti_hammer['max_referer_get'] = 1024*1024*10; // 10MB max
*/
$anti_hammer['max_referer_get'] = 1024*1024; // 1MB max - bytes fetched from any single referring page
/*
URL Protection
Anti-Hammer can deny resources to clients making h4x0r-type requests to
known exploit pages. It's also a handy place to put URLs of restricted or
publicly unavailable features and such-like, e.g..
$1
%00
....//
../../
.aspx
/RPC2
/groups
/url?q=
script>alert
=profile;u
=viewprofile;u
=|w|
DSAdClient31.dll
MSOffice/cltreq.asp
Netsparker-
_vti_bin
addlink.php
administrator/
bad-test
bangsat.php
cart.php
clientaccesspolicy.xml
crossdomain.xml
customers.php
etc/group
etc/passwd
form.php
imagens_popup.php
javascript:alert
labels.rdf
owssvr.dll
password_forgotten.php
proc/self
product_category.php
profile.php
r57.php
register/
select *
select+
signup.php
union+select
using+SOCKS
viewtopic.php
wp-content/
xmlrpc
zboard.php
zp-core
etc.
These requests will be killed immediately with a "501 Not Implemented"
response.
This is especially useful if you have an intelligent 404 page, to prevent
wasting processor power on obviously bad requests.
If you are using the admin bypass capability, you can also put restricted
URL's in here, meaning only you will be able to access them. Neat.
To enable this functionality, simply uncomment the following preference..
*/
//$anti_hammer['banned_urls_list'] = 'banned_urls.txt'; // uncomment to enable URL (exploit-request) protection
// some message to send in this case..
$anti_hammer['bad_url_message'] = 'The requested URL is not implemented.'; // sent along with the 501 response
/*
USER AGENT Protection (Bad Bots, spiders, suckers, etc.)
Anti-Hammer can deny resources to clients with known "User Agents". This can
be used to protect your site against web downloaders, bad bots, spiders,
scripts and more.
Note:
You can use Regular Expression in the list entries, so .htaccess ban
lists should port straight across to Anti-Hammer (after removing the
RewriteCond %{HTTP_USER_AGENT} part, of course - a simple block delete).
This enables you to have matches that ONLY match at the start of a User
Agent string and much more, e.g..
^BadBot
("^" meaning, "start anchor", in other words, "must match at the start
of the string") would match against "BadBot web sucker", but would NOT
match against "Web Sucker BadBot"
^BadBot$
("$" meaning "must match the END of the string") would only match
against "BadBot", but NOT against "BadBot web sucker" (both start and
end anchors are specified)
BadBot$
Would match against "Web Sucker BadBot", but NOT against "BadBot Web
Sucker" (only the end anchor is specified), and so on.
If you want to include any funky characters, actual dots, slashes and
such, precede those with a backslash ("\"), so that the regexp engine
treats them as literal characters, e.g..
BadBot\/1\.
Which would match against "BadBot/1.3", "BadBot/1.0", etc..
If you are not familiar with Regular Expression, I definitely recommend
you GET familiar with it, it is hugely powerful and a massive
time-saver.
These requests will be killed immediately with a "403 Forbidden" response.
To enable this functionality, simply uncomment the following preference..
*/
//$anti_hammer['banned_agents_list'] = 'banned_agents.txt'; // uncomment to enable User Agent protection
// a message to send to clients with bad user agent strings..
//$anti_hammer['bad_agent_message'] = 'Your client has been banned.';
$anti_hammer['bad_agent_message'] = 'This request has been denied.'; // sent along with the 403 response
/*
Allow Empty User Agent Strings?
If you wish to deny access to clients NOT sending a User Agent String, set
this to false..
*/
$anti_hammer['allow_empty_UA'] = true; // false = deny clients sending NO User Agent string
// if set to false, we will send them a message instead of the page..
$anti_hammer['empty_UA_message'] = 'Please configure your web client to send a valid User Agent string.';
// TODO: humans could enter a code - then be treated as normal.
/*
Banned IP Addresses
By request, this simple feature enables you to ban individual IP addresses.
You can also ban entire IP blocks, by omitting tha relevant octets, for
example, and entry like this..
192.168.0.
.. would ban all IP Addresses from 192.168.0.0 - 192.168.0.255
These requests get a 403 response + terse message.
In the future, we may add IP addresses to this list dynamically, for serial
offenders.
To enable this functionality, simply uncomment the following preference..
*/
//$anti_hammer['banned_ip_list'] = 'banned_ips.txt';
// Some message to send for clients with a banned IP Address..
$anti_hammer['bad_ip_message'] = 'Your IP Address has been banned.';
/*
Advanced overrides..
*/
/*
Client ID File Prefix
[default: $anti_hammer['ID_prefix'] = 'HammerID_';]
This text is placed before the client ID in the ID filename. e.g..
"HammerID_06fa71c938a108f4a2b1f1ef091653ef"
You may wish to use a different name..
*/
$anti_hammer['ID_prefix'] = 'HammerID_';
/*
Directory Locations..
These are usually best left as-is (in which case they will be inside your
main anti-hammr directory), but you can change them if you need to.
*/
$anti_hammer['lists_folder'] = 'lists';
$anti_hammer['sessions_folder'] = 'sessions';
$anti_hammer['exemptions_folder'] = 'exemptions';
// If you need to set absolute locations outside the anti-hammer directory, the section
// just below the prefs
//
// :end regular prefs
// :advanced prefs:
//
// Transform user preferences into real locations..
// Main location of script and data directories..
$anti_hammer['data_path'] = str_replace("\\", '/', dirname(__FILE__));
// if the above line isn't working out your anti-hammer location automatically, you can set it here manually, like so..
//$anti_hammer['data_path'] = realpath($_SERVER['DOCUMENT_ROOT']).'/inc/anti-hammer';
// :end advanced prefs:
//*> end prefs
// Firstly.. SEO indexes..
//
// Redirect initial requests for folder/index.php to folder/ (neater, and better for SEO, too)
// better than using .htaccess (if you have $_POST forms, for example). And better than doing
// it later, /after/ you tripped anti-hammer!
// NOTE(review): the redirect hard-codes the "http://" scheme, so an https
// site gets bounced to plain http here - confirm this is intended.
if ($anti_hammer['seo_indexes']) {
if (substr($_SERVER['REQUEST_URI'], -9) == 'index.php' and empty($_POST)) {
header('Location: http://'.$_SERVER['HTTP_HOST'].str_replace('index.php', '', $_SERVER['REQUEST_URI']));
die;
}
}
//*> Setup Lists.
// Set the location of the lists folder..
// (all the relative list/log prefs below are resolved against the data path)
$anti_hammer['lists_folder'] = $anti_hammer['data_path'].'/'.$anti_hammer['lists_folder'].'/';
// Referer lists..
$anti_hammer['white_list'] = $anti_hammer['lists_folder'].$anti_hammer['white_list'];
$anti_hammer['black_list'] = $anti_hammer['lists_folder'].$anti_hammer['black_list'];
$anti_hammer['banned_referers_list'] = $anti_hammer['lists_folder'].$anti_hammer['banned_referers_list'];
$anti_hammer['black_log'] = $anti_hammer['lists_folder'].$anti_hammer['black_log'];
// Banned URLs..
if (isset($anti_hammer['banned_urls_list'])) { $anti_hammer['banned_urls_list'] = $anti_hammer['lists_folder'].$anti_hammer['banned_urls_list']; }
// Banned USER AGENTS..
if (isset($anti_hammer['banned_agents_list'])) { $anti_hammer['banned_agents_list'] = $anti_hammer['lists_folder'].$anti_hammer['banned_agents_list']; }
// Banned IP Addresses
if (isset($anti_hammer['banned_ip_list'])) { $anti_hammer['banned_ip_list'] = $anti_hammer['lists_folder'].$anti_hammer['banned_ip_list']; }
// Set log location..
// (a log path starting with '/' is treated as absolute and left alone)
if (substr($anti_hammer['log'], 0, 1) != '/') {
$anti_hammer['log'] = $anti_hammer['lists_folder'].$anti_hammer['log'];
}
//*> Collect all usable, available client data..
// (the @ suppressions cover optional request headers - absent ones become null)
$anti_hammer['user_agent'] = @$_SERVER['HTTP_USER_AGENT'];
$anti_hammer['accepts'] = @$_SERVER['HTTP_ACCEPT'];
$anti_hammer['language'] = @$_SERVER['HTTP_ACCEPT_LANGUAGE'];
$anti_hammer['encoding'] = @$_SERVER['HTTP_ACCEPT_ENCODING'];
$anti_hammer['charset'] = @$_SERVER['HTTP_ACCEPT_CHARSET'];
$anti_hammer['remote_ip'] = $_SERVER['REMOTE_ADDR'];
$anti_hammer['request'] = $_SERVER['REQUEST_URI'];
$anti_hammer['referer'] = @$_SERVER['HTTP_REFERER'];
$anti_hammer['host'] = $_SERVER['HTTP_HOST'];
$anti_hammer['uri'] = 'http://'.$anti_hammer['host'].$anti_hammer['request'];
//*> begin..
// Build a sub-second "now" stamp from microtime()'s "fraction seconds" pair..
$gentime = explode(' ', microtime());
$anti_hammer['now_time'] = $gentime[1].substr($gentime[0], 6, -2); // 1/100th of a second accuracy!
settype($anti_hammer['now_time'], "double"); // scientifically tested!
$anti_hammer['final_time'] = 0; // will be used to set the retry header on killed page (503)
// local server access (for readfile() requests..
// (and as a potential catch-all for user pref errors!))
if ($anti_hammer['remote_ip'] == $_SERVER['SERVER_ADDR']) {
return; // there could be a potential weakness on an easy-access shared web host.
}/*
A note about readfile()..
If you use readfile() to include resources on your pages, remember,
those requests will come in right after the first, and as they are
technically brand new hits, they count towards the hammer.
Use of include is preferred.
However, the code right above this notice should prevent any issues. If
it does /not/, and include isn't working, you might want to hack in
the actual IP Address of the local server. See my debug-report.zip for
a way to easily get this sort of information in your browser.
NOTE: If you are having difficulty include-ing URI resource in your
pages, remember you need to enable BOTH php allow_url_* flags. Here is
the .htaccess version of those two switches..
php_flag allow_url_fopen on
php_flag allow_url_include on
*/
//*> Skip Known Clients..
/*
skip protection for known bots and spiders.
okay, this is some cute code! simple, but effective..
we load an ini file of user-agent=ip-mask-file pairs, and check our client's
user agent string for a match (it must match the beginning of the string
exactly). If there is a match, we load the associated IP Mask file, and
run through the IP/masks, again looking for a perfect match at the start of
the two strings. Commented lines are no problem. We use strpos() for both
tests, so it's nice and fast, and the IP test covers our comments, too.
having said (coded) all this, you gotta ask yourself, why are they hammering?
Surely it would be better to get them to slow down, instead! (i.e. in robots.txt)
*/
$IP_file = '';
$matched_IP_file = ''; // only set when the User Agent really matched (see below)
$ip_array = null; // (fix: initialize, so the is_array() test below can't hit an undefined variable)
$anti_hammer['ini_file'] = $anti_hammer['data_path'].'/'.$anti_hammer['exemptions_folder'].'/exemptions.ini';
if ($anti_hammer['allow_bots']) {
	$bot_agent_array = read_bots_ini($anti_hammer['ini_file']);
	if (is_array($bot_agent_array)) {
		// find the first exemption entry whose agent string begins our client's UA..
		foreach ($bot_agent_array as $bot_agent_string => $IP_file) {
			if ($bot_agent_string and strpos($anti_hammer['user_agent'], $bot_agent_string) === 0) {
				// (fix: previously, when NO agent matched, $IP_file was simply left
				// holding the *last* ini entry, so unknown agents were silently
				// checked against that bot's IP list)
				$matched_IP_file = $IP_file;
				break;
			}
		}
		if ($matched_IP_file) {
			$ip_array = file($anti_hammer['data_path'].'/'.$anti_hammer['exemptions_folder'].'/'.$matched_IP_file);
		}
		if (is_array($ip_array)) {
			foreach ($ip_array as $bot_ip) {
				// exempt (or soften) only when the client IP starts with a listed IP/mask..
				if (@strpos($anti_hammer['remote_ip'], trim($bot_ip)) === 0) {
					if ($anti_hammer['allow_bots'] > 1) {
						// allow_bots > 1 doubles as a custom (more lenient) hammer_time for bots..
						$anti_hammer['hammer_time'] = $anti_hammer['allow_bots'];
					} else {
						return; // fully exempt this known bot
					}
				}
			}
		}
	}
}
// Get user values into usable arrays and do some error-checking.
// trigger thresholds..
// (fall back to sane defaults unless we have four comma-separated numbers)
if (!stristr($anti_hammer['trigger_levels'], ',') or (str_word_count($anti_hammer['trigger_levels'], 0, "0123456789") != 4)) {
$anti_hammer['trigger_levels'] = '5,10,20,30';
}
// A neat way to create an array from numeric prefs..
$anti_hammer['trigger_levels'] = str_word_count($anti_hammer['trigger_levels'], 1, "0123456789");
// Get user penalty times into correct values..
if (!stristr($anti_hammer['waiting_times'], ',') or (str_word_count($anti_hammer['waiting_times'], 0, "0123456789") != 4)) {
$anti_hammer['waiting_times'] = '3,5,10,20';
}
$anti_hammer['waiting_times'] = str_word_count($anti_hammer['waiting_times'], 1, "0123456789");
// file types to protect..
if (!$anti_hammer['types']) { return; } // no types specified, forget it!
$anti_hammer['types'] = explode(',', $anti_hammer['types']);
// generated types to skip..
$anti_hammer['gen_types'] = explode(',', $anti_hammer['gen_types']);
// ignored locations..
if ($anti_hammer['skip']) {
// ignored locations..
$anti_hammer['skip'] = explode(',', $anti_hammer['skip']);
// run through ignored locations and if matched, return immediately..
foreach($anti_hammer['skip'] as $nogo) {
if (stristr($anti_hammer['request'], trim($nogo))) { return; }
}
}
// Skip requests with certain POSTed variables..
// NOTE(review): this re-uses $anti_hammer['skip'] for the skip_post entries,
// clobbering the list above - harmless (it's no longer needed) but confusing.
if ($anti_hammer['skip_post']) {
$anti_hammer['skip'] = explode(',', $anti_hammer['skip_post']);
// run through the POSTed variable names and if matched, return immediately..
foreach($anti_hammer['skip'] as $nogo) {
if (is_array($_POST)) {
foreach ($_POST as $pk=>$pv) {
if ($nogo == $pk) {
return;
}
}
}
}
}
// Anti-Hammer only for php files, not generated css, etc..
// (compares the extension of the *physical* script file with each 'types' entry)
$ah_type_ok = false;
foreach($anti_hammer['types'] as $ah_type) {
if (!$ah_type) { continue; } // @ to avoid strict php5 errors
if (@end(explode('.', $_SERVER['SCRIPT_FILENAME'])) == trim($ah_type)) {
$ah_type_ok = true;
}
//2do.. could make this code more efficient, for those using MANY types.
}
if ($ah_type_ok /* still! */ == false) { return; }
//*> Admin Bypass..
// (I put this down here so I can gather and test variables)
//
if (stristr($anti_hammer['user_agent'], $anti_hammer['admin_agent_string'])) {
return;
}
//*> skip generated types..
//
if (in_array(@end(explode('.', $_SERVER['REQUEST_URI'])), $anti_hammer['gen_types'])) { return; }
/*
okay, let's do it..
*/
//*> read session data..
$anti_hammer['session'] = array();
if ($anti_hammer['use_php_sessions']) {
// Regular php session..
session_start();
$anti_hammer['session'] = $_SESSION['anti_hammer'];
} else { // Anti-Hammer's built-in session mechanism..
// Create a unique ID for this client..
// (an md5 of whichever request fields the 'client_data' pref selects)
$id_str = '';
$anti_hammer['client_data'] = explode(',', $anti_hammer['client_data']);
foreach ($anti_hammer['client_data'] as $element) {
$id_str .= $anti_hammer[trim($element)];
}
$anti_hammer['client_id'] = md5($id_str);
$fake_sess_file = $anti_hammer['data_path'].'/'.$anti_hammer['sessions_folder'].'/'.$anti_hammer['ID_prefix'].$anti_hammer['client_id'];
if (file_exists($fake_sess_file)) {
$anti_hammer['session'] = read_fake_session($fake_sess_file);
}
}
/*
Useful use of a "cat"..
It seems to me that I unwittingly created a system whereby the less
information a client is willing to give, the more likely they are to be
banned. I say "seems", because we create an md5 of this information, so the
actual likelihood of colliding session ID's is astronomically low. However,
I like the *principle* of the thing.
*/
//*> Calculate Hammer Rate..
//> How much time since their last request (in 100/th Second)
if (isset($anti_hammer['session']['last_request'])) {
$hammer_rate = $anti_hammer['now_time'] - $anti_hammer['session']['last_request'] + 1; // always +1!
} else {
// new visitor (no session, yet)
$hammer_rate = $anti_hammer['hammer_time'] + 1; // so long as it's greater than hammer_time
}
//*> BANS!
/*
Check for BANS..
Their ban has elapsed..
*/
if ($hammer_rate > ($anti_hammer['ban_time']*60*60*100)) { // {@24} = 8640000 = 24 hours (in 100th/second)
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'] - 1;
$hammer_rate = $anti_hammer['hammer_time'] + 1;
$anti_hammer['session']['hammer'] = $anti_hammer['trigger_levels'][0]-3; // Repeat-offenders do not start from Zero!
unset($anti_hammer['session']['cut_off']);
// do not return yet - we still need to write the updated session data.
}
/*
BANNED!
CUT_OFF has already been set -- BYE NOW!
*/
if ($anti_hammer['cut_off'] and isset($anti_hammer['session']['cut_off'])) {
send_503_kill('', 60 * 60 * $anti_hammer['ban_time']);
}
/*
OKAY, still here..
*/
//*> GC
/*
Start with Garbage Collection..
(only needed for Anti-Hammer's own session files - php manages its own)
*/
if (!$anti_hammer['use_php_sessions']) {
CollectGarbage($anti_hammer['data_path'].'/'.$anti_hammer['sessions_folder'].'/Counter',
$anti_hammer['GC_limit'],$anti_hammer['ID_prefix'], $anti_hammer['GC_age']);
}
/*
Miscellaneous Protections..
(each of these ends the request itself on a match)
*/
if (isset($anti_hammer['banned_ip_list'])) { validate_ip(); } // on fail, send 403 (Forbidden) error
if (isset($anti_hammer['banned_urls_list'])) { validate_url(); } // on fail, send 503 (Service Temporarily Unavailable) error
if (isset($anti_hammer['banned_agents_list'])) { validate_agent(); } // on fail, send 403 error
if (!$anti_hammer['allow_empty_UA'] and !$anti_hammer['user_agent']) {
$msg = '';
if (isset($anti_hammer['empty_UA_message'])) {
$msg = $anti_hammer['empty_UA_message'];
}
send_503_kill($msg);
}
//*> It's Hammer-Time!
// A request faster than 'hammer_time' bumps the session's hammer count;
// crossing the trigger levels pushes 'last_request' into the future, which
// is what actually enforces the penalty wait.
$killpage = false;
if ($hammer_rate < $anti_hammer['hammer_time']) {
$retry_str = 'a few ';
@$anti_hammer['session']['hammer'] += 1;
if ($anti_hammer['session']['hammer'] > ($anti_hammer['trigger_levels'][0]-1)) {
// cut-off..
if ($anti_hammer['cut_off'] and $anti_hammer['session']['hammer'] > $anti_hammer['cut_off']) {
$anti_hammer['kill_msg'] = $anti_hammer['cut_off_msg'];
$anti_hammer['session']['cut_off'] = true;
}
if ($anti_hammer['cut_off'] and $anti_hammer['session']['hammer'] == $anti_hammer['cut_off']) {
$anti_hammer['kill_msg'] = '<h1>THIS IS YOUR LAST WARNING!</h1>'.$anti_hammer['kill_msg'];
}
// rolling ban time, increments with each hammer..
if ($anti_hammer['rolling_trigger']) {
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'] + (($anti_hammer['session']['hammer']*100)-1);
$retry_str = ah_int2eng($anti_hammer['session']['hammer']);
} else {
// predefined ban levels.. these are more effective, as they shock the user with increasing jumps!
// NOTE(review): the boundary at trigger_levels[2] appears in both the
// "<=" branch above it and the ">=" branch below - the earlier branch wins.
if (($anti_hammer['session']['hammer'] > $anti_hammer['trigger_levels'][0]) and ($anti_hammer['session']['hammer'] <= $anti_hammer['trigger_levels'][1])) {
// we simply nudge their start time forward by *this* many seconds (into the future!)..
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'] + (($anti_hammer['waiting_times'][0]*100)-1); // 299 = Three second penalty.
$retry_str = ah_int2eng($anti_hammer['waiting_times'][0]);
} elseif (($anti_hammer['session']['hammer'] > $anti_hammer['trigger_levels'][1]) and ($anti_hammer['session']['hammer'] <= $anti_hammer['trigger_levels'][2])) {
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'] + (($anti_hammer['waiting_times'][1]*100)-1); // Five second penalty! (by default)
$retry_str = ah_int2eng($anti_hammer['waiting_times'][1]);
} elseif (($anti_hammer['session']['hammer'] >= $anti_hammer['trigger_levels'][2]) and ($anti_hammer['session']['hammer'] <= $anti_hammer['trigger_levels'][3])) {
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'] + (($anti_hammer['waiting_times'][2]*100)-1); // Ten second penalty! (etc.)
$retry_str = ah_int2eng($anti_hammer['waiting_times'][2]);
} elseif ($anti_hammer['session']['hammer'] >= $anti_hammer['trigger_levels'][3]) {
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'] + (($anti_hammer['waiting_times'][3]*100)-1); // Twenty second penalty!
$retry_str = ah_int2eng($anti_hammer['waiting_times'][3]);
}
}
$killpage = true;
}
} else {
$anti_hammer['session']['last_request'] = $anti_hammer['now_time'];
}
//*> write client session data..
WriteHammerData();
//*> Kill Page
// Send the 503 penalty page (with Retry-After set from final_time)..
if ($killpage) {
//echo '<pre>anti_hammer:',htmlentities(print_r($anti_hammer,true)),'</pre>';//:debug
//echo '<pre>_SERVER:',htmlentities(print_r($_SERVER,true)),'</pre>';//:debug
$km = '<!DOCTYPE HTML SYSTEM><html><head><title>'.$anti_hammer['page_title'].'</title></head><body>'.$anti_hammer['kill_msg'];
$anti_hammer['session']['last_request_url'] = $_SERVER['REQUEST_URI'];
$anti_hammer['session']['last_post'] = $_POST;
// (cut-off clients get only the bare kill_msg, no "wait n seconds" footer)
// ($retry_str already carries a trailing space, hence 'seconds' directly below)
if (!isset($anti_hammer['session']['cut_off'])) {
$km .= '
You must wait '.$retry_str.'seconds before trying again.<br /><br />
If you believe this is in error, please mail '.$anti_hammer['webmaster'].' about it!<br />
<'.$anti_hammer['error_mail'].'><br />
<span style="font-size:small;position:fixed;bottom:10px;right:10px;">
<a title="Automatically Ban Web Site Hammers!
Protect your valuable server resources for *genuine* clients"
id="link-Get-Anti-Hammer" href="https://corz.org/server/tools/anti-hammer/">Get Anti-Hammer protection for your own site!</a>
</span></body></html>';
}
if (file_exists(dirname($anti_hammer['log']))) {
add_log_data($anti_hammer['log'], create_log_data());
}
send_503_kill($km, $anti_hammer['final_time']+1);
}
//*> Referer spam!
if ($anti_hammer['validate_referers']) {
$validated = validate_referer();
if (!$validated) {
die_spammer();
}
}
/*
fin
*/
// Resolve the client's hostname (reverse DNS) into $anti_hammer['remote_host'].
// Only performed when the 'lookup_failures' pref is on; otherwise the value
// stays an empty string. A trailing space is appended to the resolved name
// so it slots neatly into the log line format.
function get_rhost() {
	global $anti_hammer;
	$anti_hammer['remote_host'] = '';
	if (!$anti_hammer['lookup_failures']) {
		return;
	}
	$anti_hammer['remote_host'] = gethostbyaddr($anti_hammer['remote_ip']).' ';
}
// write the updated hammer info to the fake/session file..
// With native php sessions enabled the three tracked values are copied into
// $_SESSION; otherwise the whole session array is serialized out to this
// client's fake-session file via write_fake_session().
function WriteHammerData() {
	global $anti_hammer;
	if (!$anti_hammer['use_php_sessions']) {
		// Anti-Hammer's own session mechanism..
		write_fake_session($GLOBALS['fake_sess_file'], $anti_hammer['session']);
		return;
	}
	// Regular php sessions..
	foreach (array('last_request', 'hammer', 'cut_off') as $key) {
		$_SESSION['anti_hammer'][$key] = $anti_hammer['session'][$key];
	}
}
/*
Append data to a file.
To wipe the file, pass true as the 3rd parameter.
Returns an error string (also stored in $GLOBALS['errors']['add_log_data'])
on failure; returns nothing (null) on success.
*/
function add_log_data($file, $data, $wipe=false) {
	// if it's not there, try to create it..
	// (fix: the creation handle used to be left open - close it immediately)
	if (!file_exists($file)) {
		$fp = fopen($file, 'wb');
		if ($fp) { fclose($fp); }
	}
	if (!file_exists($file)) {
		$GLOBALS['errors']['add_log_data'] = "can't create $file";
		return $GLOBALS['errors']['add_log_data'];
	}
	// sanity-check the payload (deliberately kept *after* creation, as before,
	// so even a rejected write leaves an empty list/log file in place)..
	if (strlen($data) < 4) {
		$GLOBALS['errors']['add_log_data'] = "string '$data' too short! (need 4 or more characters)";
		return $GLOBALS['errors']['add_log_data'];
	}
	$flag = 'ab';
	if ($wipe) { $flag = 'wb'; }
	if (!is_writable($file)) {
		$GLOBALS['errors']['add_log_data'] = "can't write to $file";
		return $GLOBALS['errors']['add_log_data'];
	}
	$fp = fopen($file, $flag);
	if (!flock($fp, LOCK_EX)) {
		fclose($fp); // (fix: don't leak the handle when locking fails)
		$GLOBALS['errors']['add_log_data'] = "couldn't lock $file";
		return $GLOBALS['errors']['add_log_data'];
	}
	fwrite($fp, $data);
	flock($fp, LOCK_UN);
	fclose($fp);
}
// read serialized array data from a file, and return as an array..
// Returns the unserialized session array, or false when the file is missing,
// empty, unopenable, or does not contain a serialized array.
function read_fake_session($no_cookie_file) {
	if (!file_exists($no_cookie_file)) { return false; }
	$size = filesize($no_cookie_file);
	// (fix: fread() rejects a zero/false length in modern php - treat an
	// empty session file as "no session")
	if (!$size) { return false; }
	$file_handle = fopen($no_cookie_file, 'rb');
	// (fix: the old code called fclose() even when fopen() had failed)
	if (!$file_handle) { return false; }
	$file_contents = '';
	if (flock($file_handle, LOCK_EX)) {
		$file_contents = @fread($file_handle, $size);
		flock($file_handle, LOCK_UN);
	}
	fclose($file_handle);
	// @: garbage in the file just means "no usable session"..
	$file_contents = @unserialize($file_contents);
	if (is_array($file_contents)) {
		return $file_contents;
	}
	return false;
}
// serialize an array and write the string data to a file..
// Returns 1 on a successful write; returns nothing (null) when the data
// serializes to an empty string or the file cannot be opened.
function write_fake_session($no_cookie_file, $array) {
	$data = serialize($array);
	if (empty($data)) { return; }
	$fp = @fopen($no_cookie_file, 'wb');
	if (!$fp) { return; }
	if (flock($fp, LOCK_EX)) {
		fwrite($fp, $data);
		flock($fp, LOCK_UN);
	}
	fclose($fp);
	clearstatcache();
	return (1);
}
/*
CollectGarbage
You could transplant this into another web app fairly easily.
Useful stuff..
Every $limit hits (tracked in $count_file) we sweep the sessions directory,
deleting any "$prefix*" client ID files older than $GC_age hours, then
reset the hit counter. Pass $limit === 0 to disable collection entirely.
*/
function CollectGarbage($count_file, $limit, $prefix='HammerID_', $GC_age=12) {
	if ($limit === 0) { return; }
	if (ah_increment_hit_counter($count_file) >= $limit) {
		if ($the_dir = @opendir(dirname($count_file))) {
			// (fix: compare with !== - a file literally named "0" would have
			// ended the old loose "!=" loop prematurely)
			while (false !== ($file = readdir($the_dir))) {
				// skip dot entries, and only touch our own prefixed ID files..
				if ((ord($file) != 46) and strpos($file, $prefix) === 0) {
					$file_path = dirname($count_file).'/'.$file;
					if (file_exists($file_path)) {
						if (filemtime($file_path) < (time() - $GC_age*60*60)) {
							@unlink($file_path);
						}
					}
				}
			}
			closedir($the_dir); // (fix: release the directory handle)
		}
		ah_increment_hit_counter($count_file, 0, 1); // reset the counter
	}
}//2do..
// Run this in another thread? Or maybe a simple http request, perhaps
// with $_GET, to flip Anti-Hammer to GC mode in the Background - this task
// could be done after the request is already sent, even simultaneously;
// there may be a *lot* of files in this directory.
//
// Having said that, it's *very* fast, and only runs once per 10,000 or so
// ($limit) hits.
//
/*
increment a counter()
from my "file-tools.php", available elsewhere.
Creates $count_file (containing '0') when missing; then increments and
stores the count. Pass $report_only=true to read the current count without
incrementing, or $reset=true to restart the counter from zero first.
Returns the new count, the reported count, or false if the counter file
could not be created.
*/
function ah_increment_hit_counter($count_file, $report_only=false, $reset=false) {
	$count = false;
	if (!file_exists($count_file) or $reset) {
		$file_pointer = fopen($count_file, 'wb');
		// (fix: guard against a failed fopen() before writing)
		if ($file_pointer) {
			fwrite($file_pointer, '0');
			fclose($file_pointer);
		}
	}
	// now the counter..
	if (file_exists($count_file)) {
		// read in the old score..
		$count = trim(file_get_contents($count_file));
		if ($report_only) { return $count; }
		if (!$count) { $count = 0; }
		$count++;
		// write out new score..
		if (is_writable($count_file)) {
			$file_pointer = fopen($count_file, 'wb+');
			if ($file_pointer) { // (fix: fopen() can still fail after is_writable())
				if (flock($file_pointer, LOCK_EX)) {
					fwrite($file_pointer, $count);
					flock($file_pointer, LOCK_UN);
				}
				fclose($file_pointer);
				clearstatcache();
			}
		}
	}
	return $count;
}
/*
Integers To English Words.
Converts 1145432 into..
"one million, one hundred and forty five thousand, four hundred and thirty two"
Fairly groovy. ;o)
The regular version is in my "text-func.php", with some other stuff.
Side effect: stores the (clamped, minimum 1) input number in
$GLOBALS['anti_hammer']['final_time'], later used for the Retry-After
header on the 503 kill page. Returns the English string, trailing space included.
*/
function ah_int2eng($number) {
$output = '';
if ($number < 1) $number = 1;
$GLOBALS['anti_hammer']['final_time'] = $number;
$units = array(' ', 'one ', 'two ', 'three ', 'four ', 'five ', 'six ', 'seven ', 'eight ', 'nine ');
$teens = array('ten ', 'eleven ', 'twelve ', 'thirteen ', 'fourteen ', 'fifteen ', 'sixteen ', 'seventeen ', 'eighteen ', 'nineteen ');
$tenners = array('', '', 'twenty ', 'thirty ', 'fourty ', 'fifty ', 'sixty ', 'seventy ', 'eighty ', 'ninety ');
// walk the digits left-to-right, consuming $number one digit at a time;
// $x counts down, so ($x % 3) tells us where we are inside the current
// 3-digit group (2 = tens position, 1/0 = units/hundreds handling below)..
$lint = strlen($number);
if ($lint > 2) $bigger = true;
for ($x = $lint ; $x >= 1 ; $x--) {
$last = substr($output, -5, 4);
$digit = substr($number, 0, 1);
$number = substr($number, 1);
if ($x % 3 == 2) {
if ($digit == 1) { // 10-19..
$digit = substr($number, 0, 1);
$number = substr($number, 1);
$x--;
if ($last == 'sand') { $output .= 'and '; }
$output .= $teens[$digit];
} else { // 20-99..
if (($last == 'sand') ) { $output .= 'and '; }
$output .= $tenners[$digit];
}
} else {
if (($x % 3 != 1) and ($digit > 0) and (!empty($output))) { $output .= ', '; }
$output .= $units[$digit];
}
// at each 3-digit group boundary, append its magnitude name ("thousand"..)..
if ((strlen($number) % 3) == 0) {
$bignum = ah_bignumbers(strlen($number) / 3);
if (($last == 'dred') and ($bignum != 'thousand')) { $output .= 'and ';}
$output .= $bignum;
}
if ((strlen($number) % 3) == 2 and $digit > 0) {
$output .= 'hundred and ';
}
}
// clean up the output.. (collapse doubled spaces, drop spurious "and"s)
$output = str_replace(' ', ' ', $output);
$output = str_replace('red and thou', 'red thou', $output);
$output = str_replace('red and mill', 'red mill', $output);
$output = str_replace('lion thousand', 'lion ', $output);
if (substr($output, -5) == ' and ') { $output = substr($output, 0, -5).' '; }
return $output;
}
/*
it just looks better, okay!
Maps a count of remaining 3-digit groups to its (short scale) English
magnitude name. Anything outside 0-3 is returned unchanged.
*/
function ah_bignumbers($test) {
	switch ($test) {
		case 0:
			$test = "";
			break;
		case 1:
			$test = "thousand";
			break;
		case 2:
			$test = "million";
			break;
		case 3:
			// (fix: three groups beyond is 10^9 - "billion", not "trillion")
			$test = "billion";
			break;
	}
	return $test;
}
/*
function read_ini() [from my 'ini-tools.php']
pull the data from the ini file and return as an array
Usage: array (string {path to file})
returns false on failure (and sets $GLOBALS['errors']['read_bots_ini']).
Lines whose first non-blank character is '#' or ';' are comments; any other
line containing '=' becomes a trimmed key=value pair (split on the first '=').
*/
function read_bots_ini($data_file) {
	$ini_array = array();
	if (!is_readable($data_file)) {
		// (fix: the error message used the undefined variable $file)
		$GLOBALS['errors']['read_bots_ini'] = "ini file: $data_file does not exist.";
		return false;
	}
	$file = file($data_file);
	foreach ($file as $conf) {
		// if first real character isn't '#' or ';' and there is a '=' in the line..
		if ((substr(trim($conf), 0, 1) != '#')
			and (substr(trim($conf), 0, 1) != ';')
			and (substr_count($conf, '=') >= 1)) {
			$eq = strpos($conf, '=');
			$ini_array[trim(substr($conf, 0, $eq))] = trim(substr($conf, $eq + 1));
		}
	}
	unset($file);
	return $ini_array;
}
// Validate Referer against Referer lists..
// returns true for good referers, false for bad referers..
//
// Empty/short referers and internal referers pass straight through. Known
// referers are checked against the white/black/banned lists; brand-new
// referers can optionally be "interrogated": we fetch the referring page
// and look for a back-link to us, then white- or black-list it accordingly.
function validate_referer(){
global $anti_hammer;
// if referer is empty, move on..
if ($anti_hammer['referer'] == '') { return true; }
// or some token string (not URL - "none" is common), move on..
if (!$anti_hammer['domains_only'] and strlen($anti_hammer['referer']) < 11) { return true; } // "http://t.co" is the shortest domain (twits!)
if ($anti_hammer['domains_only'] and strlen($anti_hammer['referer']) < 4) { return true; } // "t.co" is the shortest domain (twits!)
// OK, we got a real(ish) URL..
// Parse the URL into "parts"..
$this_URLArray = parse_url($anti_hammer['referer']);
// check & SET ['host']
if (!isset($this_URLArray['host'])) { // something like "baddomain.com" or "baddomain/foobar"
// NOTE(review): if parse_url() produced no 'path' either, this reads an
// unset key - emits a notice on modern php.
$this_URLArray = parse_url('http://'.$this_URLArray['path']);
}
// An internal referer..
if ($this_URLArray['host'] == $anti_hammer['host']) { return true; }
// Validate the lists..
// (when interrogating, missing white/black list files are created on the fly)
$black_is_go = false;
$white_is_go = false;
$banned_is_go = false;
if (isset($anti_hammer['black_list']) and !file_exists($anti_hammer['black_list']) and $anti_hammer['interrogate_referers']) {
add_log_data($anti_hammer['black_list'], ''); // create the file only
}
if (isset($anti_hammer['black_list']) and file_exists($anti_hammer['black_list'])) { $black_is_go = true; }
if (isset($anti_hammer['white_list']) and !file_exists($anti_hammer['white_list']) and $anti_hammer['interrogate_referers']) {
add_log_data($anti_hammer['white_list'], '');
}
if (isset($anti_hammer['white_list']) and file_exists($anti_hammer['white_list'])) { $white_is_go = true; }
if (isset($anti_hammer['banned_referers_list']) and file_exists($anti_hammer['banned_referers_list'])) { $banned_is_go = true; }
// Test domain name or entire URL?..
$test_referer = $anti_hammer['referer'];
if ($anti_hammer['domains_only']) { $test_referer = $this_URLArray['host']; }
// There's no white list, check black-list(s) right now..
if ($black_is_go and !$white_is_go) {
if (is_listed($anti_hammer['black_list'], $test_referer, $anti_hammer['domains_only'])) { return false; }
if ($banned_is_go) { if (is_listed($anti_hammer['banned_referers_list'], $test_referer, $anti_hammer['domains_only'])) { return false; } }
}
// Using two or three dynamic lists, check white-list first (slightly quicker for friendly referers)..
if ($white_is_go) {
if (is_listed($anti_hammer['white_list'], $test_referer, $anti_hammer['domains_only'])) { return true; }
if ($black_is_go) { if (is_listed($anti_hammer['black_list'], $test_referer, $anti_hammer['domains_only'])) { return false; } }
if ($banned_is_go) { if (is_listed($anti_hammer['banned_referers_list'], $test_referer, $anti_hammer['domains_only'])) { return false; } }
}
// Still here, must be a NEW REFERER..
// Shall we Interrogate them???
if ($anti_hammer['interrogate_referers']) {
$list_pre = '';
$list_add = @$this_URLArray['path'];
// (re)set wonky prefs..
if ($anti_hammer['time_out'] < 1 or $anti_hammer['time_out'] > 120) { $anti_hammer['time_out'] = 10; }
ini_set('default_socket_timeout', $anti_hammer['time_out']);
// fetch contents of referring page..
// (reads up to max_referer_get bytes of the referring page, from offset 32)
if ($anti_hammer['max_referer_get'] < 32) { $anti_hammer['max_referer_get'] = 32; }
$ref_contents = @file_get_contents($anti_hammer['referer'], FALSE, NULL, 32, $anti_hammer['max_referer_get']);
// checking domain or full URL?..
if ($anti_hammer['domains_only']) {
$list_pre = '';
$list_add = '';
} else { // I don't like elseif!
// optionally record the scheme in list entries (default to http://)..
if ($anti_hammer['record_scheme']) {
if (isset($this_URLArray['scheme'])) {
$list_pre = $this_URLArray['scheme'].'://';
} else {
$list_pre = 'http://';
}
}
}
// check page data for back-links..
// a link to our page exists on their page, add this referer to the white-list and continue..
if (link_in_page($ref_contents, $anti_hammer['uri'], $anti_hammer['link_check_accuracy'])) {
if ($white_is_go) {
add_log_data($anti_hammer['white_list'], $list_pre.$this_URLArray['host'].$list_add."\n");
}
return true;
} else {
// no link back to our site, add this referer to the black-list and end page here.
if ($black_is_go) {
add_log_data($anti_hammer['black_list'], $list_pre.$this_URLArray['host'].$list_add."\n");
}
if ($anti_hammer['black_log']) {
add_log_data($anti_hammer['black_log'], create_log_data(true, true));
}
return false;
}
}
}
// Search fetched page content for a back-link to our URL.
// $accuracy selects the method: 1 (default) plain substring scan, 2 splits
// on 'href' attributes, 3 full regex anchor extraction. Both the raw and
// urlencode()d forms of $link_test are checked. Returns true when a link
// is found, otherwise false.
function link_in_page($page_string, $link_test, $accuracy=1) {
	switch ($accuracy) {
		case 2: // Method 2.. (more accurate than Method 1, twice as fast as method 3)
			$page_links = explode('href', $page_string); //, $link_limit) // limit set by GET
			$plc = count($page_links);
			for ($i=1; $i < $plc; $i++) { // element 0 is the pre-href preamble
				$attr = substr($page_links[$i], 1, stripos($page_links[$i], '>') - 1);
				// (fix: compare against false - a match at offset 0 used to be missed)
				if (strpos($attr, $link_test) !== false
					or strpos($attr, urlencode($link_test)) !== false) { return true; }
			}
			break;
		case 3: // Method 3.. (most accurate, slowest method)
			preg_match_all("/a[\s]+[^>]*?href[\s]?=[\s\"\']+(.*?)[\"\']+.*?>([^<]+|.*?)?<\/a>/", $page_string, $matches);
			$matches = $matches[1];
			foreach ($matches as $var) {
				if ($var == $link_test or $var == urlencode($link_test)) { return true; }
			}
			break;
		default: // Default Method.. (fastest, least accurate method)
			// (fix: !== false, so a link right at the start of the page counts)
			if (stripos($page_string, $link_test) !== false
				or stripos($page_string, urlencode($link_test)) !== false) { return true; }
	}
	return false;
}
// Validate URL against Banned URLs list..
// If the (urldecode()d) request URI matches any entry in the banned URLs
// list, the request is killed with the optional 'bad_url_message'.
// NOTE(review): this calls send_501_kill(), which is not defined anywhere
// in this part of the file, while the call-site comment says a 503 is sent;
// confirm send_501_kill() actually exists (send_503_kill()/send_403_kill() do).
function validate_url(){
global $anti_hammer;
if (file_exists($anti_hammer['banned_urls_list'])) {
if (is_listed($anti_hammer['banned_urls_list'], urldecode($anti_hammer['request']))) {
$msg = '';
if (isset($anti_hammer['bad_url_message'])) {
$msg = $anti_hammer['bad_url_message'];
}
send_501_kill($msg);
}
}
}
// Validate User Agent against Banned User Agents list..
// Kills the request with a 403 (plus the optional 'bad_agent_message') when
// the client's User Agent matches a (regex-capable) entry in the list.
function validate_agent(){
	global $anti_hammer;
	if (!file_exists($anti_hammer['banned_agents_list'])) { return; }
	if (!is_listed_regex($anti_hammer['banned_agents_list'], $anti_hammer['user_agent'])) { return; }
	$msg = isset($anti_hammer['bad_agent_message']) ? $anti_hammer['bad_agent_message'] : '';
	send_403_kill($msg);
}
// Validate IP Address against Banned IP Addresses list..
// Each list entry is matched as a *prefix* of the client's IP, so partial
// entries like "192.168.0." ban a whole block. A hit gets a 403 kill (with
// the optional 'bad_ip_message'). Returns false when the list is unreadable.
function validate_ip(){
	global $anti_hammer;
	$ip_array = AH_GrabListFileIntoArray($anti_hammer['banned_ip_list']);
	if (!is_array($ip_array)) { return false; }
	foreach ($ip_array as $banned_ip) {
		// the entry must match from the very first character of the IP..
		if (@strpos($anti_hammer['remote_ip'], trim($banned_ip)) !== 0) { continue; }
		$msg = isset($anti_hammer['bad_ip_message']) ? $anti_hammer['bad_ip_message'] : '';
		send_403_kill($msg);
	}
	return;
}
//// with thanks to claudiu at cnixs dot com
//function ipCheck ($IP, $CIDR) {
// list ($net, $mask) = split ("/", $CIDR);
// $ip_net = ip2long ($net);
// $ip_mask = ~((1 << (32 - $mask)) - 1);
// $ip_ip = ip2long ($IP);
// $ip_ip_net = $ip_ip & $ip_mask;
// return ($ip_ip_net == $ip_net);
//}
// returns true if there is a matching entry in a given list, otherwise false.
//
// Each non-blank (trimmed) line of $list_array_file is tested against
// $test_ref, case-insensitively. With $only_domains set, the entry must
// match at the very start of $test_ref; otherwise a match anywhere will do.
function is_listed($list_array_file, $test_ref, $only_domains=false) {
	$list_array = AH_GrabListFileIntoArray($list_array_file);
	if (!is_array($list_array)) { return false; }
	foreach ($list_array as $list_ref) {
		$list_ref = trim($list_ref);
		if (!$list_ref) { continue; } // skip blank/whitespace-only lines
		$pos = stripos($test_ref, $list_ref);
		if ($only_domains) {
			// domain entries should match right at the start === 0
			if ($pos === 0) { return true; }
		} elseif ($pos !== false) {
			// ..otherwise, matching anywhere is fine (no match equals EXACTLY false)
			return true;
		}
	}
	return false;
}
// Used to validate User Agent Strings.
//
// Returns true if there is a matching entry in a given list, otherwise false.
// NOTE: You can use regular expression in the entries.
//
function is_listed_regex($list_array_file, $test_ref) {
	$list_array = AH_GrabListFileIntoArray($list_array_file);
	if (!is_array($list_array)) { return false; }
	foreach ($list_array as $list_ref) {
		$pattern = trim($list_ref);
		if (!$pattern) { continue; } // ignore blank lines
		// case-insensitive match; @ soaks up malformed user-supplied patterns
		if (@preg_match("/$pattern/i", $test_ref)) { return true; }
	}
	return false;
}
// file() has improved!
// Returns the file's lines (newlines stripped, empty lines skipped), or an
// empty string when the file does not exist.
function AH_GrabListFileIntoArray($list_file) {
	if (file_exists($list_file)) {
		return file($list_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
	}
	return '';
}
// Referer-spam response: emits a styled block page (plus an optional
// insert file) and terminates the request via send_503_kill(). Never returns.
function die_spammer() {
global $anti_hammer;
// strip the configured 'chop' prefix from the URI before displaying/linking it
// (NOTE(review): presumably makes the URI site-relative — defined in prefs)
$anti_hammer['uri'] = str_replace($anti_hammer['chop'], '', $anti_hammer['uri']);
// build the full block page: bad_referer_msg plus a link to the cleaned URI
$die_msg = '<!DOCTYPE HTML SYSTEM><html>
<head><title>'.$anti_hammer['title'].'</title>
<style type="text/css" media="screen">
/*<![CDATA[*/
<!--
body {
margin: 3rem 4rem;
background: #FF9100;
color: #330000;
font-family:Tahoma, sans-serif;
}
a:link, a:visited, a:hover, a:active {
color: #330000;
text-decoration: none;
}
a:hover { text-decoration: underline; }
pre { font-size: small; }
//-->
/*]]>*/
</style></head>
<body>
'.$anti_hammer['bad_referer_msg'].'<br />
<pre><a href="https://corz.org/server/tools/anti-hammer/" title="Ban Web Site Hammers, protect your valuable resources for /real/ clients.">'.
'Anti-Hammer</a>\'s Referer Spam Protection prevented access to: <a href="'.$anti_hammer['uri'].'">'.$anti_hammer['uri'].'</a></pre><br />
<br />';
// you may want to insert some google ads or something here..
if (file_exists($anti_hammer['die_insert'])) {
// capture the include's output and append it to the page body
ob_start();
include $anti_hammer['die_insert'];
$die_msg .= ob_get_clean();
}
$die_msg .= '
</body>
</html>';
// 503 with Retry-After: 1 — send_503_kill() die()s, so execution stops here
send_503_kill($die_msg, 1);
}
// Assemble one multi-line, tab-delimited log entry from the current
// request details in $anti_hammer.
// $log_hammer: include the time/ID/hammer-count line.
// $ref_first:  put the referer line first instead of last.
function create_log_data($log_hammer=true, $ref_first=false) {
    global $anti_hammer;
    $referer_line = "referer:\t".$anti_hammer['referer']."\n";
    $log_data = $ref_first ? $referer_line : '';
    $log_data .= "page: \t".$anti_hammer['request']."\n";
    if ($log_hammer) {
        $log_data .= "time: \t".date('Y.m.d h:i:s A')."\t".'ID: '.$anti_hammer['client_id']."\t"."x ".$anti_hammer['session']['hammer']."\n";
    }
    // get_rhost() (defined elsewhere) presumably fills in
    // $anti_hammer['remote_host'], used on the visitor line below.
    get_rhost();
    $log_data .= "visitor:\t".$anti_hammer['remote_host'].'['.$anti_hammer['remote_ip'].']'."\t".'('.$anti_hammer['user_agent'].')'."\n";
    $log_data .= "accepts:\t".$anti_hammer['accepts']."\n";
    if (!$ref_first) {
        $log_data .= $referer_line;
    }
    // trailing blank line separates entries in the log file
    return $log_data."\n";
}
// send a Service Temporary Unavailable header..
// $ra_time (Retry-After) time in seconds.
// Emit a 503 response with an optional Retry-After header, then halt.
// $msg: response body; $ra_time: Retry-After seconds (<= 0 suppresses it).
// Never returns. (The unused "global $anti_hammer;" was removed — nothing
// in this function reads or writes it, and the sibling send_* helpers
// don't declare it either.)
function send_503_kill($msg='', $ra_time=1) {
    header($_SERVER['SERVER_PROTOCOL'].' 503 Service Temporarily Unavailable');
    header('Status: 503 Service Temporarily Unavailable');
    if ($ra_time > 0) header('Retry-After: '.$ra_time);
    header('Connection: Close');
    die($msg);
}
// Emit a 501 Not Implemented response as plain text, then halt. Never returns.
function send_501_kill($msg='') {
    $status = '501 Not Implemented';
    header($_SERVER['SERVER_PROTOCOL'].' '.$status);
    header('Status: '.$status);
    header('Content-Type: text/plain');
    header('Connection: Close');
    die($msg);
}
// Emit a 403 Forbidden response as plain text, then halt. Never returns.
// An empty/falsy $msg gets the stock "go away" message.
function send_403_kill($msg='') {
    $status = '403 Forbidden';
    header($_SERVER['SERVER_PROTOCOL'].' '.$status);
    header('Status: '.$status);
    header('Content-Type: text/plain');
    header('Connection: Close');
    if (empty($msg)) {
        $msg = "Due to abuse, possibly by a web 'sucker', \nthis request has been denied, sorree.\n\nIn other words, go away quickly!";
    }
    die($msg);
}
// Simple elapsed-time stopwatch.
// Call with a truthy argument to (re)start and get the start time back;
// call with no argument (or falsy) to get seconds elapsed since the start.
// Fixes: microtime(true) already returns a float, so the old
// array_sum(explode(' ', ...)) — a leftover of the string-mode microtime()
// idiom — just round-tripped it through a string; and a default parameter
// replaces the func_num_args()/func_get_arg() dance (call sites unchanged).
function AH_stop_watch($press=false) {
    static $start_time;
    $time = microtime(true);
    if ($press) {
        $start_time = $time;
        return $start_time;
    }
    return $time - $start_time;
}
// direct access....
// HTML help text shown (via die()) when the script is requested directly
// instead of running as an auto-prepend.
function direct() {
    return <<<'MSG'
This script is designed to run as a php auto-prepend, like so (in .htaccess)..<br /><br />
<code>php_value auto_prepend_file "/real/full/server/path/to/anti-hammer.php"</code><br /><br />
If you are running PHP as a CGI, put a line in your local php.ini/user.ini file, instead:<br /><br />
<code>auto_prepend_file = "/real/full/server/path/to/anti-hammer.php"</code>
MSG;
}
?>