<?php
/*
*** robots.txt processing class
* Author: IceDragon of QuickFox.org
* http://www.icerealm.org/
*
* Feel free to modify/use for any purpose.
*
* Change Log:
* 1.0.0 [20090704] - Initial release.
*/
// Parses robots.txt exclusion rules and answers whether a given UserAgent may
// fetch a given URL path.
//
// Typical use:
//   $robots = new Robots( "/path/to/robots.txt", "MyCrawler/1.0" );
//   if( $robots->IsAllowed( "/some/page.html" ) ) { ... }
class Robots {
    /** Members **/

    // UserAgent string that URLs are checked for (NULL until set).
    public $gUserAgent = NULL;

    // Map of lower-cased UserAgent token => list of disallowed path prefixes.
    // The wildcard group is stored under the key "*".
    public $gRules = array();

    /** Constructor **/

    // Class constructor - optionally accepts a path to preload robots.txt
    // exclusion rules from, and a UserAgent to match rules against.
    //
    // Throws Exception (from Load()) when $path is given but cannot be opened.
    public function __construct( $path = NULL, $userAgent = FALSE )
    {
        if( $userAgent )
            $this->SetUserAgent( $userAgent );

        // Loose comparison kept on purpose: NULL, FALSE and "" all mean
        // "nothing to preload", as they always have for existing callers.
        if( $path != NULL )
            $this->Load( $path );
    }

    /** Methods **/

    // Loads a robots.txt file from $path and stores its exclusion rules in
    // the $gRules member. Use this to prime the class with data. Calling it
    // more than once accumulates rules; earlier rules are never discarded.
    //
    // Parsing rules:
    //  - Everything from "#" to the end of a line is a comment.
    //  - Lines are "field: value"; field names are case-insensitive.
    //  - Consecutive "User-agent" lines form one group sharing the rules
    //    that follow them.
    //  - Rules that appear before any "User-agent" line are filed under "*".
    //  - An empty "Disallow:" value means "nothing is disallowed" and is
    //    therefore not stored.
    //  - Fields other than "User-agent" and "Disallow" are ignored.
    //
    // Throws Exception when $path cannot be opened.
    public function Load( $path )
    {
        $fd = fopen( $path, 'r' );
        if( !$fd )
            throw new Exception("Unable to open path `$path`");

        // UserAgents the upcoming Disallow lines apply to.
        $agents = array( "*" );
        // TRUE while we are reading a run of consecutive User-agent lines.
        $in_agent_run = FALSE;

        // The wildcard group always exists, even when it ends up empty.
        if( !isset( $this->gRules["*"] ) )
            $this->gRules["*"] = array();

        while( ( $line = fgets( $fd ) ) !== FALSE )
        {
            // Strip trailing comments, then surrounding whitespace.
            $hash = strpos( $line, "#" );
            if( $hash !== FALSE )
                $line = substr( $line, 0, $hash );
            $line = trim( $line );

            // Disregard comments or empty lines.
            if( $line === "" )
                continue;

            // Split into "field: value"; skip lines without a separator.
            $split_line = explode( ":", $line, 2 );
            if( count( $split_line ) < 2 )
                continue;
            $field = strtolower( trim( $split_line[0] ) );
            $value = trim( $split_line[1] );

            // UserAgent change.
            if( $field == "user-agent" )
            {
                // The first User-agent line after a rule starts a new group.
                if( !$in_agent_run )
                    $agents = array();
                $in_agent_run = TRUE;

                // Stored lower-cased so that matching is case-insensitive.
                $user_agent = strtolower( $value );
                $agents[] = $user_agent;
                if( !isset( $this->gRules[$user_agent] ) )
                    $this->gRules[$user_agent] = array();
                continue;
            }
            $in_agent_run = FALSE;

            // Disallow rule.
            if( $field == "disallow" && $value !== "" )
            {
                foreach( $agents as $user_agent )
                    $this->gRules[$user_agent][] = $value;
            }
        }

        fclose( $fd );
    } // Load()

    // This function sets the UserAgent to match URLs against.
    public function SetUserAgent( $userAgent )
    {
        $this->gUserAgent = $userAgent;
    } // SetUserAgent()

    // This function verifies if the current UserAgent is allowed to access the
    // specific URL. Returns TRUE if allowed, FALSE otherwise.
    //
    // A rule group applies when its key is "*" or when its key occurs
    // (case-insensitively) anywhere inside the current UserAgent string.
    // A URL is refused when it starts with any disallowed prefix of any
    // applicable group.
    //
    // NOTE: Do not specify the full URL (http://...), only the part after the
    // domain! i.e.: IsAllowed("/robots.txt");
    public function IsAllowed( $url )
    {
        $url = (string) $url;
        // The cast covers the case where no UserAgent has been set (NULL).
        $current_agent = strtolower( (string) $this->gUserAgent );

        // Locate a matching UserAgent string from the list.
        foreach( $this->gRules as $user_agent => $rules )
        {
            // Numeric-looking keys come back from PHP arrays as integers.
            $user_agent = (string) $user_agent;

            if( $user_agent !== "*" )
            {
                // An empty token identifies nobody; never treat it as a match.
                if( $user_agent === "" )
                    continue;
                // Strict check: a match at offset 0 is still a match.
                if( strpos( $current_agent, strtolower( $user_agent ) ) === FALSE )
                    continue;
            }

            foreach( $rules as $str )
            {
                $str = (string) $str;
                // An empty prefix would match every URL; it disallows nothing.
                if( $str === "" )
                    continue;
                if( $this->_StartsWith( $url, $str ) )
                    return FALSE;
            }
        }
        return TRUE;
    } // IsAllowed()

    // Check if a string starts with a substring (case-sensitive, byte-wise).
    // Returns TRUE when $str begins with $subStr, FALSE otherwise. An empty
    // $subStr is a prefix of every string.
    private function _StartsWith( $str, $subStr )
    {
        $subStr = (string) $subStr;
        return strncmp( (string) $str, $subStr, strlen( $subStr ) ) === 0;
    } // _StartsWith()
} // class Robots
?>