Login   Register  
PHP Classes
elePHPant
Icontem

File: engine/handler.robots.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Kristo Vaher  >  Wave Framework  >  engine/handler.robots.php  >  Download  
File: engine/handler.robots.php
Role: Application script
Content type: text/plain
Description: Robots Handler
Class: Wave Framework
MVC framework for building Web sites and APIs
Author: By
Last change: It is now possible to return picture files and resource files as BASE64 encoded if you provide 'base64' parameter as a dynamic URL parameter. Added 'url-absolute' and 'url-relative' to the already existing 'request-url' variables that the default URL Controller returns to the View. Added 'output' return type that simply returns the whatever is stored in response arrays 'www-data' key. When using getState() method on a variable that does not exist, then it generates a notice in debugger log. Added an option to restrict URL's based on user agent being matched, this allows to create mobile specific redirects and other user agent specific features. Some State variable names have been changed to clean up the State namespace, this can break backwards compatibility. Chech the full list at update documentation. Updated jQuery to 1.9.0 release. Added isDeveloper() call and developer-ip and developer-user-agent configuration settings. Fixed a bug with Javascript API Wrapper parallel form requests.
Date: 2013-01-30 05:38
Size: 6,914 bytes
 

Contents

Class file image Download
<?php
/**
 * Wave Framework <http://www.waveframework.com>
 * Robots Handler
 *
 * Robots Handler is used to return robots.txt files, if a request is made to such a file. This 
 * handler either returns the existing /robots.txt file, or generates a new one that allows 
 * all-access to robots. Robot directives for search engines and other crawlers are actually 
 * stored on files and pages themselves, so it is not needed to specifically allow or deny 
 * anything through robots.txt file. Robots Handler also pinpoints to sitemap.xml file.
 *
 * @package    Index Gateway
 * @author     Kristo Vaher <kristo@waher.net>
 * @copyright  Copyright (c) 2012, Kristo Vaher
 * @license    GNU Lesser General Public License Version 3
 * @tutorial   /doc/pages/handler_robots.htm
 * @since      1.5.0
 * @version    3.5.0
 */
// INITIALIZATION
    // Stopping all requests that did not come from Index Gateway
    
if(!isset($resourceAddress)){
        
header('HTTP/1.1 403 Forbidden');
        die();
    }
    
// Robots.txt file is always returned in plain text format
    
header('Content-Type: text/plain;charset=utf-8;');
    
    
// This flag stores whether cache was used
    
$cacheUsed=false;
    
// Default cache timeout of one month, unless timeout is set
    
if(!isset($config['robots-cache-timeout'])){
        
$config['robots-cache-timeout']=14400// Four hours
    
}
// GENERATING ROBOTS FILE
    // Robots file is generated only if it does not exist in root
    
if(!file_exists(__ROOT__.'robots.txt')){
    
        
// ASSIGNING PARAMETERS FROM REQUEST
            // If filename includes & symbol, then system assumes it should be dynamically generated
            
$parameters=array_unique(explode('&',$resourceFile));
            
// Looking for cache
            
$cacheFilename=md5('robots.txt'.$config['version'].$resourceRequest).'.tmp';
            
$cacheDirectory=__ROOT__.'filesystem'.DIRECTORY_SEPARATOR.'cache'.DIRECTORY_SEPARATOR.'resources'.DIRECTORY_SEPARATOR.substr($cacheFilename,0,2).DIRECTORY_SEPARATOR;
            
// If cache file exists then cache modified is considered that time
            
if(file_exists($cacheDirectory.$cacheFilename)){
                
$lastModified=filemtime($cacheDirectory.$cacheFilename);
            } else {
                
// Otherwise it is server request time
                
$lastModified=$_SERVER['REQUEST_TIME'];
            }
            
        
// GENERATING NEW ROBOTS FILE OR LOADING FROM CACHE
            // If robots cannot be found from cache, it is generated
            
if(in_array('nocache',$parameters) || ($lastModified==$_SERVER['REQUEST_TIME'] || $lastModified<($_SERVER['REQUEST_TIME']-$config['robots-cache-timeout']))){
            
                
// STATE AND DATABASE
                
                    // State stores a lot of settings that are taken into account during Sitemap generation
                    
require(__ROOT__.'engine'.DIRECTORY_SEPARATOR.'class.www-state.php');
                    
$state=new WWW_State($config);
                    
// Connecting to database, if configuration is set
                    // Uncomment this if you actually need to use database connection for robots.txt file
                    // if(isset($config['database-name']) && $config['database-name']!='' && isset($config['database-type']) && isset($config['database-host']) && isset($config['database-username']) && isset($config['database-password'])){
                        // require(__ROOT__.'engine'.DIRECTORY_SEPARATOR.'class.www-database.php');
                        // $databaseConnection=new WWW_Database($config['database-type'],$config['database-host'],$config['database-name'],$config['database-username'],$config['database-password'],((isset($config['database-errors']))?$config['database-errors']:false),((isset($config['database-persistent']))?$config['database-persistent']:false));
                    // }
                    
                // GENERATING ROBOTS STRING 
                
                    // Robots string is stored here
                    
$robots='';
                    
$robots.='User-agent: *'."\n";
                    
$robots.='Disallow: '."\n";
                    
$robots.='Sitemap: '.((isset($config['limiter-https']) && $config['limiter-https']==true)?'https://':'http://').$_SERVER['HTTP_HOST'].$state->data['url-web'].'sitemap.xml';
                    
                
// WRITING TO CACHE
            
                    // Resource cache is cached in subdirectories, if directory does not exist then it is created
                    
if(!is_dir($cacheDirectory)){
                        if(!
mkdir($cacheDirectory,0755)){
                            
trigger_error('Cannot create cache folder',E_USER_ERROR);
                        }
                    }
                    
// Data is written to cache file
                    
if(!file_put_contents($cacheDirectory.$cacheFilename,$robots)){
                        
trigger_error('Cannot create resource cache',E_USER_ERROR);
                    }
            
            } else {
                
// Setting the flag for logger
                
$cacheUsed=true;
            }
            
        
// HEADERS
        
            // If cache is used, then proper headers will be sent
            
if(in_array('nocache',$parameters)){
                
// user agent is told to cache these results for set duration
                
header('Cache-Control: no-cache,no-store');
                
header('Expires: '.gmdate('D, d M Y H:i:s',$_SERVER['REQUEST_TIME']).' GMT');
                
header('Last-Modified: '.gmdate('D, d M Y H:i:s',$lastModified).' GMT');
            } else {
                
// user agent is told to cache these results for set duration
                
header('Cache-Control: public,max-age='.$config['robots-cache-timeout']);
                
header('Expires: '.gmdate('D, d M Y H:i:s',($_SERVER['REQUEST_TIME']+$config['robots-cache-timeout'])).' GMT');
                
header('Last-Modified: '.gmdate('D, d M Y H:i:s',$lastModified).' GMT');
            }
            
            
// Content length of the file
            
$contentLength=filesize($cacheDirectory.$cacheFilename);
            
// Content length is defined that can speed up website requests, letting user agent to determine file size
            
header('Content-Length: '.$contentLength);
            
        
// OUTPUT
            // Returning the file to user agent
            
readfile($cacheDirectory.$cacheFilename);
            
// File is deleted if cache was requested to be off
            
if(in_array('nocache',$parameters)){
                
unlink($cacheDirectory.$cacheFilename);
            }
        
    } else {
        
        
// RETURNING EXISTING ROBOTS FILE
        
            // This is technically considered as using cache
            
$cacheUsed=true;
            
            
// Cache headers
            
header('Cache-Control: public,max-age='.$config['robots-cache-timeout']);
            
header('Expires: '.gmdate('D, d M Y H:i:s',($_SERVER['REQUEST_TIME']+$config['robots-cache-timeout'])).' GMT');
            
// Last modified header
            
header('Last-Modified: '.gmdate('D, d M Y H:i:s',filemtime(__ROOT__.'robots.txt')).' GMT');
            
// Content length of the file
            
$contentLength=filesize(__ROOT__.'robots.txt');
            
// Content length is defined that can speed up website requests, letting user agent to determine file size
            
header('Content-Length: '.$contentLength);
            
// Since robots.txt did exist in root, it is simply returned
            
readfile(__ROOT__.'robots.txt');
    }
    
// WRITING TO LOG
    // If Logger is defined then request is logged and can be used for performance review later
    
if(isset($logger)){
        
// Assigning custom log data to logger
        
$logger->setCustomLogData(array('category'=>'robots','cache-used'=>$cacheUsed,'content-length-used'=>$contentLength,'database-query-count'=>((isset($databaseConnection))?$databaseConnection->queryCounter:0)));
        
// Writing log entry
        
$logger->writeLog();
    }
?>