PHP Classes
elePHPant
Icontem

File: example.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Rizwan Abbas  >  PHP OLX Classifieds Scraper  >  example.php  >  Download  
File: example.php
Role: Example script
Content type: text/plain
Description: Example file
Class: PHP OLX Classifieds Scraper
Extract data of classifieds published in OLX sites
Author: By Rizwan Abbas
Last change: unexpected $end issue fixed and tested
Date: 2 months ago
Size: 2,045 bytes
 

Contents

Class file image Download
<?php
/* Author: Rizwan Abbas rizwan@zeropoint.it
 * Organization: zeropoint.it
 * This script crawls the data from OLX only
 * Currently it targets cars category and all its html elements
 *
 */

// Remove PHP's execution time limit (0 = unlimited); presumably needed because
// crawling several remote pages can take longer than the default limit.
ini_set('max_execution_time', 0);
// Report every error, warning and notice while the script runs.
error_reporting(E_ALL);

include_once(
'simple_html_dom.php');//open source class
include_once("scraper.php");
/**
 * Crawl one OLX listing page and collect the details of every ad found on it.
 *
 * For each ad cell on the page the detail link is extracted, echoed as
 * progress output and handed to getDetails(). The record returned by
 * getDetails() is then enriched with the city, category and manufacturer
 * read from the ad's breadcrumb before being added to the result.
 *
 * Ads without a detail link, or for which getDetails() returns nothing
 * usable, are skipped. Breadcrumb parts that are missing from the markup
 * are stored as empty strings instead of raising errors.
 *
 * @param string $searchURL URL of the listing page to crawl.
 * @param mixed  $p         Not used; kept so existing callers keep working.
 *
 * @return array List of ad records; empty when the page could not be loaded
 *               or no ad produced any details.
 */
function crawl($searchURL, $p)
{
    $collection = array();

    $html = file_get_html($searchURL);
    if (!is_object($html)) {
        // file_get_html() can fail (e.g. empty or oversized response); report
        // it and let the caller carry on with the next page.
        trigger_error("crawl(): unable to load $searchURL", E_USER_WARNING);
        return $collection;
    }

    foreach ($html->find('td[class=offer onclick]') as $ad) {
        $link = $ad->find('a[class=marginright5 link linkWithHash detailsLink]', 0);
        $url = $link ? $link->href : '';
        if (!is_string($url) || $url === '') {
            continue;
        }

        echo "\n\n $url \n\n";

        $data = getDetails($url);
        if (!is_array($data) || count($data) === 0) {
            continue;
        }

        // City is the <span> directly inside the breadcrumb element.
        $cityNode = $ad->find('small[class=breadcrumb small] >span', 0);
        $city = $cityNode ? trim($cityNode->innertext) : '';

        // The text in front of the first <span> is split on spaces: the first
        // word is used as the category, the second as the manufacturer.
        $breadcrumb = $ad->find('small[class=breadcrumb small]', 0);
        $breadcrumbText = $breadcrumb ? trim($breadcrumb->innertext) : '';
        $beforeSpan = explode('<span>', $breadcrumbText);
        $words = explode(' ', $beforeSpan[0]);
        $categoryName = trim($words[0]);
        $manufacturer = isset($words[1]) ? trim($words[1]) : '';

        $data['attributes']['City'] = $city;
        $data['attributes']['Category'] = $categoryName;
        $data['carinfo']['Category'] = $categoryName;
        $data['carinfo']['Model'] = 'not mentioned';
        $data['carinfo']['City'] = $city;
        $data['attributes']['Manufacturer'] = $manufacturer;

        $collection[] = $data;
    }

    // simple_html_dom keeps circular references between nodes; release the
    // tree explicitly so long multi-page runs do not accumulate memory.
    $html->clear();
    unset($html);

    return $collection;
}
//crawl

        //crawl only olx
       
// Not read anywhere in this script; kept as a global in case included code
// relies on it.
$categories = array('cars','motorcycles','scooters','bicycles','commercial-vehicles','parts-accessories','other-vehicles');

$portal = "olx";        // passed to crawl() as its second argument
$pages = 2;             // number of listing pages to fetch
$category = "vehicles"; // section inserted into the listing URL

// 24-hour clock, so the start and end times are unambiguous.
$start = date('H:i:s');

for ($i = 1; $i <= $pages; $i++) {
    // The first page has no query string; later pages use ?page=N.
    $searchURL = "http://olx.com.pk/$category/";
    if ($i > 1) {
        $searchURL .= "?page=" . $i;
    }

    $collection = crawl($searchURL, $portal);

    // Scraped text is untrusted: escape it before embedding it in the HTML
    // output, and close the <pre> that is opened for every page.
    print "<pre>";
    print htmlspecialchars(print_r($collection, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    print "</pre>";
}

$end = date('H:i:s');
print "Started At:" . $start . "\n";
print "Ended At:" . $end . "\n";

?>