PHP Classes
elePHPant
Icontem

File: example.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Mudessir Medgitov  >  Magic HTML Parser  >  example.php  >  Download  
File: example.php
Role: Example script
Content type: text/plain
Description: example
Class: Magic HTML Parser
Parse HTML documents and extract keywords
Author: By
Last change: made it more understandable
Date: 6 years ago
Size: 3,397 bytes
 

Contents

Class file image Download
<?php
/**
 * Example script for the Magic HTML Parser class (class.Html.php).
 *
 * Steps performed below:
 *   1. Build an Html object for a URL and parse the page into a tag tree.
 *   2. Flatten the tree into a list of tags ($all_tags).
 *   3. Compute keyword-density statistics for phrases of 1 to 4 words.
 *   4. Filter and sort the statistics, then convert everything to UTF-8.
 *
 * NOTE(review): Parse() and FindAllTags() fill $description and $all_tags.
 * The original script forced this with call-time pass-by-reference
 * (`Parse(&$description)`), which is a fatal error since PHP 5.4. The "&" has
 * been removed from the call sites, so both methods must declare those
 * parameters by reference in class.Html.php - confirm against the class.
 */

require 'class.Html.php';

$url = 'http://www.yahoo.com/';

$objHtmlParser = new Html($url);

// Receives output from Parse(); see the review note in the file header.
$description = '';

// NOTE(review): presumably normalises the loaded markup before parsing -
// confirm in class.Html.php.
$objHtmlParser->Clean();

// Parse the HTML page into a tree structure. The tree is stored in
// $objHtmlParser->tree, but it is not convenient to work with directly yet.
$objHtmlParser->Parse($description);

// Collect all the needed tags from the tree into a flatter, more useful list.
$all_tags = array();
$objHtmlParser->FindAllTags($objHtmlParser->tree, $all_tags);

// Phrase lengths (in keywords) to collect statistics for.
// Add more values if you need to analyze longer phrases.
$stat_prepare = array(1, 2, 3, 4);

// Example of reading a value out of $all_tags: the page's declared charset.
// NOTE(review): this index chain is not guarded; a page without a
// "meta content-type" tag raises an undefined-index notice and yields null.
$charset = $all_tags['meta content-type'][0]['props']['charset'];

/*
 * KeywordsDensity() returns the keyword phrases grouped by phrase length.
 * The result has the following structure:
 *
 * Array(
 *     [1] => Array(                      - phrases with 1 keyword
 *         [yahoo] => Array(              - tags where the phrase was found
 *             [title] => Array(
 *                 [count] => 1           - repeats of the phrase in this tag
 *                 [text]  => Yahoo!
 *             )
 *             [__total__] => 20          - total repeats in the whole page
 *             ...
 *         )
 *         ...
 *     ),
 *     [2] => Array(                      - phrases with 2 keywords
 *         [real estate] => Array(        - tags where the phrase was found
 *             [a] => Array(
 *                 [count] => 2
 *                 [text]  => Real Estate; Yahoo! Real Estate -
 *             )
 *             [__total__] => 2
 *             ...
 *         ),
 *         ...
 *     )
 * )
 */
$words_stat = $objHtmlParser->KeywordsDensity($all_tags, $stat_prepare);

// $words_stat still contains phrases we do not need.
// Drop the phrases that are repeated fewer than 2 times.
$words_top_notsorted = $objHtmlParser->deleteMinEntries($words_stat, 2);

// Sort the phrases of every group by their __total__ value (total repeats in
// the page). The group's own '__total__' entry is saved before sorting and
// written back afterwards so that it survives the sort.
// NOTE(review): the structure documented above shows __total__ per phrase,
// not per group - confirm that a group-level __total__ exists.
foreach (array_keys($words_stat) as $key) {
    $total = $words_stat[$key]['__total__'];
    $words_stat[$key] = $objHtmlParser->SortWordsSataistic($words_stat[$key], 2);
    $words_stat[$key]['__total__'] = $total;
}

$charset_to = "UTF-8";

// At this point the words are sorted; dump them (still in the page's
// original encoding).
print_r($words_stat);

// Convert the encoding in case the page used Chinese or any other
// non-UTF-8 characters.
$words_stat = $objHtmlParser->ConvertEncoding($words_stat, $charset, $charset_to);
$words_top_notsorted = $objHtmlParser->ConvertEncoding($words_top_notsorted, $charset, $charset_to);

// Examples of taking other data from the parsed HTML.
$title = $objHtmlParser->helpConvertEncoding($all_tags['title'][0]['text'], $charset, $charset_to);
$keywords = $objHtmlParser->helpConvertEncoding($all_tags['meta keywords'][0]['text'], $charset, $charset_to);
$desc = $objHtmlParser->helpConvertEncoding($all_tags['meta description'][0]['text'], $charset, $charset_to);
$original_text = $objHtmlParser->helpConvertEncoding($objHtmlParser->original_text, $charset, $charset_to);

?>