Login   Register  
PHP Classes
elePHPant
Icontem

File: example.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Mudessir Medgitov  >  Magic HTML Parser  >  example.php  >  Download  
File: example.php
Role: Example script
Content type: text/plain
Description: example
Class: Magic HTML Parser
Parse HTML documents and extract keywords
Author: By
Last change: made it more understandable
Date: 5 years ago
Size: 3,397 bytes
 

Contents

Class file image Download
<?php
/**
 * Example script for the Magic HTML Parser class (class.Html.php).
 *
 * Walks through the whole pipeline on a single page: load the page, parse it
 * into a tag tree, flatten the tree into a list of tags, compute keyword
 * density statistics, sort them, and convert the results to UTF-8.
 */

require 'class.Html.php';

// Page to analyse.
$url = 'http://www.yahoo.com/';

$objHtmlParser = new Html($url);

// Passed to Parse() below. NOTE(review): presumably filled in by Parse()
// through a by-reference parameter -- confirm against class.Html.php.
$description = '';

$objHtmlParser->Clean();

// Parse the HTML page into a tree structure. The tree is stored in
// $objHtmlParser->tree, but it is not yet in a convenient form.
//
// The variable is passed without a call-site "&": call-time pass-by-reference
// is a fatal error since PHP 5.4. This relies on Parse() declaring its
// parameter by reference in class.Html.php.
$objHtmlParser->Parse($description);

// Extract the tags we need from the tree; this gives a more useful list.
// As above, FindAllTags() is expected to declare its second parameter by
// reference so that $all_tags is populated.
$all_tags = array();
$objHtmlParser->FindAllTags($objHtmlParser->tree, $all_tags);

// Collect phrases made of 1, 2, 3 or 4 keywords.
// Add more lengths here if you need to analyse longer phrases.
$stat_prepare = array(1, 2, 3, 4);

// Example of using the $all_tags array directly: read the page charset.
// NOTE(review): this index chain is not guarded, so a page without a
// content-type meta tag will raise an undefined-index notice here.
$charset = $all_tags['meta content-type'][0]['props']['charset'];

/**
 * Keyword density.
 *
 * KeywordsDensity() returns an array of all keyword phrases with the
 * following structure:
 *
 * Array(
 *     [1] => Array(                      - phrases with 1 keyword
 *         [yahoo] => Array(              - tags where the phrase was found
 *             [title] => Array(
 *                 [count] => 1           - repeats of the phrase in this tag
 *                 [text]  => Yahoo!
 *             )
 *             [__total__] => 20          - total repeats in the whole page
 *             ...
 *         )
 *         ...
 *     ),
 *     [2] => Array(                      - phrases with 2 keywords
 *         [real estate] => Array(        - tags where the phrase was found
 *             [a] => Array(
 *                 [count] => 2
 *                 [text]  => Real Estate; Yahoo! Real Estate -
 *             )
 *             [__total__] => 2
 *             ...
 *         )
 *         ...
 *     )
 * )
 */
$words_stat = $objHtmlParser->KeywordsDensity($all_tags, $stat_prepare);

// $words_stat still contains phrases we do not need. Filter out rarely
// repeated phrases using a threshold of 2 (the exact cut-off semantics are
// defined by deleteMinEntries()).
$words_top_notsorted = $objHtmlParser->deleteMinEntries($words_stat, 2);

// Sort the phrases of every group by their __total__ value (total repeats in
// the page). The group's own '__total__' entry is saved before sorting and
// written back afterwards.
foreach (array_keys($words_stat) as $key) {
    $total = $words_stat[$key]['__total__'];
    $words_stat[$key] = $objHtmlParser->SortWordsSataistic($words_stat[$key], 2);
    $words_stat[$key]['__total__'] = $total;
}

$charset_to = "UTF-8";

// At this point the words are sorted; dump them (still in the page charset).
print_r($words_stat);

// Convert the encoding in case the page used Chinese or other non-ASCII
// characters.
$words_stat = $objHtmlParser->ConvertEncoding($words_stat, $charset, $charset_to);
$words_top_notsorted = $objHtmlParser->ConvertEncoding($words_top_notsorted, $charset, $charset_to);

// Examples of extracting other data from the HTML page.
$title = $objHtmlParser->helpConvertEncoding($all_tags['title'][0]['text'], $charset, $charset_to);
$keywords = $objHtmlParser->helpConvertEncoding($all_tags['meta keywords'][0]['text'], $charset, $charset_to);
$desc = $objHtmlParser->helpConvertEncoding($all_tags['meta description'][0]['text'], $charset, $charset_to);
$original_text = $objHtmlParser->helpConvertEncoding($objHtmlParser->original_text, $charset, $charset_to);

?>