PHP Classes

File: html_info.class.php

Recommend this page to a friend!
  Classes of Sven Wagener   HTML Info   html_info.class.php   Download  
File: html_info.class.php
Role: Class source
Content type: text/plain
Description: The basic class file
Class: HTML Info
Getting information about an HTML site
Author: By Sven Wagener
Last change: Changed mail address
Date: 20 years ago
Size: 9,948 bytes
 

Contents

Class file image Download
<?php
/**
 * Extracts general information from an HTML document held in a string:
 * title, meta data, images, links, frames, headings, formatted text runs
 * and the body.
 *
 * The extraction is regular-expression based (no DOM parser is involved), so
 * it is tolerant of broken markup but can be confused by a literal ">" inside
 * an attribute value. Tag and attribute names are matched case-insensitively;
 * extracted values keep the case they have in the document.
 *
 * @author Sven Wagener <wagener_at_indot_dot_de>
 */
class html_info
{
    /**
     * The HTML document being inspected.
     * @var string
     */
    public $string = "";

    /**
     * Result of the most recent get_meta_data() call; stays "" until that
     * method has been called.
     * @var array|string
     */
    public $meta = "";

    /**
     * Creates an inspector for the given document.
     *
     * Declared before html_info() so that PHP versions which still honour
     * class-named constructors see __construct() first.
     *
     * @param string $html_string The whole HTML document as a string
     */
    public function __construct($html_string = "")
    {
        $this->string = $html_string;
    }

    /**
     * Legacy (PHP 4 style) constructor, kept as an ordinary method so that
     * existing code calling $obj->html_info($html) or parent::html_info($html)
     * keeps working. PHP 8 no longer treats it as a constructor, which is why
     * __construct() exists.
     *
     * @param string $html_string The whole HTML document as a string
     */
    public function html_info($html_string)
    {
        $this->string = $html_string;
    }

    /**
     * Returns the text of the first <title> element.
     *
     * The title is returned as written in the document (not lower-cased and
     * not trimmed) and may span several lines.
     *
     * @return string The title, or "" when the document has no <title>
     */
    public function get_title()
    {
        // "s" lets "." cross line breaks, "U" stops at the first </title>.
        if (preg_match('|<title(?:\s[^>]*)?>(.*)</title>|isU', $this->string, $found)) {
            return $found[1];
        }

        return "";
    }

    /**
     * Returns the named meta tags of the document and stores the same array
     * in $this->meta.
     *
     * Only <meta> tags carrying a non-empty "name" attribute are reported
     * (http-equiv/charset tags are skipped). When a name occurs more than
     * once the first occurrence wins. Names are lower-cased because they are
     * case-insensitive keys; contents keep their original case.
     *
     * @return array List of array('name' => ..., 'content' => ...) entries
     */
    public function get_meta_data()
    {
        $meta = array();
        $seen = array();

        foreach ($this->find_tag_attributes('meta') as $attributes) {
            $name = strtolower($this->get_tag_param("name", $attributes));
            if ($name === "" || isset($seen[$name])) {
                continue;
            }

            $seen[$name] = true;
            $meta[] = array(
                'name'    => $name,
                'content' => $this->get_tag_param("content", $attributes),
            );
        }

        $this->meta = $meta;

        return $meta;
    }

    /**
     * Returns all images of the document, one entry per distinct "src".
     *
     * @return array List of array('src', 'alt', 'width', 'height') entries in
     *               document order; attributes that are missing are ""
     */
    public function get_images()
    {
        $images = array();
        $seen   = array();

        foreach ($this->find_tag_attributes('img') as $attributes) {
            $src = $this->get_tag_param("src", $attributes);
            if ($src === "" || isset($seen[$src])) {
                continue;
            }

            $seen[$src] = true;
            $images[] = array(
                'src'    => $src,
                'alt'    => $this->get_tag_param("alt", $attributes),
                'width'  => $this->get_tag_param("width", $attributes),
                'height' => $this->get_tag_param("height", $attributes),
            );
        }

        return $images;
    }

    /**
     * Returns all links (<a> tags) of the document, one entry per distinct
     * "href".
     *
     * @return array List of array('href', 'target') entries in document
     *               order; a missing target is ""
     */
    public function get_links()
    {
        $links = array();
        $seen  = array();

        foreach ($this->find_tag_attributes('a') as $attributes) {
            $href = $this->get_tag_param("href", $attributes);
            if ($href === "" || isset($seen[$href])) {
                continue;
            }

            $seen[$href] = true;
            $links[] = array(
                'href'   => $href,
                'target' => $this->get_tag_param("target", $attributes),
            );
        }

        return $links;
    }

    /**
     * Returns the text runs that carry ALL of the requested formats, i.e.
     * that are nested inside every selected tag (<b>, <i>, <u>), in any
     * nesting order.
     *
     * @param boolean $bold       true if the text has to be bold
     * @param boolean $italic     true if the text has to be italic
     * @param boolean $underlined true if the text has to be underlined
     * @return array The matching strings with tags stripped; empty when no
     *               format was requested or nothing matched
     */
    public function get_strings_formated($bold, $italic, $underlined)
    {
        $tags = array();

        if ($bold) {
            $tags[] = array('open' => "<b>", 'close' => "</b>");
        }
        if ($italic) {
            $tags[] = array('open' => "<i>", 'close' => "</i>");
        }
        if ($underlined) {
            $tags[] = array('open' => "<u>", 'close' => "</u>");
        }

        return $this->get_strings_in_tags($tags, $this->string);
    }

    /**
     * Returns the strings of $string that are enclosed by every tag pair in
     * $tags.
     *
     * For each pair the enclosed fragments are located and then searched
     * recursively for the remaining pairs; once a single pair is left, its
     * fragments are returned with all markup stripped.
     *
     * The 'open' and 'close' values are inserted into a "|"-delimited regular
     * expression verbatim, so regex metacharacters in them are active and a
     * literal "|" must be escaped by the caller.
     *
     * @param array  $tags   Tag pairs ($tags[$i]['open'] and $tags[$i]['close'])
     * @param string $string The HTML string to search
     * @return array The strings found; empty when $tags is empty or nothing
     *               matched
     */
    public function get_strings_in_tags($tags, $string)
    {
        $end_matches = array();

        if (!is_array($tags) || count($tags) === 0) {
            return $end_matches;
        }

        $tags        = array_values($tags);
        $is_last_tag = (count($tags) === 1);

        foreach ($tags as $tag) {
            $pattern = "|" . $tag['open'] . "(.*)" . $tag['close'] . "|isU";
            if (!preg_match_all($pattern, $string, $matches)) {
                if ($is_last_tag) {
                    return $end_matches;
                }
                continue;
            }

            // Innermost level: nothing left to narrow down, return plain text.
            if ($is_last_tag) {
                return array_map('strip_tags', $matches[1]);
            }

            // Every pair that shares neither its opening nor its closing tag
            // with the current one still has to be satisfied further inside.
            $remaining_tags = array();
            foreach ($tags as $candidate) {
                if ($candidate['open'] != $tag['open'] && $candidate['close'] != $tag['close']) {
                    $remaining_tags[] = $candidate;
                }
            }

            foreach ($matches[1] as $fragment) {
                // Newer results are placed in front, as the original code did.
                $end_matches = array_merge(
                    $this->get_strings_in_tags($remaining_tags, $fragment),
                    $end_matches
                );
            }
        }

        return $end_matches;
    }

    /**
     * Returns all strings in $string that lie between the start and end tag.
     *
     * Matching is case-insensitive, shortest-match, and may span line
     * breaks. Both tags are inserted into a "|"-delimited regular expression
     * verbatim, so regex metacharacters in them are active and a literal "|"
     * must be escaped by the caller. Inner markup is NOT stripped.
     *
     * @param string $start_tag The starting tag
     * @param string $end_tag   The end tag
     * @param string $string    The string to search in
     * @return array The strings found, in document order; empty when nothing
     *               matched
     */
    public function get_strings_in_tag($start_tag, $end_tag, $string)
    {
        $pattern = "|" . $start_tag . "(.*)" . $end_tag . "|isU";
        if (!preg_match_all($pattern, $string, $matches)) {
            return array();
        }

        return $matches[1];
    }

    /**
     * Returns the contents of all headings from <h$from_headnumber> up to and
     * including <h$till_headnumber>, grouped by level (all h1 first, then h2,
     * and so on). Headings carrying attributes (<h1 class="...">) are found
     * as well; inner markup is not stripped.
     *
     * @param int $from_headnumber First heading level to collect
     * @param int $till_headnumber Last heading level to collect
     * @return array The heading contents found
     */
    public function get_strings_headed($from_headnumber, $till_headnumber)
    {
        $result_arr = array();

        // Cast so that only digits ever reach the regular expression.
        $first = (int) $from_headnumber;
        $last  = (int) $till_headnumber;

        for ($level = $first; $level <= $last; $level++) {
            $results = $this->get_strings_in_tag(
                "<h" . $level . "(?:\\s[^>]*)?>",
                "</h" . $level . ">",
                $this->string
            );
            $result_arr = array_merge($result_arr, $results);
        }

        return $result_arr;
    }

    /**
     * Returns the content of the <body> element (markup included, the
     * <body ...> and </body> tags themselves excluded).
     *
     * When the closing </body> tag is missing, everything after the opening
     * tag is returned. Nothing is printed.
     *
     * @return string The content of the body, or "" when there is no <body>
     */
    public function get_body()
    {
        if (!preg_match('~<body(?:\s[^>]*)?>~i', $this->string, $open, PREG_OFFSET_CAPTURE)) {
            return "";
        }

        $start = $open[0][1] + strlen($open[0][0]);
        $end   = stripos($this->string, '</body', $start);

        if ($end === false) {
            return (string) substr($this->string, $start);
        }

        return (string) substr($this->string, $start, $end - $start);
    }

    /**
     * Returns the text of the document without tags.
     *
     * Note: this works on the WHOLE document, not only on the <body>, so text
     * from the head (for example the title) is included. Line breaks, tabs
     * and "&nbsp;" entities are removed without a replacement, which joins
     * the words around them.
     *
     * @return string The document text without tags
     */
    public function get_body_text()
    {
        $text = strip_tags($this->string);

        // Applied in this order, exactly like a chain of single replacements.
        $remove = array("\n", "\r", "\t", "<!--", "//-->", "&nbsp;");

        return str_replace($remove, "", $text);
    }

    /**
     * Returns the URLs referenced by the <frame> and <iframe> tags of the
     * document.
     *
     * @return array The distinct, non-empty "src" values in document order
     */
    public function get_frame_urls()
    {
        $urls = array();
        $seen = array();

        foreach ($this->find_tag_attributes('i?frame') as $attributes) {
            $src = $this->get_tag_param("src", $attributes);
            if ($src === "" || isset($seen[$src])) {
                continue;
            }

            $seen[$src] = true;
            $urls[] = $src;
        }

        return $urls;
    }

    /**
     * Returns the value of one attribute of a tag.
     *
     * The tag text is scanned attribute by attribute, so text inside another
     * attribute's quoted value is never mistaken for an attribute. The name
     * is compared exactly and case-insensitively ("src" does not match
     * "data-src"). Double-quoted, single-quoted and unquoted values are
     * supported.
     *
     * @param string $param Name of the attribute, e.g. "href"
     * @param string $tag   The tag, or just its attribute list
     * @return string The value of the first matching attribute, or "" when
     *                the attribute is absent
     */
    public function get_tag_param($param, $tag)
    {
        $pattern = '~([^\s=/<>"\']+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'<>]+))~';
        if (!preg_match_all($pattern, $tag, $attributes, PREG_SET_ORDER)) {
            return "";
        }

        foreach ($attributes as $attribute) {
            if (strcasecmp($attribute[1], $param) !== 0) {
                continue;
            }

            // Only one value group takes part in a match; trailing groups
            // that did not take part are absent from the set.
            if (isset($attribute[4])) {
                return $attribute[4];   // unquoted value
            }
            if (isset($attribute[3])) {
                return $attribute[3];   // single-quoted value
            }

            return $attribute[2];       // double-quoted value
        }

        return "";
    }

    /**
     * Collects the attribute lists of all tags whose name matches
     * $tag_pattern (a regular-expression fragment supplied by this class
     * only, never by callers).
     *
     * As in the original patterns, the tag name has to be followed by
     * whitespace, so tags without any attribute are not reported.
     *
     * @param string $tag_pattern Regex fragment for the tag name, e.g. "img"
     * @return array The raw attribute strings in document order
     */
    private function find_tag_attributes($tag_pattern)
    {
        $pattern = '~<(?:' . $tag_pattern . ')\s([^>]*)>~i';
        if (!preg_match_all($pattern, $this->string, $found)) {
            return array();
        }

        return $found[1];
    }
}
?>