PHP Classes
Icontem

File: class/class.openoffice23htmlparser.php


  Search   All class groups All class groups   Latest entries Latest entries   Top 10 charts Top 10 charts   Newsletter Newsletter   Blog Blog   Forums Forums   Help FAQ Help FAQ  
  Login   Register  
Recommend this page to a friend! ReTweet ReTweet Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Johan Barbier  >  oLoc  >  class/class.openoffice23htmlparser.php  
File: class/class.openoffice23htmlparser.php
Role: Class source
Content type: text/plain
Description: Specific class for parsing HTML files from sCalc in OpenOffice
Class: oLoc
Retrieve and edit internationalized texts
 

Contents

Class file image Download
<?php
/**
Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
Copyright (C) 2007  Johan Barbier <johan.barbier@gmail.com>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
/**
 * @desc Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
 * @author Johan Barbier <johan.barbier@gmail.com>
 * @version 20071101
 *
 */
class openoffice23htmlparser extends genparser {

    /**
     * @desc Imports the translations contained in a spreadsheet that OpenOffice 2.3 sCalc saved as HTML.
     *
     * Step 1 - normalisation (only when the file still contains an upper-case <TBODY> tag,
     * i.e. it has not been rewritten by a previous run):
     *   - tag names are lower-cased;
     *   - everything before <tbody> is replaced by an XML prolog, an XHTML doctype and
     *     the opening html/body/table tags;
     *   - tag attributes are stripped;
     *   - <br>, <font> and </font> are removed;
     *   - the contents of every <td> are wrapped in a CDATA section;
     *   - the result is written back over $sFile.
     *
     * Step 2 - import: the file is loaded as XML. The first <tr> is the header row, one
     * language per <td>. In every following <tr>, cell 0 holds the constant name and
     * cell $j (j >= 1) holds the value for the language found in header cell $j.
     * The module name is the part of the constant name before its first underscore.
     * Language directories, modules, constants and values are then created through
     * $this->subject.
     *
     * If the file cannot be loaded as XML or contains no <tr>, nothing is imported.
     *
     * @param string $sFile : file path (the file may be overwritten, see step 1)
     * @throws fileUploadExceptions when $sFile does not exist
     */
    final protected function parseUploadedTranslation($sFile) {
        if (!file_exists($sFile)) {
            throw new fileUploadExceptions(fileUploadExceptions::_UPLOAD_ERR_NO_FILE_);
        }

        $sFileContents = file_get_contents($sFile);
        if (false !== strpos($sFileContents, '<TBODY>')) {
            // Lower-case every tag name. The former /e (eval) modifier no longer exists
            // since PHP 7.0, so a callback is used instead.
            $sFileContents = preg_replace_callback(
                '/(<\/?)(\w+)([^>]*>)/',
                array($this, 'lowercaseTagName'),
                $sFileContents
            );
            // Swap the HTML header (everything before <tbody>) for an XML/XHTML one.
            $sXmlHeader = '<?xml version="1.0" encoding="'.$this->sEncoding.'"?>'
                .'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
                .'<html xmlns="http://www.w3.org/1999/xhtml"><body><table>';
            $sFileContents = substr_replace($sFileContents, $sXmlHeader, 0, strpos($sFileContents, '<tbody>'));
            // Drop the attributes of every opening tag.
            $sFileContents = preg_replace('`(<\w+)\s([^>]*)(>)`', '$1$3', $sFileContents);
            // Remove the tags that would be left unclosed or are purely presentational.
            $sFileContents = str_replace(array('<br>', '<font>', '</font>'), '', $sFileContents);
            // Protect cell contents from the XML parser.
            $sFileContents = str_replace(array('<td>', '</td>'), array('<td><![CDATA[', ']]></td>'), $sFileContents);
            file_put_contents($sFile, $sFileContents);
        }

        $oXml = new DOMDocument();
        if (false === $oXml->load($sFile)) {
            // Not well-formed XML: there is nothing that can be imported.
            return;
        }
        $oNodeList = $oXml->getElementsByTagName('tr');
        if (0 === $oNodeList->length) {
            // No header row, hence no language and no constant.
            return;
        }

        // Header row: one entry per column, indexed like the cells of the data rows.
        $aStored = array();
        foreach ($oNodeList->item(0)->getElementsByTagName('td') as $oTd) {
            $aStored[] = array('LNG' => (string)preg_replace('`\s`', '', $oTd->nodeValue));
        }

        // Data rows: cell 0 is the constant name, cells 1..n are the translated values.
        for ($i = 1; $i < $oNodeList->length; $i++) {
            $oRowList = $oNodeList->item($i)->getElementsByTagName('td');
            if ($oRowList->length < 2) {
                continue;
            }
            $sKey = (string)preg_replace('`\s`', '', $oRowList->item(0)->nodeValue);
            for ($j = 1; $j < $oRowList->length; $j++) {
                // Only a single trailing whitespace character is removed here;
                // the value is fully trimmed later, before being stored.
                $aStored[$j]['MOD'][$sKey] = (string)preg_replace('`\s$`', '', $oRowList->item($j)->nodeValue);
            }
        }

        foreach ($aStored as $aV) {
            if (empty($aV['LNG'])) {
                continue;
            }
            $sLng = trim(html_entity_decode(preg_replace('`\s`', '', $aV['LNG'])));
            // getCodes() is defined elsewhere; only its 'ALPHA3' entry is used, and it
            // replaces the language label when it is not empty.
            $aPays = getCodes($sLng);
            if (!empty($aPays['ALPHA3'])) {
                $sLng = $aPays['ALPHA3'];
            }
            if (!is_dir($this->subject->LOCALE_PATH.$sLng)) {
                mkdir($this->subject->LOCALE_PATH.$sLng, 0755);
            }

            // A language column may exist without any data row.
            $aConstants = isset($aV['MOD']) ? $aV['MOD'] : array();
            foreach ($aConstants as $sConst => $sVal) {
                if (empty($sConst)) {
                    continue;
                }
                $sConst = trim(preg_replace('`\s`', '', $sConst));
                $sVal = trim($sVal);
                // NOTE(review): a constant without underscore yields an empty module name,
                // exactly as before - confirm whether such constants should be rejected.
                $iSeparator = strpos($sConst, '_');
                $sMod = (false === $iSeparator) ? '' : substr($sConst, 0, $iSeparator);
                if (!file_exists($this->subject->LOCALE_PATH.'default/'.$sMod.'.xml')) {
                    $this->subject->addModule($sMod, $this->sEncoding);
                }
                $this->subject->addNewConstant($sConst, $sMod);
                $this->subject->updateValue($sConst, $sVal, false, $sLng, $sMod);
            }
        }
    }

    /**
     * @desc preg_replace_callback() handler: rebuilds a tag with its name in lower case.
     *
     * @param array $aMatch : 1 => '<' or '</', 2 => tag name, 3 => rest of the tag up to and including '>'
     * @return string
     */
    private function lowercaseTagName($aMatch) {
        return $aMatch[1].strtolower($aMatch[2]).$aMatch[3];
    }
}
?>

 
  Advertise on this site Advertise on this site   Site map Site map   Statistics Statistics   Site tips Site tips   Privacy policy Privacy policy   Contact Contact  

For more information send a message to :
info at phpclasses dot org.
Copyright (c) Icontem 1999-2009 PHP Classes - PHP Class Scripts
  PHP Book Reviews - Reviews of books and other products