Login   Register  
PHP Classes
elePHPant
Icontem

File: class/class.openoffice23htmlparser.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Johan Barbier  >  oLoc  >  class/class.openoffice23htmlparser.php  >  Download  
File: class/class.openoffice23htmlparser.php
Role: Class source
Content type: text/plain
Description: Specific class for parsing HTML files from sCalc in OpenOffice
Class: oLoc
Retrieve and edit internationalized texts
Author: By
Last change: Bugfixing
Date: 7 years ago
Size: 4,104 bytes
 

Contents

Class file image Download
<?php
/**
Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
Copyright (C) 2007  Johan Barbier <johan.barbier@gmail.com>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
/**
 * @desc Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
 * @author Johan Barbier <johan.barbier@gmail.com>
 * @version 20071101
 *
 */
class openoffice23htmlparser extends genparser {

    /**
     * @desc Imports the translations contained in a spreadsheet that was saved as HTML.
     *
     * If the file still contains an upper-case <TBODY> tag it is first rewritten in place
     * so that it can be loaded as XML (see convertCalcHtmlToXml()). The file is then read
     * as an XML document: the cells of the first row name the languages, and in every
     * following row the first cell is a constant name and the other cells are its values,
     * one per language column. Modules, constants and values found this way are passed on
     * to $this->subject.
     *
     * @param string $sFile : file path
     * @throws fileUploadExceptions if $sFile does not exist
     * @throws RuntimeException if the file cannot be read, converted or loaded as XML
     */
    final protected function parseUploadedTranslation($sFile) {
        if (!file_exists($sFile)) {
            throw new fileUploadExceptions(fileUploadExceptions::_UPLOAD_ERR_NO_FILE_);
        }

        $sFileContents = file_get_contents($sFile);
        if (false === $sFileContents) {
            throw new RuntimeException('Unable to read the uploaded file: '.$sFile);
        }

        // An upper-case <TBODY> means the file has not been converted yet.
        if (false !== strpos($sFileContents, '<TBODY>')) {
            file_put_contents($sFile, $this->convertCalcHtmlToXml($sFileContents));
        }

        $this->storeCalcTranslations($this->readCalcTable($sFile));
    }

    /**
     * @desc Turns the HTML markup into something DOMDocument can load as XML.
     *
     * @param string $sFileContents : raw HTML
     * @return string converted markup
     * @throws RuntimeException if one of the regular expressions fails
     */
    private function convertCalcHtmlToXml($sFileContents) {
        // Lower-case every tag name. A callback replaces the "/e" modifier, which no longer
        // exists as of PHP 7.0.
        $sFileContents = preg_replace_callback(
            '/(<\/?)(\w+)([^>]*>)/',
            function ($aMatch) {
                return $aMatch[1].strtolower($aMatch[2]).$aMatch[3];
            },
            $sFileContents
        );
        if (null === $sFileContents) {
            throw new RuntimeException('Unable to normalise the tag names of the uploaded file');
        }

        // Everything that precedes <tbody> is replaced by an XML prolog and an XHTML skeleton.
        $iBody = strpos($sFileContents, '<tbody>');
        $sFileContents = substr_replace(
            $sFileContents,
            '<?xml version="1.0" encoding="'.$this->sEncoding.'"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><body><table>',
            0,
            (false === $iBody) ? 0 : $iBody
        );

        // Drop the attributes of every opening tag.
        $sFileContents = preg_replace('`(<\w+)\s([^>]*)(>)`', '$1$3', $sFileContents);
        if (null === $sFileContents) {
            throw new RuntimeException('Unable to strip the tag attributes of the uploaded file');
        }

        // Remove the tags that would break the XML parser or carry formatting only.
        $sFileContents = str_replace(array('<br>', '<font>', '</font>'), '', $sFileContents);

        // Cell contents go into CDATA sections so that the XML parser leaves them untouched.
        return str_replace(array('<td>', '</td>'), array('<td><![CDATA[', ']]></td>'), $sFileContents);
    }

    /**
     * @desc Loads the converted file and collects its cells column by column.
     *
     * @param string $sFile : file path
     * @return array one entry per column index: 'LNG' holds the header cell of the column,
     *               'MOD' (only set when the column has values) maps constant name => value
     * @throws RuntimeException if the file cannot be loaded as XML
     */
    private function readCalcTable($sFile) {
        $oXml = new DOMDocument();
        if (false === $oXml->load($sFile)) {
            throw new RuntimeException('Unable to load the uploaded file as XML: '.$sFile);
        }

        $aStored = array();
        $oNodeList = $oXml->getElementsByTagName('tr');
        if (0 === $oNodeList->length) {
            // No row at all: nothing to import.
            return $aStored;
        }

        // First row: one header cell per column.
        $iCpt = 0;
        foreach ($oNodeList->item(0)->getElementsByTagName('td') as $oTd) {
            $aStored[$iCpt]['LNG'] = (string)preg_replace('`\s`', '', $oTd->nodeValue);
            $iCpt++;
        }

        // Other rows: cell 0 is the constant name, cells 1..n are its values.
        for ($i = 1; $i < $oNodeList->length; $i++) {
            $oRowList = $oNodeList->item($i)->getElementsByTagName('td');
            if ($oRowList->length < 2) {
                continue;
            }
            $sConst = (string)preg_replace('`\s`', '', $oRowList->item(0)->nodeValue);
            for ($j = 1; $j < $oRowList->length; $j++) {
                $aStored[$j]['MOD'][$sConst] = (string)preg_replace('`\s$`', '', $oRowList->item($j)->nodeValue);
            }
        }

        return $aStored;
    }

    /**
     * @desc Creates the locale directories, modules and constants, then stores the values.
     *
     * @param array $aStored : columns as returned by readCalcTable()
     */
    private function storeCalcTranslations(array $aStored) {
        foreach ($aStored as $aV) {
            if (empty($aV['LNG'])) {
                continue;
            }

            $sLng = trim(html_entity_decode(preg_replace('`\s`', '', $aV['LNG'])));
            // getCodes() is defined outside of this class; only its 'ALPHA3' entry is used,
            // and it replaces the header text when it is not empty.
            $aPays = getCodes($sLng);
            if (!empty($aPays['ALPHA3'])) {
                $sLng = $aPays['ALPHA3'];
            }
            if (!is_dir($this->subject->LOCALE_PATH.$sLng)) {
                mkdir($this->subject->LOCALE_PATH.$sLng, 0755);
            }

            // A column that never received a value (column 0, or a sheet made of a header
            // row only) has no 'MOD' entry.
            $aConstants = isset($aV['MOD']) ? $aV['MOD'] : array();
            foreach ($aConstants as $sConst => $sVal) {
                if (empty($sConst)) {
                    continue;
                }
                $sConst = trim(preg_replace('`\s`', '', (string)$sConst));
                $sVal = trim($sVal);

                // The module name is whatever precedes the first underscore.
                // NOTE(review): a constant without any underscore yields an empty module
                // name, exactly as before -- confirm whether such rows should be skipped.
                $iSep = strpos($sConst, '_');
                $sMod = (false === $iSep) ? '' : substr($sConst, 0, $iSep);

                if (!file_exists($this->subject->LOCALE_PATH.'default/'.$sMod.'.xml')) {
                    $this->subject->addModule($sMod, $this->sEncoding);
                }
                $this->subject->addNewConstant($sConst, $sMod);
                $this->subject->updateValue($sConst, $sVal, false, $sLng, $sMod);
            }
        }
    }
}
?>