Login   Register  
PHP Classes
elePHPant
Icontem

File: xmllib.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of troels knak-nielsen  >  xmllib  >  xmllib.php  >  Download  
File: xmllib.php
Role: Class source
Content type: text/plain
Description: main and only class
Class: xmllib
Parse XML documents without external libraries
Author: By
Last change: Patched bug with short-tags
Date: 7 years ago
Size: 33,761 bytes
 

Contents

Class file image Download
<?php
  /**
   * A script that reads xml documents without the need of an external library
   * Useful for reading configuration data or content data, that would otherwise
   * have been placed in a db
   *
   * known bugs/limitations :
   *        + doesn't cope well with erroneous files - so be sure to validate
   *        + a lot of the special tags like doctype etc. aren't handled well
   *        + external doctype definitions are accepted though (but ignored)
   *        + methods and behaviour mostly, but not entirely, follow the W3C DOM specification.
   * usage :
   * <code>
   *        // open and read file
   *        $xml = new XmlLib_xmlParser($filename);
   *        // parse document and return rootnode
   *        $doc = $xml->getDocument();
   * </code>
   *
   * disclaimer :
   *        this piece of code is freely usable by anyone. if it makes your life better,
   *        remember me in your evening prayer. if it makes your life worse, try doing it
   *        better yourself.
   *
   * @version 23. nov. 07
   * @author troels@kyberfabrikken.dk
   * @download http://www.phpclasses.org/
   * @package xmllib
   *
   */

// Record the encoding PHP strings are assumed to be in: mbstring's internal
// encoding when the extension is loaded, otherwise UTF-8.
define(
  '_XMLLIB_INTERNAL_ENCODING',
  function_exists('mb_internal_encoding') ? mb_internal_encoding() : 'UTF-8'
);
// XmlLib_Node consults this flag to decide whether incoming strings are run through utf8_decode().
define('_XMLLIB_IS_UTF8', _XMLLIB_INTERNAL_ENCODING == 'UTF-8');

/**
 * A generic implementation of a DOM node.
 * Loosely follows the W3C DOM Level 1 specification.
 *
 * Character data is kept in ISO-8859-1 internally: strings handed to the
 * create*() / setNodeValue() methods are converted from UTF-8 when
 * _XMLLIB_IS_UTF8 is true, and dump() / toFile() convert back on output.
 *
 * @see http://www.w3.org/TR/1998/REC-DOM-Level-1-19981001/
 * @package xmllib
 */
class XmlLib_Node
{
  /**
   * Holds the attributes of the node (name => value)
   * @var array
   * @access public
   */
  var $attributes;

  /**
   * Holds the children of the node
   * @var array
   * @access private
   */
  var $children;

  /**
   * The nodeName (without namespace prefix)
   * @var string
   * @access private
   */
  var $nodeName;

  /**
   * The namespace prefix of the node (defaults to empty)
   * @var string
   * @access private
   */
  var $namespace;

  /**
   * The nodeType (1=element, 3=text, 4=cdata-section, 8=comment)
   * @var int
   * @access private
   */
  var $nodeType;

  /**
   * The parent node, or null for a root / detached node
   * @var XmlLib_Node
   * @access private
   */
  var $parent;

  /**
   * Character data of text, cdata-section and comment nodes
   * @var string
   * @access public
   */
  var $data;

  /**
   * Internally used by the parser
   * @var int
   * @access private
   */
  var $_end;

  /**
   * Constructor
   * @param string $nodeName The name of the node to construct (may carry a "prefix:" namespace)
   * @param int $nodeType The type of the node to construct.
   * @see createChild
   */
  function __construct($nodeName='node', $nodeType=1) {
    $ns = $this->_translateNS($nodeName);
    $this->nodeName = $ns['name'];
    $this->namespace = $ns['xmlns'];
    $this->children = array();
    $this->attributes = array();
    $this->parent = null;
    $this->nodeType = $nodeType; // 1=element
  }

  /**
   * Legacy (PHP 4 style) constructor, kept so that subclasses calling
   * parent::XmlLib_Node() keep working. Must stay below __construct().
   * @param string $nodeName
   * @param int $nodeType
   * @deprecated
   * @see __construct
   */
  function XmlLib_Node($nodeName='node', $nodeType=1) {
    $this->__construct($nodeName, $nodeType);
  }

  /**
   * Constructs a child node of the same class as this node, appends it and returns it.
   * @param string $nodeName The name of the node to construct
   * @param int $nodeType The type of the node to construct.
   * @return XmlLib_Node
   */
  function & createChild($nodeName=null, $nodeType=1) {
    // instantiate the runtime class so subclasses create children of their own kind
    $classname = get_class($this);
    if ($nodeName == null) {
      $n = new $classname();
    } else {
      $n = new $classname($nodeName, $nodeType);
    }
    $this->appendChild($n);
    return $n;
  }

  /**
   * Constructs a node, and returns it.
   * The created node has no parent.
   * @param string $tagName The name of the node to construct
   * @return XmlLib_Node
   */
  function & createElement($tagName) {
    // a by-reference function has to return a variable, not an expression
    $n = new XmlLib_Node($tagName);
    return $n;
  }

  /**
   * Creates a Text node given the specified string.
   * The created node has no parent.
   * @param string $data The data for the node.
   * @return XmlLib_Node
   */
  function & createTextNode($data="") {
    $n = XmlLib_Node::_createDataNode('#text', 3, $data);
    return $n;
  }

  /**
   * Creates a CDATASection node whose value is the specified string.
   * The created node has no parent.
   * @param string $data The data for the CDATASection contents.
   * @return XmlLib_Node
   */
  function & createCDATASection($data="") {
    $n = XmlLib_Node::_createDataNode('#cdata-section', 4, $data);
    return $n;
  }

  /**
   * Creates a Comment node given the specified string.
   * The created node has no parent.
   * @param string $data The data for the node.
   * @return XmlLib_Node
   */
  function & createComment($data="") {
    $n = XmlLib_Node::_createDataNode('#comment', 8, $data);
    return $n;
  }

  /**
   * @param XmlLib_Node $orphan The node to adopt
   * @deprecated
   * @see appendChild
   * @return XmlLib_Node
   */
  function & adopt(&$orphan) {
    return $this->appendChild($orphan);
  }

  /**
   * Assigns a node as child to current node
   * @param XmlLib_Node $orphan The node to adopt
   * @return XmlLib_Node The appended node
   */
  function & appendChild(&$orphan) {
    $orphan->parent = $this;
    $this->children[] = $orphan;
    return $orphan;
  }

  /**
   * Removes the child node indicated by oldChild from the list of children, and returns it.
   * Raises an E_USER_NOTICE and returns null if oldChild is not a child of this node.
   * @param XmlLib_Node $oldChild The node to remove
   * @returns XmlLib_Node
   */
  function & removeChild(&$oldChild) {
    $null = null;
    $kept = array();
    $found = false;
    foreach ($this->children as $child) {
      if ($child === $oldChild) {
        $found = true;
      } else {
        $kept[] = $child;
      }
    }
    if (!$found) {
      trigger_error('oldChild is not a child of this node', E_USER_NOTICE);
      return $null;
    }
    $this->children = $kept;
    $oldChild->parent = null;
    return $oldChild;
  }

  /**
   * Replaces the child node oldChild with newChild in the list of children, and returns
   * the oldChild node. If the newChild is already in the tree, it is first removed.
   * Raises an E_USER_NOTICE and returns null if oldChild is not a child of this node.
   * @param XmlLib_Node $newChild The new node to put in the child list.
   * @param XmlLib_Node $oldChild The node being replaced in the list.
   * @returns XmlLib_Node The node replaced.
   */
  function & replaceChild(&$newChild, &$oldChild) {
    $null = null;
    if ($this->_indexOfChild($oldChild) < 0) {
      trigger_error('oldChild is not a child of this node', E_USER_NOTICE);
      return $null;
    }
    if ($newChild === $oldChild) {
      return $oldChild; // replacing a node with itself is a no-op
    }
    // detach newChild from wherever it currently lives
    $previousParent = $newChild->parent;
    if (is_object($previousParent) && $previousParent->_indexOfChild($newChild) >= 0) {
      $previousParent->removeChild($newChild);
    }
    // look the slot up again: the detach above may have shifted our own children
    $index = $this->_indexOfChild($oldChild);
    $this->children[$index] = $newChild;
    $newChild->parent = $this;
    $oldChild->parent = null;
    return $oldChild;
  }

  /**
   * Fetch the last childnode, or null
   * @param string $nodeName If supplied, only nodes of that nodeName will be returned
   * @param int $nodeType If supplied, only nodes of that nodeType will be returned
   * @return XmlLib_Node
   */
  function & lastChild($nodeName=null, $nodeType=null) {
    $null = null;
    for ($i = count($this->children) - 1; $i >= 0; $i--) {
      if ($this->_matches($this->children[$i], $nodeName, $nodeType)) {
        return $this->children[$i];
      }
    }
    return $null;
  }

  /**
   * Fetch the first childnode, or null
   * @param string $nodeName If supplied, only nodes of that nodeName will be returned
   * @param int $nodeType If supplied, only nodes of that nodeType will be returned
   * @return XmlLib_Node
   */
  function & firstChild($nodeName=null, $nodeType=null) {
    $null = null;
    for ($i = 0, $l = count($this->children); $i < $l; $i++) {
      if ($this->_matches($this->children[$i], $nodeName, $nodeType)) {
        return $this->children[$i];
      }
    }
    return $null;
  }

  /**
   * This is a convenience method to allow easy determination of whether a node has any children.
   * @see hasChildNodes
   * @return bool
   */
  function hasChildren() {
    return (count($this->children)>0);
  }

  /**
   * This is a convenience method to allow easy determination of whether a node has any children.
   * @see hasChildren
   * @return bool
   */
  function hasChildNodes() {
    return $this->hasChildren();
  }

  /**
   * Fetch array of childnodes.
   * @param string $nodeName If supplied, only nodes of that nodeName will be returned
   * @param string $nodeType If supplied, only nodes of that nodeType will be returned
   * @see childNodes
   * @return array
   */
  function children($nodeName=null, $nodeType=null) {
    $ret = array();
    foreach ($this->children as $child) {
      if ($this->_matches($child, $nodeName, $nodeType)) {
        $ret[] = $child;
      }
    }
    return $ret;
  }

  /**
   * Alias for children()
   * @see children
   */
  function childNodes($nodeName=null, $nodeType=null) {
    return $this->children($nodeName, $nodeType);
  }

  /**
   * Searches this node and its descendants (depth first) for the first node with specified id.
   * @param string $id Value of the "id" attribute to look for
   * @return XmlLib_Node The matching node, or null
   */
  function & getElementById($id) {
    if (isset($this->attributes['id']) && $this->attributes['id'] == $id) {
      $self = $this;
      return $self;
    }
    foreach ($this->children as $child) {
      $node = $child->getElementById($id);
      if (is_object($node)) {
        return $node;
      }
    }
    $null = null;
    return $null;
  }

  /**
   * Finds the greatest used id + 1 (next free id)
   * @return int
   */
  function getCardinality() {
    $cardinal = 0;
    if (isset($this->attributes['id'])) {
      $id = $this->attributes['id'];
      // non-numeric ids count as 0 (plain "abc" + 1 is a TypeError on PHP 8)
      $cardinal = (is_numeric($id) ? $id : (int) $id) + 1;
    }
    foreach ($this->children as $child) {
      $tmp = $child->getCardinality();
      if ($tmp > $cardinal) {
        $cardinal = $tmp;
      }
    }
    return $cardinal;
  }

  /**
   * Collects all descendants whose namespace prefix equals the given one.
   * @param string $namespace
   * @returns array
   */
  function getElementsByNamespace($namespace) {
    $resultSet = array();
    foreach ($this->children as $child) {
      if ($child->namespace() == $namespace) {
        $resultSet[] = $child;
      }
      if ($child->hasChildren()) {
        $resultSet = array_merge($resultSet, $child->getElementsByNamespace($namespace));
      }
    }
    return $resultSet;
  }

  /**
   * If the returned value is null, the node is the topmost (root)
   * @deprecated
   * @see parentNode
   * @return XmlLib_Node
   */
  function & parent() {
    return $this->parentNode();
  }

  /**
   * If the returned value is null, the node is the topmost (root)
   * @return XmlLib_Node
   */
  function & parentNode() {
    return $this->parent;
  }

  /**
   * Retrieves the name of the node
   * @note In older versions of xmllib, nodeName was (wrongly) called type.
   * @see nodeNameNS()
   * @return string
   */
  function nodeName() {
    return $this->nodeName;
  }

  /**
   * Retrieves the name of the node with namespace if any
   * @see nodeName()
   * @return string
   */
  function nodeNameNS() {
    if ($this->namespace == "") {
      return $this->nodeName;
    }
    return $this->namespace . ':' . $this->nodeName;
  }

  /**
   * Retrieves the type of the node
   * @note In older versions of xmllib, nodeName was called type.
   * @return int
   */
  function nodeType() {
    return $this->nodeType;
  }

  /**
   * Retrieves the namespace of the node
   * @return string
   */
  function namespace() {
    return $this->namespace;
  }

  /**
   * Retrieves the named attribute.
   * @note It's legal to access $this->attributes directly, but this presents a cleaner way of accomplishing it.
   * @param string $name Name of the attribute to retrieve.
   * @return string The value, or null if the attribute is not set
   */
  function getAttribute($name) {
    return (isset($this->attributes[$name])) ? $this->attributes[$name] : null;
  }

  /**
   * Sets the value of the named attribute.
   * @param string $name Name of the attribute to set.
   * @param string $value Value to assign to the node. Only scalar values are allowed.
   * @return bool
   */
  function setAttribute($name, $value) {
    if (!is_scalar($value)) {
      trigger_error("Only scalar values are allowed as attribute values.", E_USER_WARNING);
      return false;
    }
    $this->attributes[$name] = $value;
    return true;
  }

  /**
   * Removes a named attribute. All other attributes keep their names.
   * @param string $name Name of the attribute to remove.
   */
  function removeAttribute($name) {
    unset($this->attributes[$name]);
  }

  /**
   * Fetch the value of this node, depending on its type.
   * Text, cdata-section and comment nodes yield their own data. For elements
   * the "value" attribute wins; otherwise the data of the only cdata-section,
   * text or comment child (checked in that order) is returned.
   * @return string The value, or null if none could be determined
   */
  function nodeValue() {
    if (in_array($this->nodeType, array(3, 4, 8))) {
      return $this->data;
    }
    if (array_key_exists('value', $this->attributes)) {
      return $this->attributes['value'];
    }
    foreach (array(4, 3, 8) as $type) {
      $candidates = $this->children(null, $type);
      if (count($candidates) == 1) {
        return $candidates[0]->data;
      }
    }
    return null;
  }

  /**
   * Sets the value of this node; the target is chosen exactly like in nodeValue().
   * @param string $value The new value
   * @see nodeValue
   * @return bool False if the node has no place to store a value
   */
  function setNodeValue($value) {
    $value = XmlLib_Node::_toInternal($value);
    if (in_array($this->nodeType, array(3, 4, 8))) {
      $this->data = $value;
      return true;
    }
    if (array_key_exists('value', $this->attributes)) {
      $this->attributes['value'] = $value;
      return true;
    }
    foreach (array(4, 3, 8) as $type) {
      $candidates = $this->children(null, $type);
      if (count($candidates) == 1) {
        $candidates[0]->data = $value;
        return true;
      }
    }
    return false;
  }

  /**
   * Escape html-entities for flash
   * For some reason flash doesn't seem to recognize the htmlentities
   * for singlequote, lesser than and greater than
   * @param string $str String to escape
   * @return string
   */
  static function flash_escape($str) {
    $translation_table = array(
      '&' => '&amp;',
      '"' => '&quot;',
      "'" => '&#039;',
      '<' => '&#060;',
      '>' => '&#062;'
    );
    return strtr((string) $str, $translation_table);
  }

  /**
   * Escapes string with different protocols
   * Allowed values for translation param are :
   *     + htmlentities
   *     + htmlspecialchars
   *     + addslashes
   *     + flash_escape
   * An unknown translation raises an E_USER_WARNING and returns the string untouched.
   * @param string $str String to escape
   * @param string $translation The mode to translate the string by
   * @return string
   */
  static function escape($str, $translation) {
    $str = (string) $str;
    switch ($translation) {
    case 'flash_escape' : return XmlLib_Node::flash_escape($str);
    case 'addslashes' : return addslashes($str);
    // flags and charset are pinned to the pre-PHP-5.4 defaults this library
    // was written against; node data is ISO-8859-1, not UTF-8
    case 'htmlspecialchars' : return htmlspecialchars($str, ENT_COMPAT, 'ISO-8859-1');
    case 'htmlentities' : return htmlentities($str, ENT_COMPAT, 'ISO-8859-1');
    }
    trigger_error("unknown translation : ".$translation, E_USER_WARNING);
    return $str;
  }

  /**
   * Decodes c-style slashes and entities: named HTML entities, &apos;, and
   * decimal / hexadecimal character references up to 255 (the ISO-8859-1 range).
   * Each entity is decoded exactly once; unknown entities are left as they are.
   * @param string $str Encoded string
   * @return string
   */
  static function unescape($str) {
    return preg_replace_callback(
      '/&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/',
      array('XmlLib_Node', '_decodeEntity'),
      stripslashes((string) $str)
    );
  }

  /**
   * preg_replace_callback() handler for unescape().
   * @param array $match Full entity in [0], the part between "&" and ";" in [1]
   * @access private
   * @return string
   */
  static function _decodeEntity($match) {
    static $named = null;
    if ($named === null) {
      $named = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1'));
      $named['&apos;'] = "'"; // predefined in XML, unknown to HTML 4
    }
    $entity = $match[1];
    if ($entity[0] == '#') {
      if ($entity[1] == 'x' || $entity[1] == 'X') {
        $code = hexdec(substr($entity, 2));
      } else {
        $code = (int) substr($entity, 1);
      }
      // characters outside ISO-8859-1 cannot be represented; keep the reference
      return ($code < 256) ? chr((int) $code) : $match[0];
    }
    return isset($named[$match[0]]) ? $named[$match[0]] : $match[0];
  }

  /**
   * Recursively transforms node into string
   * @see escape
   * @param bool $add_formatting If true, blocks get indented
   * @param string $att_escape Translation to use for attribute values and text nodes.
   * @return string
   */
  function toString($add_formatting=false, $att_escape='addslashes') {
    $indent = "";
    $toadd = "";
    $indent_minus_one = "";
    if ($add_formatting) {
      $toadd = "\n";
      // one tab per ancestor; $indent_minus_one is the parent's indentation
      for ($p = $this->parentNode(); is_object($p); $p = $p->parentNode()) {
        if ($indent != "")
          $indent_minus_one .= "\t";
        $indent .= "\t";
      }
    }

    if ($this->nodeType() == 3) {
      return XmlLib_Node::escape($this->data, $att_escape);
    } else if ($this->nodeType() == 4) {
      if (strpos($this->data, '<![CDATA[') !== false) {
        trigger_error("illegal content. cdata-section can't contain the string : <![CDATA[", E_USER_WARNING);
      }
      if (strpos($this->data, ']]>') !== false) {
        trigger_error("illegal content. cdata-section can't contain the string : ]]>", E_USER_WARNING);
      }
      return $toadd.$indent."<![CDATA[".$this->data."]]>".$toadd.$indent_minus_one;
    } else if ($this->nodeType() == 8) {
      return $toadd.$indent."<!--".$this->data."-->".$toadd.$indent_minus_one;
    } else if ($this->nodeType() != 1) {
      // unknown types are reported, then rendered like an element
      trigger_error("unsupported nodetype : ".$this->nodeType(), E_USER_WARNING);
    }

    $a = "";
    foreach ($this->attributes as $name => $value) {
      $value = XmlLib_Node::escape($value, $att_escape);
      $a .= " ".$name."=\"".$value."\"";
    }
    if ((count($this->children) == 0)) {
      return $indent."<".$this->nodeNameNS().$a." />";
    }
    $ret = $indent."<".$this->nodeNameNS().$a.">";
    if ((count($this->children(null,1)) == 0)) {
      // only text/cdata nodes contained
      foreach ($this->children as $child) {
        $ret .= $child->toString($add_formatting, $att_escape);
      }
      $ret .= "</".$this->nodeNameNS().">";
    } else {
      $ret .= $toadd;
      foreach ($this->children as $child) {
        $ret .= $child->toString($add_formatting, $att_escape);
        $ret .= $toadd;
      }
      $ret .= $indent."</".$this->nodeNameNS().">";
    }
    return $ret;
  }

  /**
   * Outputs document to browser, complete with header
   * Param encoding works with :
   *     + UTF-8
   *     + ISO-8859-1
   * but may work with other encodings too.
   * @param string  $encoding    The document encoding.
   * @param boolean $sendHeaders If TRUE, the correct Content-Type header will be send. (default)
   */
  function dump($encoding='UTF-8', $sendHeaders=TRUE) {
    if ($sendHeaders) {
      header("Content-Type: application/xml");
    }
    echo "<?xml version=\"1.0\" encoding=\"".$encoding."\" ?>\n";
    $xml = $this->toString(true, 'flash_escape');
    echo ($encoding == 'UTF-8') ? XmlLib_Node::_toUtf8($xml) : $xml;
  }

  /**
   * Writes document to file
   * @param string $filename The filename to write to
   * @param string $encoding The document encoding.
   * @return bool False (with an E_USER_WARNING) if the file could not be written
   */
  function toFile($filename, $encoding='UTF-8') {
    $content = "<?xml version=\"1.0\" encoding=\"".$encoding."\" ?>\n";
    $xml = $this->toString(true, 'flash_escape');
    $content .= ($encoding == 'UTF-8') ? XmlLib_Node::_toUtf8($xml) : $xml;
    $fp = fopen($filename,'wb');
    if ($fp === false) {
      trigger_error("unable to open file for writing : ".$filename, E_USER_WARNING);
      return false;
    }
    $written = fwrite($fp, $content);
    fclose($fp);
    return ($written !== false);
  }

  /**
   * Makes a html decorated version of the document. Useful for debugging.
   * @see toString
   * @return string Document as html-formatted string
   */
  function toHtml() {
    $html = htmlentities($this->toString(true, 'htmlentities'), ENT_COMPAT, 'ISO-8859-1');
    $html = str_replace("\t", '&nbsp;&nbsp;&nbsp;&nbsp;', $html);
    $html = str_replace('&gt;', '&gt;</strong>', $html);
    $html = str_replace('&lt;', '<strong>&lt;', $html);
    return nl2br($html);
  }

  /**
   * Translates the document into an associative array.
   * Nodes named "array" or carrying type="array" become numerically indexed
   * lists; other nodes become maps keyed by child node name (later children
   * with the same name overwrite earlier ones). Comments are skipped.
   * @param XmlLib_Node $node From where to start the recursion. You shouldn't use this.
   * @return array
   */
  function toArray($node=null) {
    if (is_null($node)) {
      $node = $this;
    }
    $isList = (isset($node->attributes['type']) && ($node->attributes['type'] == 'array'))
      || ($node->nodeName() == 'array');
    if ($isList) {
      $a = array();
      foreach ($node->children() as $child) {
        if (in_array($child->nodeType(), array(1,3,4))) {
          $a[] = XmlLib_Node::toArray($child);
        }
      }
      return $a;
    }

    if (!$node->hasChildren()) {
      if (count($node->attributes) > 0) {
        return $node->attributes;
      }
      return $node->nodeValue();
    }
    $a = array();
    foreach ($node->children() as $child) {
      if (in_array($child->nodeType(), array(1,3,4))) {
        $v = $child->nodeValue();
        if (!is_null($v)) {
          $a[$child->nodeName()] = $v;
        } else {
          $a[$child->nodeName()] = XmlLib_Node::toArray($child);
        }
      }
    }
    return $a;
  }

  /**
   * Splits a "prefix:name" node name into its parts.
   * @param string $nodeName
   * @access private
   * @return array Keys 'name' and 'xmlns'
   */
  function _translateNS($nodeName) {
    $nodeName = (string) $nodeName;
    if (strpos($nodeName, ':') === false) {
      return array(
        'name' => $nodeName,
        'xmlns' => ''
      );
    }
    $parts = explode(':', $nodeName);
    return array(
      'name' => $parts[1],
      'xmlns' => $parts[0]
    );
  }

  /**
   * Tells whether a child passes the optional name/type filters used by
   * children(), firstChild() and lastChild(). A null filter matches everything.
   * @access private
   * @return bool
   */
  function _matches($child, $nodeName, $nodeType) {
    if (($nodeName != null) && ($child->nodeNameNS() != $nodeName)) {
      return false;
    }
    return (($nodeType == null) || ($child->nodeType() == $nodeType));
  }

  /**
   * Position of the given node in the list of children, or -1.
   * @access private
   * @return int
   */
  function _indexOfChild($node) {
    foreach ($this->children as $index => $child) {
      if ($child === $node) {
        return $index;
      }
    }
    return -1;
  }

  /**
   * Builds a parentless text/cdata-section/comment node.
   * @access private
   * @return XmlLib_Node
   */
  static function _createDataNode($nodeName, $nodeType, $data) {
    $n = new XmlLib_Node($nodeName, $nodeType);
    $n->data = XmlLib_Node::_toInternal($data);
    return $n;
  }

  /**
   * Converts a caller supplied string to the internal ISO-8859-1
   * representation when the internal PHP encoding is UTF-8.
   * @access private
   * @return string
   */
  static function _toInternal($value) {
    if (!_XMLLIB_IS_UTF8) {
      return $value;
    }
    // utf8_decode() is deprecated as of PHP 8.2; prefer mbstring when loaded
    if (function_exists('mb_convert_encoding')) {
      return mb_convert_encoding((string) $value, 'ISO-8859-1', 'UTF-8');
    }
    return utf8_decode((string) $value);
  }

  /**
   * Converts internal ISO-8859-1 data to UTF-8 for output.
   * @access private
   * @return string
   */
  static function _toUtf8($value) {
    // utf8_encode() is deprecated as of PHP 8.2; prefer mbstring when loaded
    if (function_exists('mb_convert_encoding')) {
      return mb_convert_encoding((string) $value, 'UTF-8', 'ISO-8859-1');
    }
    return utf8_encode((string) $value);
  }
}

/**
 * The xml-parser. Used to parse a document into a tree of nodes.
 *
 * Parsing happens in two passes: parseTags() scans the document into a flat
 * list of XmlLib_Tag objects carrying byte offsets, parseTree() then nests
 * them into XmlLib_Node objects by comparing those offsets.
 *
 * @access public
 * @package xmllib
 */
class XmlLib_xmlParser
{
  /**
   * Currently open elements, keyed by lowercased tag name
   * @access private
   */
  var $parseStack;

  /**
   * The document body (xml declaration and doctype stripped)
   * @access private
   */
  var $data;

  /**
   * Uppercased encoding taken from the xml declaration (defaults to UTF-8)
   * @access private
   */
  var $encoding;

  /**
   * @access private
   */
  var $casefolding;

  /**
   * Constructor
   * @param string $filename If a filename is given, the document will be loaded and prepared
   * @param boolean $casefolding If true, uppercase nodenames and attributenames are converted to lowercase
   */
  function __construct($filename=null, $casefolding=false) {
    $this->parseStack = array();
    $this->data = '';
    $this->encoding = 'UTF-8';
    $this->casefolding = $casefolding;
    if (is_string($filename)) {
      $this->loadFromFile($filename);
    }
  }

  /**
   * Legacy (PHP 4 style) constructor, kept for subclasses that call it
   * explicitly. Must stay below __construct().
   * @deprecated
   * @see __construct
   */
  function XmlLib_xmlParser($filename=null, $casefolding=false) {
    $this->__construct($filename, $casefolding);
  }

  /**
   * Loads and prepares a file
   * @param string $filename name of the file
   */
  function loadFromFile($filename) {
    $contents = file_get_contents($filename);
    if ($contents === false) {
      trigger_error("unable to read file : ".$filename, E_USER_ERROR);
      return null;
    }
    $this->loadFromString($contents);
  }

  /**
   * Loads a xml-document and prepares the parsing: determines the encoding
   * from the xml declaration, then strips the declaration and the doctype.
   * Raises an E_USER_ERROR if the declaration is missing.
   * @param string $contents the document
   */
  function loadFromString($contents) {
    $this->parseStack = array();
    $contents = (string) $contents;

    // find xml opening tag
    $declStart = strpos($contents, "<?xml");
    if ($declStart === false) {
      trigger_error ("not xml", E_USER_ERROR);
      return null;
    }
    // find xml opening tag terminator
    $declEnd = strpos($contents, "?>", $declStart);
    if ($declEnd === false) {
      trigger_error ("not xml", E_USER_ERROR);
      return null;
    }
    // find the encoding; both quote styles are legal in the declaration
    $declaration = substr($contents, $declStart, $declEnd - $declStart);
    $this->encoding = "UTF-8";
    if (preg_match('/encoding\s*=\s*(["\'])([^"\']+)\1/', $declaration, $m)) {
      $this->encoding = strtoupper($m[2]);
    }

    // strip xml opening tag
    $this->data = (string) substr($contents, $declEnd + 2);

    // strip the doctype, if there is one ahead of the root element
    $doctype = strpos($this->data, "<!DOCTYPE");
    if ($doctype !== false && $this->_precedesRoot($this->data, $doctype)) {
      $end = $this->_findDoctypeEnd($this->data, $doctype);
      if ($end === false) {
        trigger_error ("xml parse error", E_USER_ERROR);
        return null;
      }
      $this->data = (string) substr($this->data, $end + 1);
    }
    // Short tags (<tag />) are handled by parseTags() itself, so the document
    // is no longer rewritten into long-tag form here.
  }

  /**
   * Parses the loaded document
   * @returns XmlLib_Node The rootnode of the document, or null if it has no element
   */
  function & getDocument() {
    $tags = $this->parseTags();
    $doc = $this->parseTree($tags);
    return $doc;
  }

  /**
   * Registers an opened element until its closing tag is seen.
   * The stack key is lowercased, so closing tags match case-insensitively,
   * but the tag keeps the spelling it was given.
   * @param string $nodeName Tag name
   * @param int $pos Offset of the first byte of the element content
   * @param int $a_begin Offset where the attribute text starts
   * @access private
   */
  function stackPush($nodeName, $pos,  $a_begin) {
    $key = strtolower($nodeName);
    if (empty($this->parseStack[$key])) {
      $this->parseStack[$key] = array();
    }
    $t = new XmlLib_Tag($nodeName);
    $t->begin = $pos;
    $t->a_begin = $a_begin;
    array_push($this->parseStack[$key], $t);
  }

  /**
   * Closes the most recently opened element of the given name.
   * @param string $nodeName Tag name
   * @param int $pos Offset of the last byte of the element content
   * @return XmlLib_Tag The completed tag, or null (after an E_USER_ERROR) if none was open
   * @access private
   */
  function stackPop($nodeName, $pos) {
    $key = strtolower($nodeName);
    if (empty($this->parseStack[$key])) {
      trigger_error("xml_malformed unmatched closing tag [tag=".$key." pos=".$pos."]", E_USER_ERROR);
      return null;
    }

    $t = array_pop($this->parseStack[$key]);
    $t->end = $pos;
    return $t;
  }

  /**
   * usort() callback: orders tags by content offset so that a parent always
   * precedes its children.
   * @access private
   */
  function cmp($a, $b) {
    if ($a->begin != $b->begin) {
      return ($a->begin > $b->begin) ? 1 : -1;
    }
    // Same offset: text that directly follows an opening tag starts where the
    // element content starts. The element is the container and must come first.
    $aIsElement = ($a->nodeType == 1);
    $bIsElement = ($b->nodeType == 1);
    if ($aIsElement != $bIsElement) {
      return $aIsElement ? -1 : 1;
    }
    if ($a->end == $b->end) {
      return 0;
    }
    return ($a->end > $b->end) ? 1 : -1;
  }

  /**
   * Creates the tag for a run of character data.
   * @param int $textnode_begin Offset of the first byte
   * @param int $textnode_end Offset just past the last byte
   * @return XmlLib_Tag
   * @access private
   */
  function addTextnode($textnode_begin, $textnode_end) {
    $t = new XmlLib_Tag('#text');
    $t->nodeType = 3;
    $t->begin = $textnode_begin;
    $t->end = $textnode_end;
    $raw = (string) substr($this->data, $textnode_begin, $textnode_end - $textnode_begin);
    $t->data = XmlLib_Node::unescape($this->_toInternal($raw));
    return $t;
  }

  /**
   * First pass: scans the document into a sorted, flat list of tags.
   * @return array List of XmlLib_Tag
   * @access private
   */
  function parseTags() {
    $data = (string) $this->data;
    $len = strlen($data);
    $parseTags = array();
    $textnode_begin = -1;
    for ($i = 0; $i < $len; $i++) {
      if ($data[$i] != '<') {
        // text starts at the first non-whitespace byte outside markup
        if ($textnode_begin == -1 && (ord($data[$i]) > 32)) {
          $textnode_begin = $i;
        }
        continue;
      }
      // any markup ends a pending run of text
      if ($textnode_begin > -1) {
        $parseTags[] = $this->addTextnode($textnode_begin, $i);
        $textnode_begin = -1;
      }
      if (substr($data, $i, 9) == '<![CDATA[') {
        $i = $this->_addSection($parseTags, '#cdata-section', 4, $i + 9, ']]>');
      } else if (substr($data, $i, 4) == '<!--') {
        $i = $this->_addSection($parseTags, '#comment', 8, $i + 4, '-->');
      } else if (($i + 1 < $len) && ($data[$i + 1] == '/')) {
        // closing tag
        $nameLen = $this->_nameLength($data, $i + 2);
        $nodeName = (string) substr($data, $i + 2, $nameLen);
        $t = $this->stackPop($nodeName, $i - 1);
        if (is_object($t)) {
          $parseTags[] = $t;
        }
        // skip to the '>' so that "</tag >" leaves nothing behind as text
        $gt = strpos($data, '>', $i + 2 + $nameLen);
        if ($gt === false) {
          trigger_error("xml_malformed unterminated tag [tag=".$nodeName." pos=".$i."]", E_USER_ERROR);
          break;
        }
        $i = $gt;
      } else {
        // opening tag
        $nameLen = $this->_nameLength($data, $i + 1);
        $nodeName = (string) substr($data, $i + 1, $nameLen);
        $nameEnd = $i + 1 + $nameLen; // first byte after the name
        $gt = $this->_findTagEnd($data, $nameEnd);
        if ($gt === false) {
          trigger_error("xml_malformed unterminated tag [tag=".$nodeName." pos=".$i."]", E_USER_ERROR);
          break;
        }
        if ($this->casefolding) $nodeName = strtolower($nodeName);
        $a = $nameEnd + 1; // the attribute text lies between $a and the tag end
        if ($data[$gt - 1] == '/') {
          // single-style closing. The tag is anchored on its own '>' so it
          // can never lie beyond the end offset of the enclosing element.
          $t = new XmlLib_Tag($nodeName);
          $t->begin = $gt;
          $t->end = $gt;
          $t->a_begin = $a;
          $parseTags[] = $t;
        } else {
          $this->stackPush($nodeName, $gt + 1, $a);
        }
        $i = $gt;
      }
    }
    usort($parseTags, array($this, 'cmp'));
    return $parseTags;
  }

  /**
   * Reads the name="value" pairs of an element into $node->attributes.
   * Values may be enclosed in double or single quotes; a backslash escapes
   * the following character (it is removed later by XmlLib_Node::unescape()).
   * NOTE(review): unlike text nodes, attribute values are not converted from
   * the document encoding - confirm whether callers rely on that.
   * @param XmlLib_Node $node The element to receive the attributes
   * @param string $data The raw attribute text of the tag
   * @access private
   */
  function loadAttributes(&$node, $data) {
    if ($node->nodeType != 1) {
      return ;
    }
    $data = (string) $data;
    $len = strlen($data);
    $i = 0;
    while ($i < $len) {
      $nameLen = $this->_nameLength($data, $i);
      if ($nameLen == 0) {  // separator or stray byte
        $i++;
        continue;
      }
      $name = substr($data, $i, $nameLen);
      $i = $this->_skipSpace($data, $i + $nameLen);
      if (($i >= $len) || ($data[$i] != '=')) {
        continue;  // a name without a value is ignored
      }
      $i = $this->_skipSpace($data, $i + 1);
      if ($i >= $len) {
        break;
      }
      $quote = $data[$i];
      if (($quote != '"') && ($quote != "'")) {
        continue;  // unquoted values are not well-formed; skip them
      }
      $i++;
      $value = '';
      $closed = false;
      while ($i < $len) {
        $ch = $data[$i];
        if (($ch == "\\") && ($i + 1 < $len)) {  // escaped character, keep both bytes
          $value .= $ch.$data[$i + 1];
          $i += 2;
          continue;
        }
        $i++;
        if ($ch == $quote) {  // closing quote
          $closed = true;
          break;
        }
        $value .= $ch;
      }
      if (!$closed) {
        break;  // unterminated value: discard it
      }
      if ($this->casefolding) $name = strtolower($name);
      $node->attributes[$name] = XmlLib_Node::unescape($value);
    }
  }

  /**
   * Second pass: nests the sorted tags into a tree of nodes.
   * @param array $tags List of XmlLib_Tag as returned by parseTags()
   * @return XmlLib_Node The root node, or null if there is no element
   * @access private
   */
  function parseTree($tags) {
    $r = null;
    $p = null;
    for ($i = 0, $l = count($tags); $i < $l; $i++) {
      $tag = $tags[$i];
      if (is_null($r) && ($tag->nodeType != 1)) {
        continue;  // comments etc. ahead of the root element cannot be the root
      }
      $n = $tag->toNode();
      if (($tag->nodeType == 1) && ($tag->a_begin != $tag->begin)) {
        $this->loadAttributes($n, substr($this->data, $tag->a_begin, ($tag->begin - $tag->a_begin)-1));
      }
      if (is_null($r)) {
        // the root spans the whole document
        $n->_end = strlen($this->data);
        $r = $n;
        $p = $n;
        continue;
      }
      // climb until we reach the element whose content contains this tag
      while (($p->_end < $tag->begin) && is_object($p->parent)) {
        $p = $p->parent;
      }
      $n->parent = $p;
      $p->children[] = $n;
      $p = $n;
    }
    return $r;
  }

  /**
   * Records a cdata-section or comment and returns the offset of the last
   * byte of its terminator (the scan loop then steps past it).
   * @param array $parseTags List the new tag is appended to
   * @param string $nodeName '#cdata-section' or '#comment'
   * @param int $nodeType 4 or 8
   * @param int $begin Offset of the first content byte
   * @param string $terminator Three byte end marker
   * @return int
   * @access private
   */
  function _addSection(&$parseTags, $nodeName, $nodeType, $begin, $terminator) {
    $end = strpos($this->data, $terminator, $begin);
    if ($end === false) {
      trigger_error("xml_malformed unterminated ".$nodeName." [pos=".$begin."]", E_USER_ERROR);
      $end = strlen($this->data);
    }
    $t = new XmlLib_Tag($nodeName);
    $t->nodeType = $nodeType;
    $t->begin = $begin;
    $t->end = $end;
    $t->data = $this->_toInternal((string) substr($this->data, $begin, $end - $begin));
    $parseTags[] = $t;
    return $end + 2;
  }

  /**
   * Finds the '>' that ends the tag being scanned, ignoring any '>' inside
   * quoted attribute values.
   * @param string $data
   * @param int $from Offset to start scanning at
   * @return int Offset of the '>', or false
   * @access private
   */
  function _findTagEnd($data, $from) {
    $quote = null;
    for ($i = $from, $len = strlen($data); $i < $len; $i++) {
      $ch = $data[$i];
      if ($quote !== null) {
        if ($ch == "\\") {
          $i++;  // escaped character inside a value
        } else if ($ch == $quote) {
          $quote = null;
        }
      } else if (($ch == '"') || ($ch == "'")) {
        $quote = $ch;
      } else if ($ch == '>') {
        return $i;
      }
    }
    return false;
  }

  /**
   * Finds the '>' that ends the doctype declaration, stepping over an
   * internal subset ([ ... ]) if there is one.
   * @param string $data
   * @param int $from Offset of "<!DOCTYPE"
   * @return int Offset of the '>', or false
   * @access private
   */
  function _findDoctypeEnd($data, $from) {
    $gt = strpos($data, '>', $from);
    $bracket = strpos($data, '[', $from);
    if (($bracket !== false) && (($gt === false) || ($bracket < $gt))) {
      if (!preg_match('/\]\s*>/', $data, $m, PREG_OFFSET_CAPTURE, $bracket)) {
        return false;
      }
      return $m[0][1] + strlen($m[0][0]) - 1;
    }
    return $gt;
  }

  /**
   * Tells whether the given offset lies ahead of the first element tag.
   * @access private
   * @return bool
   */
  function _precedesRoot($data, $offset) {
    if (!preg_match('/<[A-Za-z_:]/', $data, $m, PREG_OFFSET_CAPTURE)) {
      return true;
    }
    return ($offset < $m[0][1]);
  }

  /**
   * Length of the tag/attribute name starting at the given offset.
   * Letters, digits and - _ : . are accepted.
   * @access private
   * @return int
   */
  function _nameLength($data, $offset) {
    if ($offset >= strlen($data)) {
      return 0;
    }
    return strspn($data, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_:.', $offset);
  }

  /**
   * Offset of the first non-whitespace byte at or after the given offset.
   * @access private
   * @return int
   */
  function _skipSpace($data, $offset) {
    if ($offset >= strlen($data)) {
      return $offset;
    }
    return $offset + strspn($data, " \t\r\n", $offset);
  }

  /**
   * Converts character data of UTF-8 documents to ISO-8859-1, which is what
   * the nodes hold; other encodings are passed through unchanged.
   * @access private
   * @return string
   */
  function _toInternal($raw) {
    if ($this->encoding != 'UTF-8') {
      return $raw;
    }
    // utf8_decode() is deprecated as of PHP 8.2; prefer mbstring when loaded
    if (function_exists('mb_convert_encoding')) {
      return mb_convert_encoding($raw, 'ISO-8859-1', 'UTF-8');
    }
    return utf8_decode($raw);
  }
}

/**
 * Used by the parser, as an intermediate before becoming a node.
 * Holds the byte offsets the parser needs to sort and nest the tags.
 * @access private
 * @package xmllib
 */
class XmlLib_Tag
{
  /** @var string Name of the tag ('#text', '#cdata-section', '#comment' for non-elements) */
  var $nodeName;
  /** @var int Node type, defaults to 1 (element) */
  var $nodeType;
  /** @var string Character data for non-element tags, null otherwise */
  var $data;
  /** @var int Offset where the content starts */
  var $begin;
  /** @var int Offset where the content ends */
  var $end;
  /** @var int Offset where the attribute text starts */
  var $a_begin;

  /**
   * Constructor
   * @param string $nodeName Name of the tag
   */
  function __construct($nodeName) {
    $this->nodeName = $nodeName;
    $this->nodeType = 1;
    $this->data = null;
  }

  /**
   * Legacy (PHP 4 style) constructor. Must stay below __construct().
   * @deprecated
   * @see __construct
   */
  function XmlLib_Tag($nodeName) {
    $this->__construct($nodeName);
  }

  /**
   * Creates the node this tag stands for, carrying over type, end offset and data.
   * @return XmlLib_Node
   */
  function & toNode() {
    $n = new XmlLib_Node($this->nodeName);
    $n->nodeType = $this->nodeType;
    $n->_end = $this->end;
    $n->data = $this->data;
    return $n;
  }
}
?>