PHP Classes

File: Sql_Lexer.class.php

Recommend this page to a friend!
  Classes of Tom Schaefer   SQL Parse and Compile   Sql_Lexer.class.php   Download  
File: Sql_Lexer.class.php
Role: Class source
Content type: text/plain
Description: tokenizes a sql string
Class: SQL Parse and Compile
Parse and compose SQL queries programatically
Author: By
Last change: update
Date: 15 years ago
Size: 19,535 bytes
 

Contents

Class file image Download
<?php /** * PHP ctype compatibility functions. See the PHP ctype module for more * information on usage. * * @author John Millaway * @author Brent Cook * @author Thomas Schaefer * * Note: These functions expect an integer argument, like the C versions * To use with a PHP character, use ord($c). These functions do not support * string arguments like their PHP extension counterparts */ if (!extension_loaded('ctype')) { function ctype_alnum($c) { static $ctype__; return ($ctype__[$c] & 7); // (1 | 2 | 4) } function ctype_alpha($c) { static $ctype__; return ($ctype__[$c] & 3); // (1 | 2) } function ctype_cntrl($c) { static $ctype__; return ($ctype__[$c] & 40); } function ctype_digit($c) { static $ctype__; return ($ctype__[$c] & 4); } function ctype_graph($c) { static $ctype__; return ($ctype__[$c] & 27); // (20 | 1 | 2 | 4) } function ctype_lower($c) { static $ctype__; return ($ctype__[$c] & 2); } function ctype_print($c) { static $ctype__; return ($ctype__[$c] & 227); // (20 | 1 | 2 | 4 | 200) } function ctype_punct($c) { static $ctype__; return ($ctype__[$c] & 20); } function ctype_space($c) { static $ctype__; return ($ctype__[$c] & 10); } function ctype_upper($c) { static $ctype__; return ($ctype__[$c] & 1); } function ctype_xdigit($c) { static $ctype__; return ($ctype__[$c] & 104); // (100 | 4)); } $ctype__ = array( 32,32,32,32,32,32,32,32,32,40,40,40,40,40,32,32,32,32,32,32,32,32,32, 32,32,32,32,32,32,32,32,32,-120,16,16,16,16,16,16,16,16,16,16,16,16, 16,16,16,4,4,4,4,4,4,4,4,4,4,16,16,16,16,16,16,16,65,65,65,65,65,65, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,16,16,16,16,16,16,66,66,66, 66,66,66,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,16,16,16,16,32,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); } // {{{ token definitions // variables: 'ident', 'sys_var' // values: 'real_val', 'text_val', 'int_val', null // }}} final class Sql_Lexer { // array of valid tokens for the lexer to recognize // format is 'token literal'=>TOKEN_VALUE public $symbols = array(); // {{{ instance variables public $tokPtr = 0; public $tokStart = 0; public $tokLen = 0; public $tokText = ''; public $lineNo = 0; public $lineBegin = 0; public $string = ''; public $stringLen = 0; // Will not be altered by skip() public $tokAbsStart = 0; public $skipText = ''; // Provide lookahead capability. public $lookahead = 3; // Specify how many tokens to save in tokenStack, so the // token stream can be pushed back. public $tokenStack = array(); public $stackPtr = 0; // }}} // {{{ incidental functions public function __construct($string = '', $lookahead=0) { $this->string = $string; $this->stringLen = strlen($string); $this->lookahead = $lookahead; } public function get() { ++$this->tokPtr; ++$this->tokLen; return ($this->tokPtr <= $this->stringLen) ? $this->string{$this->tokPtr - 1} : null; } public function unget() { --$this->tokPtr; --$this->tokLen; } public function skip() { ++$this->tokStart; return ($this->tokPtr != $this->stringLen) ? $this->string{$this->tokPtr++} : ''; } public function revert() { $this->tokPtr = $this->tokStart; $this->tokLen = 0; } public function isCompop($c) { return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!')); } // }}} // {{{ pushBack() /* * Push back a token, so the very next call to lex() will return that token. * Calls to this function will be ignored if there is no lookahead specified * to the constructor, or the pushBack() function has already been called the * maximum number of token's that can be looked ahead. */ public function pushBack() { if($this->lookahead>0 && count($this->tokenStack)>0 && $this->stackPtr>0) { $this->stackPtr--; } } // }}} // {{{ lex() public function lex() { if($this->lookahead>0) { // The stackPtr, should always be the same as the count of // elements in the tokenStack. The stackPtr, can be thought // of as pointing to the next token to be added. If however // a pushBack() call is made, the stackPtr, will be less than the // count, to indicate that we should take that token from the // stack, instead of calling nextToken for a new token. if ($this->stackPtr<count($this->tokenStack)) { $this->tokText = $this->tokenStack[$this->stackPtr]['tokText']; $this->skipText = $this->tokenStack[$this->stackPtr]['skipText']; $token = $this->tokenStack[$this->stackPtr]['token']; // We have read the token, so now iterate again. $this->stackPtr++; return $token; } else { // If $tokenStack is full (equal to lookahead), pop the oldest // element off, to make room for the new one. if ($this->stackPtr == $this->lookahead) { // For some reason array_shift and // array_pop screw up the indexing, so we do it manually. for($i=0; $i<(count($this->tokenStack)-1); $i++) { $this->tokenStack[$i] = $this->tokenStack[$i+1]; } // Indicate that we should put the element in // at the stackPtr position. $this->stackPtr--; } $token = $this->nextToken(); $this->tokenStack[$this->stackPtr] = array('token'=>$token, 'tokText'=>$this->tokText, 'skipText'=>$this->skipText); $this->stackPtr++; return $token; } } else { return $this->nextToken(); } } // }}} public function getToken() { return array("type"=>$this->token,"value"=>$this->tokText,"line"=>$this->lineNo,"charno"=>$this->tokAbsStart); } public function hasNextToken() { $tokPtr = $this->tokPtr; $tokStart = $this->tokStart; $tokText = $this->tokText; $tokLen = $this->tokLen; $tokAbsStart = $this->tokAbsStart; $this->lex(); $tok = ($this->tokText!="*end of input*")?true:false; if($tok){ $this->tokPtr = $tokPtr; $this->tokStart = $tokStart; $this->tokText = $tokText; $this->tokAbsStart = $tokAbsStart; $this->tokStack=null; } return $tok; } /** * nextTokenIs * @param string $token * @param bool $return * @return mixed */ public function nextTokenIs($token , $return=false) { $tokPtr = $this->tokPtr; $tokStart = $this->tokStart; $tokText = $this->tokText; $tokLen = $this->tokLen; $tokAbsStart = $this->tokAbsStart; $nextToken = $this->nextToken(); $tok = ($nextToken==$token)?true:false; if($return and $tok) { return $nextToken; } if($tok) { $this->tokPtr = $tokPtr; $this->tokStart = $tokStart; $this->tokText = $tokText; $this->tokAbsStart = $tokAbsStart; $this->tokStack=null; } return $tok; } public function lookaheadToken() { $tokPtr = $this->tokPtr; $tokStart = $this->tokStart; $tokText = $this->tokText; $tokLen = $this->tokLen; $tokAbsStart = $this->tokAbsStart; $nextToken = $this->nextToken(); $this->tokPtr = $tokPtr; $this->tokStart = $tokStart; $this->tokText = $tokText; $this->tokAbsStart = $tokAbsStart; $this->tokStack=null; return $nextToken; } public function tokenIsNot($token) { $tokens = is_array($token) ? $token : array($token); $check=true; foreach($tokens as $tok) { if(Sql_Object::token()==$tok){ $check=false; } } return $check; } public function tokenIs($token) { $tokens = is_array($token) ? $token : array($token); $check=false; foreach($tokens as $tok) { if(Sql_Object::token()==$tok){ $check=true; } } return $check; } /** * nextTextIs * @param string $token * @param bool $return * @return mixed */ public function nextTextIs($token , $return=false) { $tokPtr = $this->tokPtr; $tokStart = $this->tokStart; $tokText = $this->tokText; $tokLen = $this->tokLen; $tokAbsStart = $this->tokAbsStart; $this->nextToken(); $tok = ($this->tokText==$token)?true:false; if($return and $tok) { return $this->tokText; } if($tok){ $this->tokPtr = $tokPtr; $this->tokStart = $tokStart; $this->tokText = $tokText; $this->tokAbsStart = $tokAbsStart; $this->tokStack=null; } return $tok; } // {{{ nextToken() public function nextToken() { if ($this->string == '') return; $state = 0; $this->tokAbsStart = $this->tokStart; while (true){ //echo "State: $state, Char: $c\n"; switch($state) { // {{{ State 0 : Start of token case 0: $this->tokPtr = $this->tokStart; $this->tokText = ''; $this->tokLen = 0; $c = $this->get(); if (is_null($c)) { // End Of Input $state = 1000; break; } while (($c == ' ') || ($c == "\t") || ($c == "\n") || ($c == "\r")) { if ($c == "\n" || $c == "\r") { // Handle MAC/Unix/Windows line endings. if($c == "\r") { $c = $this->skip(); // If not DOS newline if($c != "\n") $this->unget(); } ++$this->lineNo; $this->lineBegin = $this->tokPtr; } $c = $this->skip(); $this->tokLen = 1; } // Escape quotes and backslashes if ($c == '\\') { $t = $this->get(); if ($t == '\'' || $t == '\\' || $t == '"') { $this->tokText = $t; $this->tokStart = $this->tokPtr; return $this->tokText; } else { $this->unget(); // Unknown token. Revert to single char $state = 999; break; } } if (($c == '\'') || ($c == '"')) { // text string $quote = $c; $state = 12; break; } if ($c == '_') { // system variable $state = 18; break; } if (ctype_alpha(ord($c)) || ($c == '`')) { // keyword or ident $state = 1; break; } if (ctype_digit(ord($c))) { // real or int number $state = 5; break; } if ($c == '.') { $t = $this->get(); if ($t == '.') { // ellipsis if ($this->get() == '.') { $this->tokText = '...'; $this->tokStart = $this->tokPtr; return $this->tokText; } else { $state = 999; break; } } else if (ctype_digit(ord($t))) { // real number $this->unget(); $state = 7; break; } else { // period $this->unget(); } } if ($c == '#') { // Comments $state = 14; break; } if ($c == '-') { $t = $this->get(); if ($t == '-') { $state = 14; break; } elseif ($t == ' ') { $state = 15; break; } elseif (is_numeric( $t )) { $state = 15; break; } elseif (ord($t)==32 ) { $state = 16; break; } else { // negative number $this->unget(); $state = 5; break; } } if ($c == '+') { $t = $this->get(); if ($t == '+') { $state = 14; break; } elseif ($t == ' ') { $state = 15; break; } elseif (is_numeric( $t )) { $state = 15; break; } } if ($this->isCompop($c)) { // comparison operator $state = 10; break; } // Unknown token. Revert to single char $state = 999; break; // }}} // {{{ State 1 : Incomplete keyword or ident case 1: $c = $this->get(); if (ctype_alnum(ord($c)) || ($c == '_') || ($c == '.') || ($c == '`')) { $state = 1; break; } $state = 2; break; // }}} /* {{{ State 2 : Complete keyword or ident */ case 2: $this->unget(); $this->tokText = substr($this->string, $this->tokStart, $this->tokLen); $testToken = strtolower($this->tokText); if (isset($this->symbols[$testToken])) { $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return $testToken; } else { $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return 'ident'; } break; // }}} // {{{ State 5: Incomplete real or int number case 5: $c = $this->get(); if (ctype_digit(ord($c))) { $state = 5; break; } else if ($c == '.') { $t = $this->get(); if($t == '.') { // ellipsis $this->unget(); } else { // real number $state = 7; break; } } else if(ctype_alpha(ord($c))) { // number must end with non-alpha character $state = 999; break; } else { // complete number $state = 6; break; } // }}} // {{{ State 6: Complete integer number case 6: $this->unget(); $this->tokText = intval(substr($this->string, $this->tokStart, $this->tokLen)); $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return 'int_val'; break; // }}} // {{{ State 7: Incomplete real number case 7: $c = $this->get(); /* Analogy Start */ if ($c == 'e' || $c == 'E') { $state = 15; break; } /* Analogy End */ if (ctype_digit(ord($c))) { $state = 7; break; } $state = 8; break; // }}} // {{{ State 8: Complete real number */ case 8: $this->unget(); $this->tokText = floatval(substr($this->string, $this->tokStart, $this->tokLen)); $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return 'real_val'; // }}} // {{{ State 10: Incomplete comparison operator case 10: $c = $this->get(); if ($this->isCompop($c)) { $state = 10; break; } $state = 11; break; // }}} // {{{ State 11: Complete comparison operator case 11: $this->unget(); $this->tokText = substr($this->string, $this->tokStart, $this->tokLen); if($this->tokText) { $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return $this->tokText; } $state = 999; break; // }}} // {{{ State 12: Incomplete text string case 12: $bail = false; while (!$bail) { switch ($this->get()) { case '': $this->tokText = null; $bail = true; break; case "\\": if (!$this->get()) { $this->tokText = null; $bail = true; } //$bail = true; break; case $quote: $this->tokText = stripslashes(substr($this->string, ($this->tokStart+1), ($this->tokLen-2))); $bail = true; break; } } if (!is_null($this->tokText)) { $state = 13; break; } $state = 999; break; // }}} // {{{ State 13: Complete text string case 13: $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return 'text_val'; break; // }}} // {{{ State 14: Comment case 14: $c = $this->skip(); if ($c == "\n" || $c == "\r" || $c == "") { // Handle MAC/Unix/Windows line endings. if ($c == "\r") { $c = $this->skip(); // If not DOS newline if ($c != "\n") { $this->unget(); } } if ($c != "") { ++$this->lineNo; $this->lineBegin = $this->tokPtr; } // We need to skip all the text. $this->tokStart = $this->tokPtr; $state = 0; } else { $state = 14; } break; // }}} // {{{ State 15: Exponent Sign in Scientific Notation case 15: $c = $this->get(); if($c == '-' || $c == '+' || $c == '/' || $c == '*') { $state = 16; break; } $state = 999; break; // }}} // {{{ state 16: Exponent Value-first digit in Scientific Notation case 16: $c = $this->get(); if (ctype_digit(ord($c))) { $state = 17; break; } $state = 999; // if no digit, then token is unknown break; // }}} // {{{ State 17: Exponent Value in Scientific Notation case 17: $c = $this->get(); if (ctype_digit(ord($c))) { $state = 17; break; } $state = 8; // At least 1 exponent digit was required break; // }}} // {{{ State 18 : Incomplete System Variable case 18: $c = $this->get(); if (ctype_alnum(ord($c)) || $c == '_') { $state = 18; break; } $state = 19; break; // }}} // {{{ State 19: Complete Sys Var case 19: $this->unget(); $this->tokText = substr($this->string, $this->tokStart, $this->tokLen); $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return 'sys_var'; // }}} // {{{ State 999 : Unknown token. Revert to single char case 999: $this->revert(); $this->tokText = $this->get(); $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return $this->tokText; // }}} // {{{ State 1000 : End Of Input case 1000: $this->tokText = "*end of input*"; $this->skipText = substr($this->string, $this->tokAbsStart, $this->tokStart-$this->tokAbsStart); $this->tokStart = $this->tokPtr; return null; // }}} } } } // }}} }