PHP Classes
elePHPant
Icontem

File: lib_dictionary.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of David Frendin  >  lib_dictionary  >  lib_dictionary.php  >  Download  
File: lib_dictionary.php
Role: Class source
Content type: text/plain
Description: lib_dictionary
Class: lib_dictionary
Check text spelling with a dictionary in MySQL
Author: By
Last change: minor fix (v1.1.1)
Date: 8 years ago
Size: 5,461 bytes
 

Contents

Class file image Download
<?php
/***************************************************************************
 * lib_dictionary.php (ver. 1.1.1)
 * Copyright (c) 2007 David Frendin (david.frendin@gmail.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version. See the GNU General Public License
 * for more details.
 *
 ***************************************************************************/

/***************************************************************************
 * Description:
 * This library checks whether a word is misspelled, and makes 3 (or fewer/more) suggestions
 * as to which word the user meant.
 *
 * This spell lexicon uses MySQL to store/load dictionary data and similar_text / metaphone
 * to determine whether a dictionary word might be a suitable correction.
 *
 * The lib_dictionary library does _not_ require pspell or aspell, or any external
 * applications or dictionaries.
 *
 * Credits to:
 * Myself for writing it, Reza Saleh (zaalion@yahoo.com) for inspiration and english wordlist,
 * Oxymoron (php portalen) for optimization and you for reading it!
 *
 ***************************************************************************/

/**
 * Spell checker backed by a word table in the database.
 *
 * Every method that touches the database uses the global $db object (it must
 * provide sql_query(), sql_numrows() and sql_fetchrowset()) and the
 * DICTIONARY_TABLE constant; both are defined outside this file.
 */
class dictclass
{
    // true when the most recent load_dictionary() call found candidate words
    var $is_loaded = false;

    // candidate rows (each with at least a 'word' key) loaded by load_dictionary()
    var $dictionary = array();

    /**
     * Highlights every word of $phrase that is not in the dictionary.
     *
     * The phrase is split on single spaces. Each token is reduced to its
     * A-Z / a-z / 0-9 characters (which makes this English-only, but removes
     * quotation marks, commas, dots etc.) and looked up with does_word_exist().
     * Unknown words are wrapped in a red, bold <span> inside their own token
     * only, so markup inserted for one word can never be matched again by a
     * later word, and repeated misspellings are wrapped exactly once.
     *
     * @param string $phrase     text to check
     * @param bool   $debug_data when true, the elapsed time is appended to the result
     * @return string the phrase with unknown words wrapped in <span> markup
     */
    function spell_phrase($phrase, $debug_data = false)
    {
        $started = $debug_data ? microtime(true) : 0;

        $tokens = explode(" ", $phrase);
        $misspelled = array(); // word => bool, so each distinct word is queried once

        foreach ($tokens as $index => $token)
        {
            $word = preg_replace('/[^A-Za-z0-9]/', '', $token);
            if ($word === '' || $word === null)
            {
                continue; // nothing left to look up (punctuation only / double space)
            }

            if (!isset($misspelled[$word]))
            {
                $misspelled[$word] = ($this->does_word_exist($word) === -1);
            }

            if ($misspelled[$word])
            {
                $tokens[$index] = str_replace($word, "<span style=\"color: #aa0000; font-weight: bold;\">$word</span>", $token);
            }
        }

        $phrase = implode(" ", $tokens);

        if ($debug_data)
        {
            $elapsed = microtime(true) - $started;
            // microtime(true) is in seconds: * 1000 gives milliseconds
            $phrase = $phrase . " <b>" . sprintf('%.1f', $elapsed * 1000) . "ms</b> (" . sprintf('%.2f', $elapsed) . "s)";
        }

        return $phrase;
    }

    /**
     * Ranks the loaded dictionary rows by similarity to $word.
     *
     * Similarity is the percentage reported by similar_text(). Rows with a
     * similarity of 0 are ignored; rows with equal similarity keep their
     * dictionary order.
     *
     * @param string $word word to find suggestions for
     * @param int    $max  highest index to fill, i.e. at most $max + 1 suggestions
     * @return int|array -1 when no dictionary is loaded, otherwise a list of
     *                   array('word' => ..., 'p' => ...) entries sorted from
     *                   best ($match[0]) to worst; empty when nothing is similar
     */
    function checkhighest($word, $max = 2)
    {
        if (empty($this->dictionary))
            return -1;

        $limit = max(0, (int) $max) + 1;
        $match = array();

        foreach ($this->dictionary as $lookup)
        {
            $p = 0;
            similar_text($lookup['word'], $word, $p);
            if ($p <= 0)
            {
                continue;
            }

            // position of the first kept entry that is strictly worse than this one
            $position = count($match);
            for ($i = 0; $i < count($match); $i++)
            {
                if ($p > $match[$i]['p'])
                {
                    $position = $i;
                    break;
                }
            }

            if ($position >= $limit)
            {
                continue; // not good enough to enter the top list
            }

            // insert and shift the worse entries down instead of overwriting one
            array_splice($match, $position, 0, array(array('word' => $lookup['word'], 'p' => $p)));
            if (count($match) > $limit)
            {
                array_pop($match);
            }
        }

        return $match;
    }

    /**
     * Loads candidate corrections for $word into $this->dictionary.
     *
     * Candidates are rows whose `len` lies between strlen($word) - $size - 1
     * (at least 1) and strlen($word) + $size, and whose `metaphone` column
     * starts with the search key or whose `metaphoner` column starts with the
     * reversed search key.
     *
     * @param string $word word to find candidates for
     * @param int    $size tolerance on the word length
     * @return bool|array false when nothing was found (is_loaded is set to
     *                    false and the dictionary is emptied), otherwise the
     *                    loaded rows (is_loaded is set to true)
     */
    function load_dictionary($word, $size=1)
    {
        global $db;

        $mphone = metaphone($word);
        if (strlen($mphone) > 3)
        {
            // long keys are matched on their middle part only: first and last character are dropped
            $mphone = substr($mphone, 1, strlen($mphone) - 2);
        }
        $mphone = $this->escape_sql($mphone);

        $length = strlen($word);
        $size = (int) $size;
        $min = max(1, $length - $size - 1);
        $max = $length + $size;

        $sql = "SELECT * FROM " . DICTIONARY_TABLE . " WHERE len BETWEEN {$min} AND {$max} AND (metaphone LIKE '{$mphone}%' OR metaphoner LIKE concat(reverse('{$mphone}'), '%'))";
        $result = $db->sql_query($sql) or die("error");

        if ($db->sql_numrows($result) == 0)
        {
            $this->is_loaded = false;
            // drop the previous word's candidates so checkhighest() cannot reuse them
            $this->dictionary = array();
            return false;
        }

        $this->is_loaded = true;
        $this->dictionary = $db->sql_fetchrowset($result);
        return $this->dictionary;
    }

    /**
     * Looks $word up in the dictionary table (a hit means it is spelled correctly).
     *
     * The word is lower-cased and escaped here, so callers must pass it raw.
     *
     * @param string $word word to look up
     * @return int|array -1 when the word is unknown, otherwise the first
     *                   matching database row
     */
    function does_word_exist($word)
    {
        global $db;

        $sql = "SELECT * FROM " . DICTIONARY_TABLE . " WHERE ( word = '" . $this->escape_sql(strtolower($word)) . "' )";
        $result = $db->sql_query($sql) or die("sql could not connect [does_word_exist]");

        if ($db->sql_numrows($result) == 0)
            return -1;

        $rows = $db->sql_fetchrowset($result);
        return isset($rows[0]) ? $rows[0] : array();
    }

    /**
     * Adds every word of a word-list file (one word per line) that is not yet
     * in the dictionary table, echoing progress as HTML.
     *
     * Each new row stores the lower-cased word, its metaphone key, its length
     * and the reversed metaphone key.
     * NOTE(review): `metaphoner` is written reversed because load_dictionary()
     * compares that column against reverse(key) - confirm against the table
     * schema / existing data.
     *
     * @param string $path path of the word-list file
     * @return bool false when the file could not be opened, true otherwise
     */
    function add_from_file($path)
    {
        global $db;

        $spc = array("\r\n", "\n", "\r");
        set_time_limit(400); //it takes time to process large wordlists

        $handle = @fopen($path, "r");
        if (!$handle)
        {
            return false;
        }

        // fgets() returns false at end of file, which also ends the loop
        while (($line = fgets($handle, 4096)) !== false)
        {
            $line = strtolower(trim(str_replace($spc, "", $line)));
            if ($line === '')
            {
                continue; // blank line: nothing to add
            }

            if ($this->does_word_exist($line) !== -1)
            {
                continue; // already in the dictionary
            }

            $key = metaphone($line);
            $sql = "INSERT INTO " . DICTIONARY_TABLE . " (word, metaphone, len, metaphoner) VALUES ('" . $this->escape_sql($line) . "', '" . $this->escape_sql($key) . "', " . strlen($line) . ", '" . $this->escape_sql(strrev($key)) . "')";
            echo "adding: $line<br>";
            $db->sql_query($sql) or die("could not connect to db [add_from_file]");
        }

        fclose($handle);
        echo "added wordlist";
        return true;
    }

    /**
     * Escapes a value for use inside a quoted SQL string literal.
     *
     * Uses $db->sql_escape() when the database object offers it, then
     * mysql_real_escape_string() where that function still exists, and
     * addslashes() as the last resort.
     */
    private function escape_sql($value)
    {
        global $db;

        if (is_object($db) && method_exists($db, 'sql_escape'))
        {
            return $db->sql_escape($value);
        }

        if (function_exists('mysql_real_escape_string'))
        {
            $escaped = mysql_real_escape_string($value);
            if ($escaped !== false)
            {
                return $escaped;
            }
        }

        return addslashes($value);
    }
}
?>