Login   Register  
PHP Classes
elePHPant
Icontem

File: examples/MapReduce/demo.map-reduce.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Daniel Martinez  >  Forker PHP  >  examples/MapReduce/demo.map-reduce.php  >  Download  
File: examples/MapReduce/demo.map-reduce.php
Role: Example script
Content type: text/plain
Description: Example script
Class: Forker PHP
Split tasks into multiple forked processes
Author: By
Last change: Changing indentation
Beginning splitting responsibilities
Date: 1 month ago
Size: 1,807 bytes
 

Contents

Class file image Download
<?php
/****************************************************************
 * [Forker]
 *
 * Example: MapReduce example counts the appearance of each
 *          word in a set of documents
 *
 * Usage  : php examples/MapReduce/demo.map-reduce.php > test-mp
 * Storage: FileStorage
 ****************************************************************/
require 'vendor/autoload.php';

use 
Forker\Forker;
use 
Forker\Storage\FileStorage;

$myResult = 0;

// Each task is the name of a text file located next to this script.
$myTasks = array(
    'quijote-1.txt',
    'quijote-2.txt',
    'quijote-3.txt',
);

$numberOfSubTasks = 3;

$forker = new Forker(new FileStorage(), $myTasks, $numberOfSubTasks);
$path   = dirname(__FILE__);

// MAP
// For every task (file name) read the file and emit (word, 1) once per
// word occurrence found by getUTF8Words().
$forker->fork(function ($key, $fileName, $emit) use ($path) {

    $file_to_get = "$path/$fileName";
    $content     = file_get_contents($file_to_get);

    foreach (getUTF8Words($content) as $word) {
        $emit($word, 1);
    }

});

// REDUCE
// NOTE(review): $mapped is presumably keyed by word, with the values
// emitted for that word as the entry -- confirm against Forker::fetch().
$mapped = $forker->fetch();

// We don't set here the number of sub tasks,
// since we don't know the total number
$forker = new Forker(new FileStorage('/tmp/reduced-words'), $mapped);

// A word's total is the number of values collected for it; a non-array
// entry is counted as a single occurrence.
$forker->fork(function ($word, $counts, $emit) {
    $emit($word, is_array($counts) ? count($counts) : 1);
});

$allWords = $forker->fetch();

// Highest counts first, keeping the word => count association.
arsort($allWords, SORT_NUMERIC);

// First 10 words most used :)
$cont = 10;

foreach ($allWords as $word => $counts) {
    echo $word . " (" . $counts . ")\n";
    // Stop once ten lines have been printed.
    if (! --$cont) break;
}

//////////////////////////////////////////////////////////
/**
 * Extract the words contained in a UTF-8 encoded text.
 *
 * A word is a run of at least three consecutive "letters", where a letter is
 * one of:
 *  - an ASCII letter a-z / A-Z;
 *  - the byte 0xC3 followed by 0x80-0x96, 0x98-0xB6 or 0xB8-0xBF, i.e. the
 *    UTF-8 encoding of U+00C0..U+00FF without U+00D7 and U+00F7;
 *  - the byte 0xC5 followed by one of 0x92, 0x93, 0xA0, 0xA1, 0xB8, 0xBD,
 *    0xBE (U+0152, U+0153, U+0160, U+0161, U+0178, U+017D, U+017E).
 *
 * The pattern is byte oriented (no /u modifier), so any other character acts
 * as a word separator.
 *
 * @param string $text Text to scan.
 *
 * @return array List of matched words in order of appearance; an empty array
 *               when nothing matches or the match fails.
 */
function getUTF8Words($text)
{
    $match_arr = array();

    //http://stackoverflow.com/questions/10684183/extract-words-from-string-with-preg-match-all
    if (preg_match_all('/([a-zA-Z]|\xC3[\x80-\x96\x98-\xB6\xB8-\xBF]|\xC5[\x92\x93\xA0\xA1\xB8\xBD\xBE]){3,}/', $text, $match_arr)) {
        // Index 0 holds the full-pattern matches, i.e. the complete words.
        return $match_arr[0];
    }
    return array();
}