PHP Classes
Icontem

File: maildir2mbox.php


  Search   All class groups All class groups   Latest entries Latest entries   Top 10 charts Top 10 charts   Newsletter Newsletter   Blog Blog   Forums Forums   Help FAQ Help FAQ  
  Login   Register  
Recommend this page to a friend! ReTweet ReTweet Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Charalampos Pournaris  >  Maildir To Mailbox  >  maildir2mbox.php  
File: maildir2mbox.php
Role: Class source
Content type: text/plain
Description: The php file for all the work
Class: Maildir To Mailbox
Convert messages from maildir to mbox format
 

Contents

Class file image Download
<?php
/**
 * Maildir (kmail, qmail etc.) to MailBox (Thunderbird, kmail ..) conversion script
 * 
 * I wrote this script because I needed to convert a quite big (~5GB) maildir directory structure
 * to an mbox file structure in order to open it from my thunderbird mail client. I was unable
 * to find a script/program which could work with such a big directory (or work at all) so I
 * wrote my own.
 * 
 * You should invoke it this way: # php maildir2mbox.php Maildir (or Maildir/,
 * it's the same), where Maildir is your top level mail directory.
 * 
 * If you add #!/usr/bin/php (the path where your php binary is located) on top of the script
 * and chmod it +x then you can invoke it with ./maildir2mbox.php Maildir/.
 * 
 * A subdir named 'mboxfiles' will be created in the same directory where the script is and all 
 * the files will be placed there. Each mbox file is named according to the subdirectory
 * that the file belongs to. For example the mbox files for the Draft directory will be called
 * DraftX where X is a number for each file (if dir is bigger than 2 GB). An mbox file won't
 * overflow the signed integer size limit (which is ~2GB) but will be split into several files
 * as described above.
 * 
 * I hope this script will be useful to you :)
 * 
 * UPDATED INFO (24-11-2008)
 * -------------------------
 * 
 * I have made some changes to the script. Your maildir directory should look like this:
 * 
 * mail/
 *   inbox/
 *     cur/
 *     new/
 *     tmp/
 *   sent/
 *     cur/
 *     new/
 *     tmp/
 *   other directory/
 *     cur/
 *     new/
 *     tmp/
 * 
 * You should place the script in the same level as the mail directory (in the above example)
 * and use the command: php maildir2mbox.php mail (or mail/,
 * it's the same).
 * 
 * All the mbox generated files will be placed in a directory called 'mboxfiles'. The filenames should
 * be inbox[.mbox], sent[.mbox] for the above example
 * 
 * -- END OF THE UPDATED INFO SECTION --
 * 
 * UPDATED INFO (26-11-2008)
 * -------------------------
 * 
 * - No extension is added to the output files by default
 * 
 * - Added support for subdirectory parsing (any level)
 * 
 * - Added three command line options:
 *   -e		Will add an .mbox extension to the output files
 *   -x		Will remove output files which are empty
 *   -h		Will print a help output describing options
 * 
 * -- END OF THE UPDATED INFO SECTION --
 * 
 * @author Charalampos 'DiAvOl' Pournaris <charpour@gnet.gr>
 * @version 3.0
 * @package converter 
 */

// A conversion can run for a very long time: disable the execution time limit
set_time_limit(0);
// Send every piece of output straight to the console instead of buffering it
ob_implicit_flush();

/********************** FUNCTION DEFINITIONS **********************/

/**
 * Write a message followed by a newline to the output and, when asked to,
 * end the script with exit status 1.
 *
 * @param string $msg The text to print
 * @param boolean $iserror When true the script terminates right after printing
 */
function printConsole($msg, $iserror=false) {
	echo $msg, "\n";

	if (!$iserror) {
		return;
	}

	exit(1);
}

/**
 * Print the command line help text and terminate the script.
 *
 * The program name shown in the first line is read from the global
 * $program_name.
 *
 * @param boolean $error When true the exit status is 1, otherwise 0
 */
function print_usage($error=false) {
	global $program_name;

	$lines = array(
		'Usage: '.$program_name.' [options] Maildir',
		'',
		"-h\tPrint this help information",
		"-e\tAdd an .mbox extension to the output files",
		"-x\tRemove empty mbox files",
	);

	// The help text ends with two newlines, i.e. one empty line after the last option
	print(implode("\n", $lines)."\n\n");

	if ($error) {
		exit(1);
	}

	exit(0);
}

/**
 * Tell whether a value has stopped being an integer.
 *
 * PHP converts the result of integer arithmetic to a float once it no longer
 * fits into the platform's integer type, so a non-integer sum signals overflow.
 *
 * @param mixed $val The value to inspect (normally the result of an addition)
 * @return boolean false when $val is of type integer, true for anything else
 */
function intOverflowed($val) {
	if (is_int($val)) {
		return false;
	}

	return true;
}

/**
 * Test a file name against the maildir naming pattern FILE_FORMAT.
 *
 * Whitespace around the name is stripped before the pattern is applied.
 *
 * @param string $name File name to check
 * @return int|boolean The preg_match() result: 1 on a match, 0 on no match,
 *                     false if the pattern could not be evaluated
 */
function isMaildirFile($name) {
	$candidate = trim($name);
	$matched = preg_match(FILE_FORMAT, $candidate);

	return $matched;
}

/**
 * Change PHP's memory_limit setting.
 *
 * The requested value may use PHP's shorthand notation ("512K", "32M",
 * "1G") or be a plain byte count. A request below 8 MB is raised to '8M';
 * '-1' (no limit) is passed through untouched. Because $newlimit is taken
 * by reference, the caller sees the value that was actually applied.
 *
 * @param string $newlimit The new limit to be set (updated in place when raised to '8M')
 * @return string|boolean The previous limit, or false if the limit could not be changed
 */
function setMemoryLimit(&$newlimit) {
	$minimum = 8 * 1024 * 1024;
	$spec = trim((string) $newlimit);

	if ($spec !== '-1') {
		// intval() only sees the leading digits, so the unit suffix has to be
		// applied separately; comparing the bare number would treat '1G' as 1.
		$bytes = intval($spec);

		// Each case deliberately falls through: 'G' multiplies three times,
		// 'M' twice and 'K' once.
		switch (strtolower(substr($spec, -1))) {
			case 'g':
				$bytes *= 1024;
			case 'm':
				$bytes *= 1024;
			case 'k':
				$bytes *= 1024;
		}

		if ($bytes < $minimum) {
			$newlimit = '8M';
		}
	}

	// ini_set() returns the old value on success and false on failure
	return ini_set('memory_limit', $newlimit);
}

/**
 * List the entries of a single directory, sorted into four groups.
 *
 * Entry names are appended directly to $dir, so $dir has to end with a
 * directory separator. The '.' and '..' entries are never reported. If the
 * directory cannot be opened, all four groups are returned empty.
 *
 * @param string $dir Directory to read, including the trailing separator
 * @param boolean $gethidden When false, entries whose name starts with '.' are ignored
 * @param array $exceptions Names of sub-directories to leave out (not applied to files)
 * @return array Four name => path maps under the keys 'dirs', 'hdirs' (hidden
 *               directories), 'files' and 'hfiles' (hidden files); directory
 *               paths carry a trailing DIR_SEPERATOR
 */
function parseDirectory($dir, $gethidden=false, $exceptions=array()) {
	$dirs = array();
	$hdirs = array();
	$files = array();
	$hfiles = array();

	$fp = opendir($dir);

	if ($fp !== false) {
		while (false !== ($file = readdir($fp))) {
			if ($file == '.' || $file == '..')
				continue;

			// Square brackets: the $file{0} offset syntax no longer parses on PHP 8
			$hidden = ($file[0] == '.');

			if ($hidden && !$gethidden)
				continue;

			$filename = $dir.$file;

			if (is_file($filename)) {
				if ($hidden)
					$hfiles[$file] = $filename;
				else
					$files[$file] = $filename;
			} elseif (is_dir($filename)) {
				// Strict comparison so that a name is only matched by an identical string
				if (in_array($file, $exceptions, true))
					continue;

				if ($hidden)
					$hdirs[$file] = $filename.DIR_SEPERATOR;
				else
					$dirs[$file] = $filename.DIR_SEPERATOR;
			}
		}

		closedir($fp);
	}

	return array('dirs'=>$dirs,'hdirs'=>$hdirs,'files'=>$files,'hfiles'=>$hfiles);
}

/**
 * Build the directory tree below $root by scanning $path recursively.
 *
 * Every visible sub-directory of $path (except 'mboxfiles') becomes a child
 * of $root whose mbox path is "<root mbox path>/<name>.sbd". Hidden
 * directories are expected to be named ".<parent>.<suffix>": their content
 * is attached below the child called <parent>. Hidden directories that do
 * not follow this pattern, or whose parent does not exist, are reported and
 * skipped.
 *
 * @param DirectoryObj $root Node that receives the directories found in $path
 * @param string $path Directory to scan, including the trailing separator
 */
function parseTree($root, $path) {
	$dirtree = parseDirectory($path, true, array('mboxfiles'));

	foreach ($dirtree['dirs'] as $dname=>$dpath) {
		$mboxdir = new DirectoryObj($dname, $dpath, $root->getMboxPath().'/'.$dname.'.sbd');

		$root->addChild($mboxdir);
	}

	$children = $root->getChild();

	foreach ($dirtree['hdirs'] as $hdname=>$hdpath) {
		// The parent name sits between the leading dot and the next dot
		$dotpos = strpos($hdname, '.', 1);

		if ($dotpos === false) {
			printConsole("Skipping hidden directory ".$hdpath.": name does not contain a parent folder");
			continue;
		}

		$parent_name = substr($hdname, 1, $dotpos-1);

		// Without this check a missing parent led to a method call on null
		if (!isset($children[$parent_name])) {
			printConsole("Skipping hidden directory ".$hdpath.": no folder named '".$parent_name."' found");
			continue;
		}

		parseTree($children[$parent_name], $hdpath);
	}
}

/**
 * Generate the target mbox files for every directory below $root.
 *
 * When $root has children, the directory named by its mbox path is created
 * first (if it does not exist yet); the script stops if that fails. Each
 * child is then converted with a DirParser and processed recursively.
 *
 * @param DirectoryObj $root Node whose children are converted
 */
function makeTree($root) {
	if ($root->hasChild()) {
		$target = $root->getMboxPath();

		// An existing directory (e.g. from an earlier run) is reused as is
		if (!is_dir($target) && !mkdir($target, 0777, true)) {
			printConsole("Error occurred while creating directory: ".$target, true);
		}
	}

	foreach ($root->getChild() as $chname=>$chobj) {
		printConsole('******************* Processing directory '.$chobj->getFullPath().' *******************');
		$dir = new DirParser($chobj->getFullPath(), $chobj->getMboxPath());
		$dir->parse();

		if ($dir->getFileCount() == 0) {
			printConsole("No files found in this directory..");
		} else {
			$dir->printSummary();
		}

		// Drop the parser before descending so its directory handle is closed
		unset($dir);
		printConsole('*************************** Finished ***************************');
		printConsole("\n");

		makeTree($chobj);
	}
}

/********************** CLASS DEFINITIONS **********************/

/**
 * One node of the mail directory tree.
 *
 * A node stores its name, the path of the maildir directory it stands for,
 * the mbox path associated with it and its child nodes keyed by name.
 */
class DirectoryObj {
	private $name = '';
	private $fullpath = '';
	private $mboxpath = '';
	// Child nodes, indexed by their name
	private $child = array();

	/**
	 * @param string $name Directory name
	 * @param string $fullpath Path of the source directory
	 * @param string $mboxpath Output path associated with this directory
	 */
	public function __construct($name, $fullpath, $mboxpath) {
		$this->name = $name;
		$this->fullpath = $fullpath;
		$this->mboxpath = $mboxpath;
	}

	/**
	 * @return string
	 */
	public function getName() {
		return $this->name;
	}

	/**
	 * @return string
	 */
	public function getFullPath() {
		return $this->fullpath;
	}

	/**
	 * @return string
	 */
	public function getMboxPath() {
		return $this->mboxpath;
	}

	/**
	 * @param string $p The new mbox path
	 */
	public function setMboxPath($p) {
		$this->mboxpath = $p;
	}

	/**
	 * Add a child directory. A child with the same name is replaced.
	 *
	 * @param DirectoryObj $child
	 */
	public function addChild($child) {
		$this->child[$child->getName()] = $child;
	}

	/**
	 * Returns all the child elements
	 *
	 * @return array Child nodes (DirectoryObj) indexed by name
	 */
	public function getChild() {
		return $this->child;
	}

	/**
	 * Returns a child directory given its name
	 *
	 * @param string $name
	 * @return DirectoryObj|null The child, or null when no child has that name
	 */
	public function getDirByName($name) {
		// Look the key up explicitly instead of reading an undefined index
		if (!isset($this->child[$name])) {
			return null;
		}

		return $this->child[$name];
	}

	/**
	 * @return boolean true when at least one child has been added
	 */
	public function hasChild() {
		return !empty($this->child);
	}
}

/**
 * Converts the messages of one maildir folder into one or more mbox files.
 *
 * Every sub-directory of the folder is scanned for files whose name matches
 * the maildir pattern; each of them is appended to the current mbox file. A
 * new mbox file is started when the running byte count would no longer be an
 * integer. Totals over all parsed folders are kept in static counters.
 */
class DirParser {
	// Folder being converted, including the trailing separator
	private $fulldir = '';
	// Last path component of $fulldir; used as the mbox file name
	private $dirname = '';
	// Number of maildir files encountered
	private $mdirfiles = 0;
	// Number of files written to an mbox file
	private $parsedfiles = 0;
	// Number of files skipped because no sender header was found
	private $bogusfiles = 0;
	// Directory (with trailing '/') that receives the mbox files
	private $output_fn = '';
	private $dirhandle = null;
	private $mboxhnd = null;
	private $mboxfn = null;
	// Sequence number of the mbox file currently written (1 = no suffix)
	private $mboxfilecnt = 1;
	// Bytes accounted for in the current mbox file
	private $totalsize = 0;
	private $chunks = 0;
	private static $total_converted = 0;
	private static $total_failed = 0;

	/**
	 * Open the folder for reading. The script is terminated if that fails.
	 *
	 * @param string $fdir Folder to convert, including the trailing separator
	 * @param string $output Mbox path of the folder; everything up to and
	 *                       including its last '/' is used as output directory
	 */
	public function __construct($fdir, $output) {
		$this->fulldir = $fdir;
		$this->output_fn = substr($output, 0, strrpos($output, '/')+1);

		$this->dirhandle = opendir($fdir);

		if ($this->dirhandle) {
			// Strip the trailing separator, then keep the last path component
			$this->dirname = substr($fdir,0,strlen($fdir)-1);
			$this->dirname = substr($this->dirname,strrpos($this->dirname,DIR_SEPERATOR)+1);
		} else {
			printConsole("Error occured while opening directory: ".$fdir, true);
		}
	}

	/**
	 * Open the current mbox file for writing (truncating an existing one).
	 * The name is <output dir><folder name>[<sequence number>][.mbox].
	 */
	private function openFile() {
		global $mbox_extension;

		$sequence = ($this->mboxfilecnt == 1 ? '' : $this->mboxfilecnt);
		$this->mboxfn = $this->output_fn.$this->dirname.$sequence.($mbox_extension ? '.mbox' : '');

		// The mode has to be a string; a bare w is an undefined constant
		$this->mboxhnd = fopen($this->mboxfn, 'w');

		if (!$this->mboxhnd) {
			printConsole("Error occured while opening mbox file for writing", true);
		}
	}

	/**
	 * Close the current mbox file and, when the global $remove_empty is set,
	 * delete it if nothing was written to it.
	 */
	private function closeFile() {
		global $remove_empty;

		if (is_resource($this->mboxhnd)) {
			fclose($this->mboxhnd);
		}
		$this->mboxhnd = null;

		// totalsize is zero both when no mail was found and when every mail
		// was skipped, so the file is empty in either case
		if ($remove_empty && $this->totalsize == 0 && is_file($this->mboxfn)) {
			unlink($this->mboxfn);
		}
	}

	/**
	 * Convert all maildir files found in the sub-directories of the folder.
	 */
	public function parse() {
		$this->openFile();

		while (false !== ($rfile = readdir($this->dirhandle))) {
			$subdir = $this->fulldir.$rfile;

			if ($rfile == '..' || $rfile == '.' || !is_dir($subdir))
				continue;

			$curhandle = opendir($subdir);

			if (!$curhandle) {
				printConsole("[Error] Could not open directory: ".$subdir);
				// Nothing can be read from a handle that failed to open
				continue;
			}

			while (false !== ($file = readdir($curhandle))) {
				$filename = $subdir.'/'.$file;

				if (is_file($filename) && isMaildirFile($file)) {
					$this->convertFile($filename, $file);
				}
			}

			closedir($curhandle);
		}

		$this->closeFile();
	}

	/**
	 * Append a single maildir file to the current mbox file, starting a new
	 * mbox file first if the size counter would overflow.
	 *
	 * @param string $filename Path of the maildir file
	 * @param string $file Bare file name, used in messages only
	 */
	private function convertFile($filename, $file) {
		print("\rFilecount: ".(++$this->mdirfiles)."\tTotal size written to mbox file: ".$this->totalsize);
		$mdfile = new MailDirFile($filename);
		$mdfile->readHeader();

		if (!$mdfile->foundAddress()) {
			printConsole("\nHeader not found for : ".$file.", file skipped");
			$this->bogusfiles++;
			return;
		}

		// Ask for the size once so the check and the counter use the same value
		$msgsize = $mdfile->getTotalSize();

		if (intOverflowed($this->totalsize + $msgsize)) {
			$this->closeFile();
			$this->mboxfilecnt++;
			$this->chunks++;
			$this->totalsize = 0;
			print("\n\t\t\t\t\r");
			$this->openFile();
		}

		++$this->parsedfiles;
		$this->totalsize += $msgsize;
		fwrite($this->mboxhnd,$mdfile->getMboxHeader()."\n".$mdfile->getHeader());
		$mdfile->streamWriteContents($this->mboxhnd);
		fwrite($this->mboxhnd, "\n");
	}

	/**
	 * @return int Number of maildir files encountered by parse()
	 */
	public function getFileCount() {
		return $this->mdirfiles;
	}

	/**
	 * Print the counters of this folder and add them to the global totals.
	 */
	public function printSummary() {
		printConsole("\n".'------------------------ SUMMARY ------------------------');
		printConsole("Wrote ".$this->mboxfilecnt." mbox files for this directory");
		printConsole("Converted ".$this->parsedfiles.'/'.$this->mdirfiles." files successfully");
		printConsole("Found ".$this->bogusfiles." files without a proper header and skipped them for this directory");
		printConsole('---------------------------------------------------------');

		self::$total_converted += $this->parsedfiles;
		self::$total_failed += $this->bogusfiles;
	}

	/**
	 * Static because the totals are class-wide and are read without an instance.
	 *
	 * @return int Files converted by all folders summarised so far
	 */
	public static function getTotalConverted() {
		return self::$total_converted;
	}

	/**
	 * Static because the totals are class-wide and are read without an instance.
	 *
	 * @return int Files skipped by all folders summarised so far
	 */
	public static function getTotalFailed() {
		return self::$total_failed;
	}

	/**
	 * @return string
	 */
	public function getDirName() {
		return $this->dirname;
	}

	public function __destruct() {
		if (is_resource($this->dirhandle))
			closedir($this->dirhandle);
		$this->dirhandle = null;
	}
}

/**
 * A single maildir message file.
 *
 * The object reads the message header, derives the mbox "From " separator
 * line (sender address plus date) from it and can copy the message body to
 * another stream.
 */
class MailDirFile {
	private $filename = '';
	// Size of the message file in bytes
	private $filesize = 0;
	private $handler = null;
	// The mbox separator line; sender and date are appended by readHeader()
	private $mboxheader = 'From ';
	private $from = '';
	// Date part of the separator line
	private $dt = '';
	private $header = '';
	private $contents='';
	// True once one of the sender header lines has been seen
	private $foundaddr = false;
	private $fullmsg = '';
	// How often each sender header has been seen, over all files
	private static $search_array = array(
	"From:"=>0, "X-From-Line:"=>0, "Reply-To:"=>0, "from:"=>0
	);
	// Address found for each sender header of this file
	private $search_array_filled = array(
	"From:"=>'', "X-From-Line:"=>'', "Reply-To:"=>'', "from:"=>''
	);

	/**
	 * Open the file and determine its size. The script is terminated if the
	 * file cannot be opened.
	 *
	 * @param string $fname Path of the maildir file
	 */
	public function __construct($fname) {
		$this->filename = $fname;
		// Used when the message has no parsable Date header; formatted like
		// every other date so the separator line stays well-formed
		$this->dt = gmdate("D M d H:i:s Y", 0);
		$this->handler = fopen($fname, "r");
		if (!$this->handler) {
			printConsole("Error occured while opening file: ".$fname, true);
		}
		$this->getFileSize();
	}

	/**
	 * @return array Header name => number of times it has been seen
	 */
	public static function getSearchArray() {
		return self::$search_array;
	}

	/**
	 * Reduce a header token to the bare address.
	 *
	 * @param string $addr Token containing an address, e.g. "<a@b.c>,"
	 * @return string The text between '<' and '>' if present, otherwise the
	 *                token without surrounding punctuation
	 */
	private function cleanAddress($addr) {
		$addr = trim($addr);

		if (preg_match('/<([^<>]*)>/', $addr, $match)) {
			return $match[1];
		}

		return trim($addr, "<>,;\"'()");
	}

	/**
	 * Prefix a line with '>' if it would otherwise look like an mbox
	 * separator ("From " at the start, optionally preceded by '>' characters).
	 *
	 * @param string $line
	 * @return string
	 */
	private function quoteFromLine($line) {
		return preg_match('/^>*From /', $line) ? '>'.$line : $line;
	}

	/**
	 * Read the rest of the file (unmodified) into memory.
	 */
	public function readContents() {
		while(!feof($this->handler)) {
			$this->contents .= fread($this->handler, 8192);
		}
	}

	/**
	 * Copy the rest of the file to $fp, quoting lines that start with
	 * "From " so they cannot be mistaken for a message separator.
	 *
	 * @param resource $fp Stream opened for writing
	 */
	public function streamWriteContents($fp) {
		$atlinestart = true;

		// Read in bounded pieces so a very long line cannot exhaust memory
		while (($piece = fgets($this->handler, 8192)) !== false) {
			fwrite($fp, $atlinestart ? $this->quoteFromLine($piece) : $piece);
			$atlinestart = (substr($piece, -1) === "\n");
		}
	}

	/**
	 * Read the header up to and including the first empty line and build the
	 * mbox separator line from the sender and Date headers.
	 */
	public function readHeader() {
		// A line consisting only of "\n" or "\r\n" ends the header
		while (($line = fgets($this->handler)) !== false && rtrim($line, "\r\n") !== '') {
			$foundfrom = false;

			foreach (self::$search_array as $search=>$count) {
				if (!$this->startsWith($search, $line)) {
					continue;
				}

				$foundfrom = true;
				self::$search_array[$search] = $count + 1;
				$this->foundaddr = true;

				// Look for the first token of the header value that holds an address
				$value = trim(substr($line, strlen($search)));

				foreach (preg_split('/\s+/', $value) as $item) {
					if (strpos($item,'@') !== false) {
						$this->search_array_filled[$search] = $this->cleanAddress($item);
						break;
					}
				}
				break;
			}

			if (!$foundfrom && $this->startsWith("Date:", $line)) {
				$stamp = strtotime(trim(substr($line, 5)));

				// An unparsable date keeps the default set in the constructor
				if ($stamp !== false) {
					$this->dt = gmdate("D M d H:i:s Y", $stamp);
				}
			}
			$this->header .= $line;
		}

		// Keep the empty separator line; at end of file there is none
		if ($line !== false) {
			$this->header .= $line;
		}

		// Preference order of the sender headers
		foreach (array('From:', 'from:', 'Reply-To:', 'X-From-Line:') as $key) {
			if (!empty($this->search_array_filled[$key])) {
				$this->from = $this->search_array_filled[$key];
				break;
			}
		}

		// A sender header without a usable address would leave the separator
		// line without a sender; fall back to the conventional placeholder
		if (empty($this->from)) {
			$this->from = 'MAILER-DAEMON';
		}

		$this->mboxheader .= $this->from.' '.$this->dt;
	}

	/**
	 * @return boolean true if readHeader() saw one of the sender headers
	 */
	public function foundAddress() {
		return $this->foundaddr;
	}

	public function printContents() {
		print($this->contents);
	}

	public function printHeader() {
		print($this->header);
	}

	/**
	 * @return string The mbox separator line, without a trailing newline
	 */
	public function getMboxHeader() {
		return $this->mboxheader;
	}

	/**
	 * @return string The header as read, including the terminating empty line
	 */
	public function getHeader() {
		return $this->header;
	}

	/**
	 * @return string The body as loaded by readContents()
	 */
	public function getContents() {
		return $this->contents;
	}

	/**
	 * Assemble the complete mbox entry from the data read so far.
	 *
	 * @return string Separator line, header, quoted body and a final newline
	 */
	public function getMboxMessage() {
		$body = preg_replace('/^(>*From )/m', '>$1', $this->contents);
		$this->fullmsg = $this->mboxheader."\n".$this->header.$body."\n";
		return $this->fullmsg;
	}

	/**
	 * Size of the message plus its separator line and two newlines. Calling
	 * this repeatedly returns the same value.
	 *
	 * @return int
	 */
	public function getTotalSize() {
		return $this->filesize + strlen($this->mboxheader) + 2;
	}

	/**
	 * Determine the file size by seeking to the end, then rewind.
	 */
	private function getFileSize() {
		fseek($this->handler,0,SEEK_END);
		$this->filesize = ftell($this->handler);
		fseek($this->handler,0,SEEK_SET);
	}

	/**
	 * @param string $str Prefix to look for (case-sensitive)
	 * @param string $line
	 * @return boolean
	 */
	private function startsWith($str, $line) {
		return $str == substr($line,0,strlen($str));
	}

	/**
	 * @return int Size of the message file in bytes
	 */
	public function getSize() {
		return $this->filesize;
	}

	public function __destruct() {
		if (is_resource($this->handler)) {
			fclose($this->handler);
		}
		$this->handler = null;
	}
}

/********************** MAIN CODE **********************/
define('DIR_SEPERATOR','/');
// Directory (relative to the current working directory) that receives the mbox files
define('OUTPUT_DIR','mboxfiles');
// Pattern a file name has to start with to be treated as a maildir message
define('FILE_FORMAT',"/^[0-9]*\.[a-zA-Z0-9_]*\.[a-zA-Z0-9\:\,]*/");

// Set by -e: append '.mbox' to the output file names
$mbox_extension = false;
// Set by -x: delete output files that stayed empty
$remove_empty = false;

$program_name = $argv[0];

// Take the recognised switches out of the argument list; what is left
// afterwards has to be the program name and the maildir path.
foreach ($argv as $k=>$argument) {
	// The program name is not an option
	if ($k === 0) {
		continue;
	}

	switch (trim($argument)) {
		case '-e':  {
			$mbox_extension = true;
			unset($argv[$k]);
			break;
		}
		case '-x':  {
			$remove_empty = true;
			unset($argv[$k]);
			break;
		}
		case '-h': {
			// Prints the help text and exits with status 0
			print_usage();
		}
		default: {
			if ($argument !== '' && $argument[0] == '-') {
				printConsole("Unknown Option: $argument");
				print_usage(true);
			}
		}
	}
}

// Re-index argv
$argv = array_values($argv);

if (count($argv) != 2) {
	printConsole(count($argv) < 2 ? "No Maildir directory specified" : "Too many arguments");
	// A wrong invocation is an error: exit with a non-zero status
	print_usage(true);
}

$maildir = $argv[1];

// An empty argument would otherwise be turned into '/' below
if ($maildir === '') {
	printConsole("No Maildir directory specified");
	print_usage(true);
}

// Check if the last character of the given directory is the dir seperator, if not add it.
if (substr($maildir, -1) != DIR_SEPERATOR) {
	$maildir .= '/';
}

// If the given name is a file instead of a directory kill the script
if (!is_dir($maildir)) {
	printConsole("$maildir is not a directory", true);
}

// Variable initialization
$memory_limit = '32M';
$oldlimit = '';

printConsole('Setting memory limit...');

if ( ($oldlimit = setMemoryLimit($memory_limit)) === false ) {
	printConsole("Error setting memory limit to $memory_limit.", true);
} else {
	printConsole("New memory limit: $memory_limit\nOld memory limit: $oldlimit");
}

// Name the root node after the last path component, so that absolute paths
// such as /home/user/mail/ do not produce an empty name
$rootnode = new DirectoryObj(basename(rtrim($maildir, DIR_SEPERATOR)), $maildir, OUTPUT_DIR);

parseTree($rootnode, $rootnode->getFullPath());

makeTree($rootnode);

printConsole("Finished processing all the directories.\nOutput files are located in directory: ".OUTPUT_DIR."/");
printConsole("Total files converted: ".DirParser::getTotalConverted());
printConsole("Total files failed: ".DirParser::getTotalFailed());
?>

 
  Advertise on this site Advertise on this site   Site map Site map   Statistics Statistics   Site tips Site tips   Privacy policy Privacy policy   Contact Contact  

For more information send a message to :
info at phpclasses dot org.
Copyright (c) Icontem 1999-2009 PHP Classes - PHP Class Scripts
  PHP Book Reviews - Reviews of books and other products