Login   Register  
PHP Classes
elePHPant
Icontem

File: maildir2mbox.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Charalampos Pournaris  >  Maildir To Mailbox  >  maildir2mbox.php  >  Download  
File: maildir2mbox.php
Role: Class source
Content type: text/plain
Description: The php file for all the work
Class: Maildir To Mailbox
Convert messages from maildir to mbox format
Author: By
Last change: Fixed a bug in fopen, w -> 'w' thanks to Dave Silvester
Date: 4 years ago
Size: 17,579 bytes
 

Contents

Class file image Download
<?php
/**
 * Maildir (kmail, qmail etc.) to MailBox (Thunderbird, kmail ..) conversion script
 * 
 * I wrote this script because I needed to convert a quite big (~5GB) maildir directory structure
 * to an mbox file structure in order to open it from my thunderbird mail client. I was unable
 * to find a script/program which could work with such a big directory (or work at all) so i 
 * wrote my own.
 * 
 * You should invoke it this way: # php maildir2mbox.php Maildir (or Maildir/,
 * it's the same), where Maildir is your top level mail directory.
 * 
 * If you add #!/usr/bin/php (the path where your php binary is located) on top of the script
 * and chmod it +x then you can invoke it with ./maildir2mbox.php Maildir/.
 * 
 * A subdir named 'mboxfiles' will be created in the same directory where the script is and all 
 * the files will be placed there. Each mbox file is named according to the subdirectory
 * that the file belongs to. For example the mbox files for the Draft directory will be called
 * DraftX where X is a number for each file (if dir is bigger than 2 GB). An mbox file won't 
 * overflow the signed integer size limit (which is ~2GB) but will be split into more files as 
 * described above.
 * 
 * I hope this script will be useful to you :)
 * 
 * UPDATED INFO (24-11-2008)
 * -------------------------
 * 
 * I have made some changes to the script. Your maildir directory should look like this:
 * 
 * mail/
 *   inbox/
 *     cur/
 *     new/
 *     tmp/
 *   sent/
 *     cur/
 *     new/
 *     tmp/
 *   other directory/
 *     cur/
 *     new/
 *     tmp/
 * 
 * You should place the script in the same level as the mail directory (in the above example)
 * and use the command: php maildir2mbox.php mail (or mail/,
 * it's the same).
 * 
 * All the mbox generated files will be placed in a directory called 'mboxfiles'. The filenames should
 * be inbox[.mbox], sent[.mbox] for the above example
 * 
 * -- END OF THE UPDATED INFO SECTION --
 * 
 * UPDATED INFO (26-11-2008)
 * -------------------------
 * 
 * - No extension is added to the output files by default
 * 
 * - Added support for subdirectory parsing (any level)
 * 
 * - Added three command line options:
 *   -e		Will add an .mbox extension to the output files
 *   -x		Will remove output files which are empty
 *   -h		Will print a help output describing options
 * 
 * -- END OF THE UPDATED INFO SECTION --
 * 
 * @author Charalampos 'DiAvOl' Pournaris <charpour@gnet.gr>
 * @version 3.0
 * @package converter 
 */

// Turn on implicit flushing so every print/echo is sent out right away;
// the progress counters printed during conversion then show up immediately
ob_implicit_flush();
// Remove the execution time limit (0 = no limit) so a long conversion is not aborted
set_time_limit(0);

/********************** FUNCTION DEFINITIONS **********************/

/**
 * Write a message, followed by a newline, to the standard output.
 *
 * When $iserror is true the script is terminated with exit status 1
 * right after the message has been written.
 *
 * @param string $msg The message to print
 * @param boolean $iserror Whether to terminate the script after printing
 */
function printConsole($msg, $iserror=false) {
	echo $msg."\n";

	// Nothing more to do for an ordinary (non fatal) message
	if (!$iserror) {
		return;
	}

	exit(1);
}

/**
 * Print the command line usage text and terminate the script.
 *
 * The program name shown in the first line is taken from the global
 * variable $program_name.
 *
 * @param boolean $error When true the exit status is 1, otherwise it is 0
 */
function print_usage($error=false) {
	global $program_name;

	// The escaped \n\n at the end of the text is expanded by the heredoc
	$usage = <<<OUTPUT
Usage: $program_name [options] Maildir

-h	Print this help information
-e	Add an .mbox extension to the output files
-x	Remove empty mbox files\n\n
OUTPUT;

	echo $usage;

	if ($error) {
		exit(1);
	}

	exit(0);
}

/**
 * Tell whether a value is no longer a native integer.
 *
 * PHP converts the result of integer arithmetic to a float when it does not
 * fit into an int any more, so a non-integer type is treated as an overflow.
 * Any other non-integer value (string, float, null, ...) is reported as
 * overflowed as well.
 *
 * @param mixed $val The value to inspect, normally the result of an integer sum
 * @return boolean False when $val is an integer, true otherwise
 */
function intOverflowed($val) {
	return !is_int($val);
}

/**
 * Check a file name against the maildir file name pattern.
 *
 * Surrounding whitespace is stripped from the name before it is matched
 * against the FILE_FORMAT regular expression.
 *
 * @param string $name File name to check
 * @return int|boolean The preg_match() result: 1 on a match, 0 on no match,
 *                     false when the match itself failed
 */
function isMaildirFile($name) {
	$candidate = trim($name);
	$matched = preg_match(FILE_FORMAT, $candidate);

	return $matched;
}

/**
 * Change the memory limit of the running script.
 *
 * The requested limit is converted to bytes, honouring the K, M and G
 * shorthand suffixes understood by php.ini, before it is compared with the
 * 8M minimum. Anything smaller than 8M (including plain byte counts such as
 * '16' and negative values such as '-1') is replaced by '8M'.
 *
 * @param string $newlimit The new limit to be set; passed by reference and
 *                         overwritten with '8M' when the request was too small
 * @return string|boolean The previous memory limit, or false when ini_set() failed
 */
function setMemoryLimit(&$newlimit) {
	$requested = trim((string) $newlimit);
	$bytes = (float) intval($requested);

	// Each case deliberately falls through to the next one so that
	// 'g' multiplies three times, 'm' twice and 'k' once.
	switch (strtolower(substr($requested, -1))) {
		case 'g':
			$bytes *= 1024;
		case 'm':
			$bytes *= 1024;
		case 'k':
			$bytes *= 1024;
	}

	if ($bytes < 8 * 1024 * 1024) {
		$newlimit = '8M';
	}

	// ini_set() already returns the old value on success and false on failure
	return ini_set('memory_limit', $newlimit);
}

/**
 * List the entries of a single directory (no recursion).
 *
 * Entries are sorted into four buckets: visible directories, hidden
 * directories, visible files and hidden files. An entry is hidden when its
 * name starts with a dot. The special entries '.' and '..' are never
 * returned. Directory paths in the result end with DIR_SEPERATOR.
 *
 * @param string $dir Directory to read; must end with a directory separator
 *                    because entry names are appended to it directly
 * @param boolean $gethidden When false, hidden files and directories are skipped
 * @param array $exceptions Directory names (not paths) to leave out of the result
 * @return array Array with the keys 'dirs', 'hdirs', 'files' and 'hfiles', each
 *               mapping an entry name to its path. All four are empty when
 *               the directory could not be opened.
 */
function parseDirectory($dir, $gethidden=false, $exceptions=array()) {
	$dirs = array();
	$hdirs = array();
	$files = array();
	$hfiles = array();

	$fp = opendir($dir);

	if ($fp) {
		while (false !== ($file = readdir($fp))) {
			if ($file == '.' || $file == '..')
				continue;

			// readdir() never returns an empty name, so offset 0 always exists
			$hidden = ($file[0] == '.');

			if ($hidden && !$gethidden)
				continue;

			$filename = $dir.$file;

			if (is_file($filename)) {
				if ($hidden)
					$hfiles[$file] = $filename;
				else
					$files[$file] = $filename;
			} elseif (is_dir($filename)) {
				// Strict comparison: directory names are matched as strings only
				if (in_array($file, $exceptions, true))
					continue;

				if ($hidden)
					$hdirs[$file] = $filename.DIR_SEPERATOR;
				else
					$dirs[$file] = $filename.DIR_SEPERATOR;
			}
		}

		closedir($fp);
	}

	return array('dirs'=>$dirs,'hdirs'=>$hdirs,'files'=>$files,'hfiles'=>$hfiles);
}

/**
 * Build the tree of mail directories below $path using recursion.
 *
 * Every visible sub directory of $path becomes a child of $root whose mbox
 * path is "<mbox path of root>/<name>.sbd". Every hidden directory named
 * ".<parent>.<anything>" is then parsed recursively with the child called
 * <parent> as its root. Hidden directories that do not follow this naming
 * scheme, or whose parent is not a child of $root, are reported and skipped.
 * The directory 'mboxfiles' is always ignored.
 *
 * @param DirectoryObj $root Node that receives the directories found in $path
 * @param string $path Directory to scan; must end with a directory separator
 */
function parseTree($root, $path) {
	$dirtree = parseDirectory($path, true, array('mboxfiles'));

	foreach ($dirtree['dirs'] as $dname=>$dpath) {
		$mboxdir = new DirectoryObj($dname, $dpath, $root->getMboxPath().'/'.$dname.'.sbd');

		$root->addChild($mboxdir);
	}

	$children = $root->getChild();

	foreach ($dirtree['hdirs'] as $hdname=>$hdpath) {
		// The parent name is the text between the leading dot and the next dot
		$dotpos = strpos($hdname, '.', 1);

		if ($dotpos === false) {
			printConsole('Skipping hidden directory without a parent folder name: '.$hdpath);
			continue;
		}

		$parent_name = substr($hdname, 1, $dotpos-1);

		if (!isset($children[$parent_name])) {
			printConsole('Skipping hidden directory with unknown parent folder "'.$parent_name.'": '.$hdpath);
			continue;
		}

		parseTree($children[$parent_name], $hdpath);
	}
}

/**
 * Generate the target mbox files for all children of a node, recursively.
 *
 * When the node has children, the directory named by its mbox path is
 * created first (unless it exists already); the script stops when it cannot
 * be created. Each child directory is then converted with a DirParser and a
 * summary is printed, after which the child's own children are processed.
 *
 * @param DirectoryObj $root Node whose children are converted
 */
function makeTree($root) {
	if ($root->hasChild()) {
		$target = $root->getMboxPath();

		// An existing directory (e.g. from an earlier run) is reused as is
		if (!is_dir($target) && !mkdir($target, 0777, true)) {
			printConsole('Error occurred while creating directory: '.$target, true);
		}
	}

	foreach ($root->getChild() as $chname=>$chobj) {
		printConsole('******************* Processing directory '.$chobj->getFullPath().' *******************');
		$dir = new DirParser($chobj->getFullPath(), $chobj->getMboxPath());
		$dir->parse();

		if ($dir->getFileCount() == 0) {
			printConsole("No files found in this directory..");
		} else {
			$dir->printSummary();
		}

		// Drop the parser (and with it the open directory handle) before recursing
		$dir = null;
		printConsole('*************************** Finished ***************************');
		printConsole("\n");

		makeTree($chobj);
	}
}

/********************** CLASS DEFINITIONS **********************/

/**
 * A node in the tree of mail directories.
 *
 * Holds the directory name, its path in the maildir structure, the path
 * used for its mbox output and the child nodes indexed by their name.
 */
class DirectoryObj {
	private $name = '';
	private $fullpath = '';
	private $mboxpath = '';
	private $child = array();

	/**
	 * @param string $name Name of the directory
	 * @param string $fullpath Path of the directory in the maildir structure
	 * @param string $mboxpath Path used for the mbox output of this directory
	 */
	public function __construct($name, $fullpath, $mboxpath) {
		$this->name = $name;
		$this->fullpath = $fullpath;
		$this->mboxpath = $mboxpath;
	}

	/**
	 * @return string The directory name
	 */
	public function getName() {
		return $this->name;
	}

	/**
	 * @return string The path of the directory in the maildir structure
	 */
	public function getFullPath() {
		return $this->fullpath;
	}

	/**
	 * @return string The path used for the mbox output
	 */
	public function getMboxPath() {
		return $this->mboxpath;
	}

	/**
	 * @param string $p The new mbox output path
	 */
	public function setMboxPath($p) {
		$this->mboxpath = $p;
	}

	/**
	 * Add a child directory. A child with the same name is replaced.
	 *
	 * @param DirectoryObj $child
	 */
	public function addChild($child) {
		$this->child[$child->getName()] = $child;
	}

	/**
	 * Returns all the child elements
	 *
	 * @return array The children as DirectoryObj instances indexed by name
	 */
	public function getChild() {
		return $this->child;
	}

	/**
	 * Returns a child directory given its name
	 *
	 * @param string $name
	 * @return DirectoryObj|null The child, or null when no child has that name
	 */
	public function getDirByName($name) {
		// isset() avoids the undefined index warning for unknown names
		return isset($this->child[$name]) ? $this->child[$name] : null;
	}

	/**
	 * @return boolean True when at least one child has been added
	 */
	public function hasChild() {
		return !empty($this->child);
	}
}

/**
 * Converts the messages of one mail directory into one or more mbox files.
 *
 * All sub directories of the mail directory are scanned for files that look
 * like maildir files; each of them is appended to the current mbox file. A
 * new mbox file is started before the current one would grow beyond
 * MAX_MBOX_SIZE.
 */
class DirParser {
	/** Largest size in bytes of a single mbox file (signed 32 bit limit, ~2GB) */
	const MAX_MBOX_SIZE = 2147483647;

	private $fulldir = '';
	private $dirname = '';
	private $mdirfiles = 0;
	private $parsedfiles = 0;
	private $bogusfiles = 0;
	private $output_fn = '';
	private $dirhandle = null;
	private $mboxhnd = null;
	private $mboxfn = null;
	private $mboxfilecnt = 1;
	private $totalsize = 0;
	private $chunks = 0;
	private static $total_converted = 0;
	private static $total_failed = 0;

	/**
	 * Open the mail directory. The script is terminated when that fails.
	 *
	 * @param string $fdir Mail directory to convert; must end with a directory separator
	 * @param string $output Mbox path of the directory; everything up to and
	 *                       including its last '/' is used as the output location
	 */
	public function __construct($fdir, $output) {
		$this->fulldir = $fdir;
		$this->output_fn = substr($output, 0, strrpos($output, '/')+1);

		if ($this->dirhandle = opendir($fdir)) {
			// Strip the trailing separator, then keep the last path component
			$this->dirname = substr($fdir,0,strlen($fdir)-1);
			$this->dirname = substr($this->dirname,strrpos($this->dirname,DIR_SEPERATOR)+1);
		} else {
			printConsole("Error occured while opening directory: ".$fdir, true);
		}
	}

	/**
	 * Open the current mbox file for writing.
	 *
	 * The file is named after the directory; from the second file on the file
	 * counter is appended, and '.mbox' is added when the global
	 * $mbox_extension is set. The script is terminated when opening fails.
	 */
	private function openFile() {
		global $mbox_extension;

		$suffix = ($this->mboxfilecnt == 1) ? '' : $this->mboxfilecnt;
		$extension = $mbox_extension ? '.mbox' : '';

		$this->mboxfn = $this->output_fn.$this->dirname.$suffix.$extension;
		$this->mboxhnd = fopen($this->mboxfn,'w');

		if (!$this->mboxhnd) {
			printConsole("Error occured while opening mbox file for writing", true);
		}
	}

	/**
	 * Close the current mbox file and delete it when the global $remove_empty
	 * is set and nothing has been written to it.
	 */
	private function closeFile() {
		global $remove_empty;

		if (is_resource($this->mboxhnd)) {
			fclose($this->mboxhnd);
		}
		$this->mboxhnd = null;

		if ($remove_empty && is_file($this->mboxfn)) {
			// Make sure the size is read from disk and not from the stat cache
			clearstatcache();

			if (filesize($this->mboxfn) === 0) {
				unlink($this->mboxfn);
			}
		}
	}

	/**
	 * Convert every maildir file found in the sub directories of the mail
	 * directory. Files without a sender header are counted as bogus and
	 * skipped; sub directories that cannot be opened are reported and skipped.
	 */
	public function parse() {
		$this->openFile();
		while (false !== ($rfile = readdir($this->dirhandle))) {
			if ($rfile == '..' || $rfile == '.' || !is_dir($this->fulldir.$rfile))
				continue;

			$curhandle = opendir($this->fulldir.$rfile);

			if (!$curhandle) {
				printConsole("[Error] Could not open directory: ".$this->fulldir.$rfile);
				continue;
			}

			while (false !== ($file = readdir($curhandle))) {

				$filename = $this->fulldir.$rfile.'/'.$file;

				if (is_file($filename) && isMaildirFile($file)) {
					print("\rFilecount: ".(++$this->mdirfiles)."\tTotal size written to mbox file: ".$this->totalsize);
					$mdfile = new MailDirFile($filename);
					$mdfile->readHeader();

					if (!$mdfile->foundAddress()) {
						printConsole("\nHeader not found for : ".$file.", file skipped");
						$this->bogusfiles++;
						$mdfile = null;
						continue;
					}

					// Ask for the size once and reuse the value below
					$msgsize = $mdfile->getTotalSize();

					// Start a new mbox file before the size limit is exceeded;
					// an mbox file that is still empty is never rotated
					if ($this->totalsize > 0 && $this->totalsize + $msgsize > self::MAX_MBOX_SIZE) {
						$this->closeFile();
						$this->mboxfilecnt++;
						$this->chunks++;
						$this->totalsize = 0;
						print("\n\t\t\t\t\r");
						$this->openFile();
					}

					++$this->parsedfiles;
					$this->totalsize += $msgsize;
					fwrite($this->mboxhnd,$mdfile->getMboxHeader()."\n".$mdfile->getHeader());
					$mdfile->streamWriteContents($this->mboxhnd);
					fwrite($this->mboxhnd, "\n");

					$mdfile = null;
				}
			}

			closedir($curhandle);
			$curhandle = null;
		}
		$this->closeFile();
	}

	/**
	 * @return int Number of maildir files found, including the skipped ones
	 */
	public function getFileCount() {
		return $this->mdirfiles;
	}

	/**
	 * Print the conversion summary of this directory and add its numbers to
	 * the totals shared by all DirParser instances.
	 */
	public function printSummary() {
		printConsole("\n".'------------------------ SUMMARY ------------------------');
		printConsole("Wrote ".$this->mboxfilecnt." mbox files for this directory");
		printConsole("Converted ".$this->parsedfiles.'/'.$this->mdirfiles." files successfully");
		printConsole("Found ".$this->bogusfiles." files without a proper header and skipped them for this directory");
		printConsole('---------------------------------------------------------');

		self::$total_converted += $this->parsedfiles;
		self::$total_failed += $this->bogusfiles;
	}

	/**
	 * Static because the total is shared by all instances and is read
	 * without an instance at the end of the script.
	 *
	 * @return int Converted files summed over every printed summary
	 */
	public static function getTotalConverted() {
		return self::$total_converted;
	}

	/**
	 * @return int Skipped files summed over every printed summary
	 */
	public static function getTotalFailed() {
		return self::$total_failed;
	}

	/**
	 * @return string Name of the mail directory (last component of its path)
	 */
	public function getDirName() {
		return $this->dirname;
	}

	/**
	 * Close the directory handle when it is still open.
	 */
	public function __destruct() {
		if (is_resource($this->dirhandle)) {
			closedir($this->dirhandle);
		}
		$this->dirhandle = null;
	}
}

/**
 * A single maildir message file and the data needed to append it to an mbox file.
 *
 * The header is read first to find the sender address and the date for the
 * mbox "From " separator line; the rest of the message can then be read
 * into memory or streamed to another file handle.
 */
class MailDirFile {
	private $filename = '';
	private $filesize = 0;
	private $handler = null;
	private $mboxheader = 'From ';
	private $from = '';
	private $dt = '';
	private $header = '';
	private $contents='';
	private $foundaddr = false;
	private $fullmsg = '';
	// How often each sender header has been seen over all files
	private static $search_array = array(
	"From:"=>0, "X-From-Line:"=>0, "Reply-To:"=>0, "from:"=>0
	);
	// Address found for each sender header in this file
	private $search_array_filled = array(
	"From:"=>'', "X-From-Line:"=>'', "Reply-To:"=>'', "from:"=>''
	);

	/**
	 * Open the message file and determine its size. The script is
	 * terminated when the file cannot be opened.
	 *
	 * @param string $fname Path of the maildir message file
	 */
	public function __construct($fname) {
		$this->filename = $fname;
		// Used when the message has no usable Date header: the Unix epoch,
		// formatted like every other date in the mbox separator line
		$this->dt = gmdate("D M d H:i:s Y", 0);
		$this->handler = fopen($fname, "r");
		if (!$this->handler) {
			printConsole("Error occured while opening file: ".$fname, true);
		}
		$this->getFileSize();
	}

	/**
	 * @return array Sender header names mapped to the number of times they were seen
	 */
	public static function getSearchArray() {
		return self::$search_array;
	}

	/**
	 * Trim an address token; a token starting with '<' loses its first and
	 * its last character (the surrounding angle brackets).
	 *
	 * @param string $addr
	 * @return string
	 */
	private function cleanAddress($addr) {
		$addr = trim($addr);

		if ($addr !== '' && $addr[0] == '<') {
			return substr($addr, 1, strlen($addr)-2);
		}

		return $addr;
	}

	/**
	 * Read the rest of the file, from the current position on, into memory.
	 */
	public function readContents() {
		while(!feof($this->handler)) {
			$this->contents .= fread($this->handler, 8192);
		}
	}

	/**
	 * Copy the rest of the file, from the current position on, to $fp.
	 *
	 * Lines starting with "From " are written as ">From " so that mbox
	 * readers do not take them for the start of a new message. The copy
	 * assumes the current position is at the start of a line, which is the
	 * case right after readHeader().
	 *
	 * @param resource $fp File handle to write to
	 */
	public function streamWriteContents($fp) {
		$atlinestart = true;

		// Bounded reads keep memory use low even for very long lines
		while (($chunk = fgets($this->handler, 8192)) !== false) {
			if ($atlinestart && strncmp($chunk, 'From ', 5) === 0) {
				fwrite($fp, '>');
			}

			fwrite($fp, $chunk);
			// A chunk without a trailing newline ends in the middle of a line
			$atlinestart = (substr($chunk, -1) === "\n");
		}
	}

	/**
	 * Read the message header and build the mbox "From " separator line.
	 *
	 * Reading stops after the first empty line (LF or CRLF) or at the end of
	 * the file. The sender is taken from the first non empty value of
	 * From:, from:, Reply-To: and X-From-Line:, the date from the Date:
	 * header. Header names are matched case sensitively.
	 */
	public function readHeader() {
		while (($line = fgets($this->handler)) !== false) {
			$this->header .= $line;

			// The empty line separating header and body ends the header
			if (rtrim($line, "\r\n") === '') {
				break;
			}

			$foundfrom = false;
			foreach (array_keys(self::$search_array) as $search) {
				if (!$this->startsWith($search, $line)) {
					continue;
				}

				$foundfrom = true;
				self::$search_array[$search]++;
				$this->foundaddr = true;

				// Look for the address in the header value only, split on any whitespace
				$value = trim(substr($line, strlen($search)));
				foreach (preg_split('/\s+/', $value) as $item) {
					if (strpos($item,'@') !== false) {
						$this->search_array_filled[$search] = $this->cleanAddress($item);
						break;
					}
				}
				break;
			}

			if (!$foundfrom && $this->startsWith("Date:", $line)) {
				$stamp = strtotime(trim(substr($line, 5)));

				// An unparsable date keeps the default set in the constructor
				if ($stamp !== false) {
					$this->dt = gmdate("D M d H:i:s Y", $stamp);
				}
			}
		}

		$this->from = $this->search_array_filled['From:'];

		if (empty($this->from)) {
			$this->from = $this->search_array_filled['from:'];
		}

		if (empty($this->from)) {
			$this->from = $this->search_array_filled['Reply-To:'];
		}

		if (empty($this->from)) {
			$this->from = $this->search_array_filled['X-From-Line:'];
		}

		$this->mboxheader .= $this->from.' '.$this->dt;
	}

	/**
	 * @return boolean True when one of the sender headers was found by readHeader()
	 */
	public function foundAddress() {
		return $this->foundaddr;
	}

	public function printContents() {
		print($this->contents);
	}

	public function printHeader() {
		print($this->header);
	}

	/**
	 * @return string The mbox "From " separator line, without a line break
	 */
	public function getMboxHeader() {
		return $this->mboxheader;
	}

	/**
	 * @return string The header as read by readHeader(), including the empty line that ends it
	 */
	public function getHeader() {
		return $this->header;
	}

	/**
	 * @return string The contents as read by readContents(), unmodified
	 */
	public function getContents() {
		return $this->contents;
	}

	/**
	 * Build the complete mbox entry from the separator line, the header and
	 * the contents read by readContents(). Lines of the contents that start
	 * with "From " are written as ">From ", like in streamWriteContents().
	 *
	 * @return string
	 */
	public function getMboxMessage() {
		$body = preg_replace('/^From /m', '>From ', $this->contents);
		$this->fullmsg = $this->mboxheader."\n".$this->header.$body."\n";
		return $this->fullmsg;
	}

	/**
	 * Size of the file plus the length of the mbox separator line and two
	 * line breaks. The stored file size is not changed, so repeated calls
	 * return the same value.
	 *
	 * @return int
	 */
	public function getTotalSize() {
		return $this->filesize + strlen($this->mboxheader) + 2;
	}

	/**
	 * Determine the file size by seeking to the end of the file, then rewind.
	 */
	private function getFileSize() {
		fseek($this->handler,0,SEEK_END);
		$this->filesize = ftell($this->handler);
		fseek($this->handler,0,SEEK_SET);
	}

	/**
	 * @param string $str Prefix to look for
	 * @param string $line Line to inspect
	 * @return boolean True when $line begins with $str (case sensitive)
	 */
	private function startsWith($str, $line) {
		return $str == substr($line,0,strlen($str));
	}

	/**
	 * @return int Size of the message file in bytes
	 */
	public function getSize() {
		return $this->filesize;
	}

	/**
	 * Close the message file when it is still open.
	 */
	public function __destruct() {
		if (is_resource($this->handler)) {
			fclose($this->handler);
		}
		$this->handler = null;
	}
}

/********************** MAIN CODE **********************/
define('DIR_SEPERATOR','/');
define('OUTPUT_DIR','mboxfiles');
// Pattern a file name has to match to be treated as a maildir message file
define('FILE_FORMAT',"/^[0-9]*\.[a-zA-Z0-9_]*\.[a-zA-Z0-9\:\,]*/");

// Set by the -e and -x command line options; read as globals by DirParser
$mbox_extension = false;
$remove_empty = false;

$program_name = $argv[0];

// Pick the options out of the argument list; whatever is left must be the
// script name followed by the maildir directory
foreach ($argv as $k=>$argument) {
	// The first entry is the script name, never an option
	if ($k === 0) {
		continue;
	}

	switch (trim($argument)) {
		case '-e':  {
			$mbox_extension = true;
			unset($argv[$k]);
			break;
		}
		case '-x':  {
			$remove_empty = true;
			unset($argv[$k]);
			break;
		}
		case '-h': {
			// Prints the help text and exits with status 0
			print_usage();
			break;
		}
		default: {
			// strncmp() is safe for empty arguments as well
			if (strncmp($argument, '-', 1) === 0) {
				printConsole("Unknown Option: $argument");
				print_usage(true);
			}
		}
	}
}

// Re-index argv
$argv = array_values($argv);
$argc = count($argv);

if ($argc != 2) {
	printConsole("Expected exactly one Maildir directory argument");
	// A wrong invocation is an error, so exit with a non zero status
	print_usage(true);
}

$maildir = $argv[1];

// Check if the last character of the given directory is the dir seperator, if not add it.
if (substr($maildir, -1) != DIR_SEPERATOR) {
	$maildir .= '/';
}

// If the given name is a file instead of a directory kill the script
if (!is_dir($maildir)) {
	printConsole("$maildir is not a directory", true);
}

// Variable initialization
$memory_limit = '32M';
$oldlimit = '';

printConsole('Setting memory limit...');

if ( ($oldlimit = setMemoryLimit($memory_limit)) === false ) {
	printConsole("Error setting memory limit to $memory_limit.", true);
} else {
	printConsole("New memory limit: $memory_limit\nOld memory limit: $oldlimit");
}

// The root node is named after the last component of the maildir path
$rootnode = new DirectoryObj(basename($maildir), $maildir, OUTPUT_DIR);

// First collect the directory tree, then convert every directory in it
parseTree($rootnode, $rootnode->getFullPath());

makeTree($rootnode);

printConsole("Finished processing all the directories.\nOutput files are located in directory: ".OUTPUT_DIR."/");
printConsole("Total files converted: ".DirParser::getTotalConverted());
printConsole("Total files failed: ".DirParser::getTotalFailed());
?>