diff options
author | Andreas Gohr <andi@splitbrain.org> | 2012-11-03 17:54:02 +0100 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2012-11-03 17:54:02 +0100 |
commit | bee9f377bc547c99fe99b4e38199cb92cf668554 (patch) | |
tree | c2017db6147beb791b42d2d19b47ab921cdccd80 /inc/Tar.class.php | |
parent | 2005b6b650f2523cb58a005961a55a6f099c70c3 (diff) | |
download | rpg-bee9f377bc547c99fe99b4e38199cb92cf668554.tar.gz rpg-bee9f377bc547c99fe99b4e38199cb92cf668554.tar.bz2 |
Completely rewritten Tar library
This new class is only loosely based on our previous library. The
whole API was changed to make it more flexible and memory saving.
Some first unit tests are included
Diffstat (limited to 'inc/Tar.class.php')
-rw-r--r-- | inc/Tar.class.php | 614 |
1 files changed, 614 insertions, 0 deletions
<?php
/**
 * This class allows the extraction of existing and the creation of new Unix TAR archives.
 * To keep things simple, the modification of existing archives is not supported. It handles
 * uncompressed, gzip and bzip2 compressed tar files.
 *
 * To list the contents of an existing TAR archive, open() it and use contents() on it:
 *
 *     $tar = new Tar();
 *     $tar->open('myfile.tgz');
 *     $toc = $tar->contents();
 *     print_r($toc);
 *
 * To extract the contents of an existing TAR archive, open() it and use extract() on it:
 *
 *     $tar = new Tar();
 *     $tar->open('myfile.tgz');
 *     $tar->extract('/tmp');
 *
 * To create a new TAR archive directly on the filesystem (low memory requirements), create() it,
 * add*() files and close() it:
 *
 *      $tar = new Tar();
 *      $tar->create('myfile.tgz');
 *      $tar->addFile(...);
 *      $tar->addData(...);
 *      ...
 *      $tar->close();
 *
 * To create a TAR archive directly in memory, create() it, add*() files and then either save()
 * or getArchive() it:
 *
 *      $tar = new Tar();
 *      $tar->create();
 *      $tar->addFile(...);
 *      $tar->addData(...);
 *      ...
 *      $tar->save('myfile.tgz'); // compresses and saves it
 *      echo $tar->getArchive(Tar::COMPRESS_GZIP); // compresses and returns it
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Bouchon <tarlib@bouchon.org> (Maxg)
 * @license GPL 2
 */
class Tar {

    const COMPRESS_AUTO = 0;
    const COMPRESS_NONE = 1;
    const COMPRESS_GZIP = 2;
    const COMPRESS_BZIP = 3;

    /** @var string path of the archive on disk, empty for in-memory archives */
    protected $file = '';
    /** @var int one of the COMPRESS_* constants */
    protected $comptype = Tar::COMPRESS_AUTO;
    /** @var resource|int open stream handle, 0 when there is none */
    protected $fh;
    /** @var string raw (uncompressed) archive data of in-memory archives */
    protected $memory = '';
    /** @var bool true when no archive is open */
    protected $closed = true;
    /** @var bool true when the archive was opened through create() */
    protected $writeaccess = false;

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @param int    $comptype
     * @throws TarIOException
     * @throws TarIllegalCompressionException
     */
    public function open($file, $comptype = Tar::COMPRESS_AUTO) {
        // determine compression
        if($comptype == Tar::COMPRESS_AUTO) $comptype = $this->filetype($file);
        $this->compressioncheck($comptype);

        $this->comptype = $comptype;
        $this->file     = $file;

        if($this->comptype === Tar::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if(!$this->fh) throw(new TarIOException('Could not open file for reading: '.$this->file));

        // a reused object must not carry write mode over, or close() would write a footer
        $this->writeaccess = false;
        $this->closed      = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive, and returns an indexed array of associative
     * arrays containing for each file the following information:
     *
     * checksum    Tar Checksum of the file
     * filename    The full name of the stored file (up to 100 c.)
     * perm        UNIX permissions in DECIMAL, not octal
     * uid         The Owner ID
     * gid         The Group ID
     * size        Uncompressed filesize
     * mtime       Timestamp of last modification
     * typeflag    Empty (or '0') for files, set for folders
     * link        Is it a symlink?
     * uname       Owner name
     * gname       Group name
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws TarIOException
     * @return array
     */
    public function contents() {
        if($this->closed || !$this->file) throw(new TarIOException('Can not read from a closed archive'));

        $result = array();
        while($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if(!is_array($header)) continue;

            // file data is stored in full 512 byte blocks
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $header;
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws TarIOException
     * @return array the headers of all extracted entries
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '') {
        if($this->closed || !$this->file) throw(new TarIOException('Can not read from a closed archive'));

        $outdir = rtrim($outdir, '/');
        io_mkdir_p($outdir);
        $striplen = strlen($strip);

        $extracted = array();

        while($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if(!is_array($header)) continue;

            // entries without a name are not extracted, but they fall through to the
            // skip branch below so their data blocks are not misread as headers

            // strip prefix
            $filename = $this->cleanPath($header['filename']);
            if(is_int($strip)) {
                // if $strip is an integer we strip this many path components
                $parts = explode('/', $filename);
                if(!$header['typeflag']) {
                    $base = array_pop($parts); // keep filename itself
                } else {
                    $base = '';
                }
                $filename = join('/', array_slice($parts, $strip));
                if((string) $base !== '') $filename .= "/$base";
            } else {
                // if $strip is a string, we strip a prefix here
                if(substr($filename, 0, $striplen) == $strip) $filename = substr($filename, $striplen);
            }

            // check if this should be extracted
            $extract = ((string) $filename !== '');
            if($extract && $include && !preg_match($include, $filename)) $extract = false;
            if($extract && $exclude && preg_match($exclude, $filename)) $extract = false;

            // Now do the extraction (or not)
            if($extract) {
                $extracted[] = $header;

                $output    = "$outdir/$filename";
                $directory = ($header['typeflag']) ? $output : dirname($output);
                io_mkdir_p($directory);

                // is this a file?
                if(!$header['typeflag']) {
                    $fp = @fopen($output, "wb");
                    if(!$fp) throw(new TarIOException('Could not open file for writing: '.$output));

                    // copy all full blocks, then the used part of the last, padded block
                    $size = floor($header['size'] / 512);
                    for($i = 0; $i < $size; $i++) {
                        fwrite($fp, $this->readbytes(512), 512);
                    }
                    if(($header['size'] % 512) != 0) fwrite($fp, $this->readbytes(512), $header['size'] % 512);

                    fclose($fp);
                    touch($output, $header['mtime']);
                    chmod($output, $header['perm']);
                } else {
                    $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
                }
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
            }
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @param int    $comptype
     * @param int    $complevel
     * @throws TarIOException
     * @throws TarIllegalCompressionException
     */
    public function create($file = '', $comptype = Tar::COMPRESS_AUTO, $complevel = 9) {
        // determine compression
        if($comptype == Tar::COMPRESS_AUTO) $comptype = $this->filetype($file);
        $this->compressioncheck($comptype);

        $this->comptype = $comptype;
        $this->file     = $file;
        $this->memory   = '';
        $this->fh       = 0;

        if($this->file) {
            if($this->comptype === Tar::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$complevel);
            } elseif($this->comptype === Tar::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if(!$this->fh) throw(new TarIOException('Could not open file for writing: '.$this->file));
        }

        // mark the archive writable so close() appends the end-of-archive footer
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @todo handle directory adding
     * @param string $file the original file
     * @param string $name the name to use for the file in the archive
     * @throws TarBadFilename
     * @throws TarIOException
     */
    public function addFile($file, $name = '') {
        if($this->closed) throw(new TarIOException('Archive has been closed, files can no longer be added'));

        if(!$name) $name = $file;
        $name = $this->cleanPath($name);

        // FIXME ustar should support up 256 chars
        if(strlen($name) > 99) throw(new TarBadFilename('Filenames may not exceed 99 bytes: '.$name));

        $fp = fopen($file, 'rb');
        if(!$fp) throw(new TarIOException('Could not open file for reading: '.$file));

        // create file header and copy all stat info from the original file
        clearstatcache(false, $file);
        $stat = stat($file);
        $this->writeFileHeader(
            $name,
            $stat[4],
            $stat[5],
            fileperms($file),
            filesize($file),
            filemtime($file),
            false
        );

        while(!feof($fp)) {
            $data = fread($fp, 512);
            // feof() only turns true once a read has hit the end of the file. Without this check
            // an empty file or one sized a multiple of 512 would get an extra all-zero block
            if($data === false || $data === '') break;
            $this->writebytes(pack("a512", $data));
        }
        fclose($fp);
    }

    /**
     * Add a file to the current TAR archive using in memory data
     *
     * @param string $name  the name of the file inside the archive
     * @param string $data  the file contents
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $mtime modification time, the current time is used when 0
     * @throws TarIOException
     * @throws TarBadFilename
     */
    public function addData($name, $data, $uid = 0, $gid = 0, $perm = 0666, $mtime = 0) {
        if($this->closed) throw(new TarIOException('Archive has been closed, files can no longer be added'));

        $name = $this->cleanPath($name);

        // FIXME ustar should support up 256 chars
        if(strlen($name) > 99) throw(new TarBadFilename('Filenames may not exceed 99 bytes: '.$name));

        $len = strlen($data);

        $this->writeFileHeader(
            $name,
            $uid,
            $gid,
            $perm,
            $len,
            ($mtime) ? $mtime : time(),
            false
        );

        // data is written in NUL padded 512 byte blocks
        for($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws TarIOException
     */
    public function close() {
        if($this->closed) return; // we did this already

        // write footer
        if($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if($this->file) {
            if($this->comptype === Tar::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif($this->comptype === Tar::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        $this->writeaccess = false;
        $this->closed      = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @param int $comptype  compression to apply, COMPRESS_AUTO uses the type given to create()
     * @param int $complevel gzip compression level
     * @throws TarIllegalCompressionException
     * @return string
     */
    public function getArchive($comptype = Tar::COMPRESS_AUTO, $complevel = 9) {
        $this->close();

        if($comptype === Tar::COMPRESS_AUTO) $comptype = $this->comptype;
        $this->compressioncheck($comptype);

        // gzencode() creates the gzip file format (as read by gzopen() and gunzip),
        // gzcompress() would only create a bare zlib stream
        if($comptype === Tar::COMPRESS_GZIP) return gzencode($this->memory, $complevel);
        if($comptype === Tar::COMPRESS_BZIP) return bzcompress($this->memory);
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @param int    $comptype
     * @param int    $complevel
     * @throws TarIOException
     * @throws TarIllegalCompressionException
     */
    public function save($file, $comptype = Tar::COMPRESS_AUTO, $complevel = 9) {
        if($comptype === Tar::COMPRESS_AUTO) $comptype = $this->filetype($file);

        // only a real failure returns false, 0 written bytes is not an error
        if(file_put_contents($file, $this->getArchive($comptype, $complevel)) === false) {
            throw(new TarIOException('Could not write to file: '.$file));
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length) {
        if($this->comptype === Tar::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws TarIOException
     * @return int number of bytes written
     */
    protected function writebytes($data) {
        if(!$this->file) {
            $this->memory .= $data;
            $written = strlen($data);
        } elseif($this->comptype === Tar::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if($written === false) throw(new TarIOException('Failed to write to archive stream'));
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip from the current position
     */
    public function skipbytes($bytes) {
        $bytes = (int) $bytes;

        if($this->comptype === Tar::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on. bzread() returns at most
            // 8192 bytes per call, so bigger gaps need multiple reads
            while($bytes > 0) {
                $chunk = @bzread($this->fh, min(8192, $bytes));
                if($chunk === false || $chunk === '') break; // premature end of stream
                $bytes -= strlen($chunk);
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write a file header
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param bool   $isdir
     * @throws TarIOException
     */
    protected function writeFileHeader($name, $uid, $gid, $perm, $size, $mtime, $isdir = false) {
        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));
        $dir   = ($isdir) ? '5' : '';

        // the 148 bytes before and the 356 bytes after the 8 byte checksum field
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $dir, '', '', '', '', '', '', '', '', "");

        for($i = 0, $chks = 0; $i < 148; $i++)
            $chks += ord($data_first[$i]);

        // the checksum field itself counts as 8 spaces (8 * 32 = 256)
        for($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++)
            $chks += ord($data_last[$j]);

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * @todo how to handle filenames >100 chars?
     * @param string $block a 512 byte block containing the header data
     * @return array|bool the decoded header, false if the block is no valid header
     */
    protected function parseHeader($block) {
        if(!$block || strlen($block) != 512) return false;

        for($i = 0, $chks = 0; $i < 148; $i++)
            $chks += ord($block[$i]);

        // the checksum field itself counts as 8 spaces (8 * 32 = 256)
        for($i = 156, $chks += 256; $i < 512; $i++)
            $chks += ord($block[$i]);

        $headers = @unpack("a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor", $block);
        if(!$headers) return false;

        $return = array();

        $return['checksum'] = octdec(trim($headers['checksum']));
        if($return['checksum'] != $chks) return false;

        $return['filename'] = trim($headers['filename']);
        $return['perm']     = octdec(trim($headers['perm']));
        $return['uid']      = octdec(trim($headers['uid']));
        $return['gid']      = octdec(trim($headers['gid']));
        $return['size']     = octdec(trim($headers['size']));
        $return['mtime']    = octdec(trim($headers['mtime']));
        // unpack('a') keeps NUL bytes on PHP >= 5.5. Untrimmed, the "\0" typeflag of a
        // regular file would be truthy and the file would be handled like a directory
        $return['typeflag'] = trim($headers['typeflag']);
        $return['link']     = trim($headers['link']);
        $return['uname']    = trim($headers['uname']);
        $return['gname']    = trim($headers['gname']);

        return $return;
    }

    /**
     * Cleans up a path and removes relative parts
     *
     * @param string $p_dir
     * @return string
     */
    protected function cleanPath($p_dir) {
        $r = '';
        if((string) $p_dir !== '') {
            $subf = explode("/", $p_dir);
            $last = count($subf) - 1;

            // the path is rebuilt from its last component to its first
            for($i = $last; $i >= 0; $i--) {
                if($subf[$i] === ".") {
                    # do nothing
                } elseif($subf[$i] === "..") {
                    $i--; // also drop the component in front of the ".."
                } elseif($subf[$i] === '' && $i != $last && $i) {
                    # empty component from a double slash, do nothing
                    # (compared against '' explicitly so a component named "0" is kept)
                } else {
                    $r = $subf[$i].($i != $last ? "/".$r : "");
                }
            }
        }
        return $r;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype
     * @throws TarIllegalCompressionException
     */
    protected function compressioncheck($comptype) {
        if($comptype === Tar::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw(new TarIllegalCompressionException('No gzip support available'));
        }

        if($comptype === Tar::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw(new TarIllegalCompressionException('No bzip2 support available'));
        }
    }

    /**
     * Guesses the wanted compression from the given filename extension
     *
     * You don't need to call this yourself. It's used when you pass Tar::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int
     */
    public function filetype($file) {
        $file = strtolower($file);
        if(substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            $comptype = Tar::COMPRESS_GZIP;
        } elseif(substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            $comptype = Tar::COMPRESS_BZIP;
        } else {
            $comptype = Tar::COMPRESS_NONE;
        }
        return $comptype;
    }
}

/**
 * Thrown when a filename can not be stored in the archive
 */
class TarBadFilename extends Exception {
}

/**
 * Thrown when reading from or writing to an archive fails
 */
class TarIOException extends Exception {
}

/**
 * Thrown when the requested compression is not supported by this PHP installation
 */
class TarIllegalCompressionException extends Exception {
}
\ No newline at end of file |