100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * @author Andreas Gohr
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{
    /** @var string path of the archive file; empty for in-memory archives and after close() */
    protected $file = '';
    /** @var int one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level handed to the gzip functions */
    protected $complevel = 9;
    /** @var resource|int handle of the currently open archive stream, 0 when none is open */
    protected $fh;
    /** @var string buffer that collects the archive when it is created in memory */
    protected $memory = '';
    /** @var bool true while no archive is open for reading or writing */
    protected $closed = true;
    /** @var bool true when the archive was started through create() */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * @param int $level Compression level (0 to 9)
     * @param int $type Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException when the requested compression is not available
     * @return void
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        $this->comptype = $type;
        $this->complevel = $level;
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match the file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @return FileInfo[]
     */
    public function contents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $result = array();
        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                // not a valid header (e.g. the zero-filled end-of-archive blocks)
                continue;
            }

            // file data is stored in full 512 byte blocks
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $this->header2fileinfo($header);
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string $outdir the target directory for extracting
     * @param int|string $strip either the number of path components or a fixed prefix to strip
     * @param string $exclude a regular expression of files to exclude
     * @param string $include a regular expression of files to include
     * @throws ArchiveIOException
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue;
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->match($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // create output directory
            $output = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            @mkdir($directory, 0777, true);

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // copy all completely filled blocks
                $size = floor($header['size'] / 512);
                for ($i = 0; $i < $size; $i++) {
                    fwrite($fp, $this->readbytes(512), 512);
                }
                // the last block is padded, only write the bytes that belong to the file
                if (($header['size'] % 512) != 0) {
                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
                }

                fclose($fp);
                touch($output, $fileinfo->getMtime());
                chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function create($file = '')
    {
        $this->file = $file;
        $this->memory = '';
        $this->fh = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string $file path to the original file
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data, empty to take from original
     * @throws ArchiveIOException
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $fp = fopen($file, 'rb');
        if (!$fp) {
            throw new ArchiveIOException('Could not open file for reading: '.$file);
        }

        // create file header
        $this->writeFileHeader($fileinfo);

        // write data in NUL padded 512 byte blocks
        while (!feof($fp)) {
            $data = fread($fp, 512);
            if ($data === false || $data === '') {
                break;
            }
            $this->writebytes(pack("a512", $data));
        }
        fclose($fp);
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data
     * @param string $data binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write data in NUL padded 512 byte blocks
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws ArchiveIOException when the footer can not be written
     */
    public function close()
    {
        if ($this->closed) {
            return;
        } // we did this already

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh = 0;
        }

        $this->writeaccess = false;
        $this->closed = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @return string the archive, compressed according to the configured compression type
     */
    public function getArchive()
    {
        $this->close();

        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            // gzencode() creates a real gzip file (RFC 1952) as expected for .tar.gz/.tgz archives,
            // gzcompress() would only emit a zlib stream that gzopen() and gunzip can not read
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            // keep the configured level, only derive the compression type from the file name
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        if (file_put_contents($file, $this->getArchive()) === false) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip from the current position
     */
    public function skipbytes($bytes)
    {
        // callers pass the float result of ceil(), the seek functions want an integer
        $bytes = (int) $bytes;

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on; a single bzread() call returns
            // at most 8192 bytes, so larger entries have to be consumed in several steps
            while ($bytes > 0) {
                $chunk = @bzread($this->fh, min(8192, $bytes));
                if ($chunk === false || $chunk === '') {
                    break; // read error or end of stream
                }
                $bytes -= strlen($chunk);
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * @param string $name
     * @param int $uid
     * @param int $gid
     * @param int $perm
     * @param int $size
     * @param int $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name = $file;
            }
        }

        // values are needed in octal
        $uid = sprintf("%6s ", decoct($uid));
        $gid = sprintf("%6s ", decoct($gid));
        $perm = sprintf("%6s ", decoct($perm));
        $size = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // the header is built in two parts, the 8 byte checksum field goes in between them
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        // checksum over all 512 header bytes, the checksum field itself counts as 8 spaces (8 * 32 = 256)
        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }

        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU longlink entries the following name block(s) and the real header are read
     * from the stream as well, so the stream position advances accordingly.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|bool the decoded header fields, false if the block is no valid header
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            return false;
        }

        // checksum over all 512 header bytes, the checksum field itself counts as 8 spaces (8 * 32 = 256)
        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }

        for ($i = 156, $chks += 256; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            return false;
        }

        $return = array();
        $return['checksum'] = octdec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            return false;
        }

        $return['filename'] = trim($header['filename']);
        $return['perm'] = octdec(trim($header['perm']));
        $return['uid'] = octdec(trim($header['uid']));
        $return['gid'] = octdec(trim($header['gid']));
        $return['size'] = octdec(trim($header['size']));
        $return['mtime'] = octdec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link'] = trim($header['link']);
        $return['uname'] = trim($header['uname']);
        $return['gname'] = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        // (compared against '' so that a directory simply named "0" is not dropped)
        $prefix = trim($header['prefix']);
        if ($prefix !== '') {
            $return['filename'] = $prefix.'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename; the length has to come from the decoded
            // size, the raw header field is octal text and would give a wrong block count
            $filename = trim($this->readbytes((int) (ceil($return['size'] / 512) * 512)));
            // next block is the real header
            $block = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                // truncated or damaged archive, there is no entry the long name belongs to
                return false;
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * @param array $header header fields as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        // unpack() keeps a NUL type flag as "\0" on current PHP versions; NUL and '0' both
        // denote a regular file and must not be reported as a directory
        $typeflag = rtrim((string) $header['typeflag'], "\0");
        if ($typeflag === '' || $typeflag === '0') {
            // archives written without a directory type flag mark directories by a trailing slash
            $isdir = substr($header['filename'], -1) === '/';
        } else {
            // '5' is a directory; every other entry type keeps being reported as a
            // directory as before, so its payload is never written out as file content
            $isdir = true;
        }

        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        $fileinfo->setIsdir($isdir);

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given filename extension
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of the Archive::COMPRESS_* constants
     */
    public function filetype($file)
    {
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            $comptype = Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            $comptype = Archive::COMPRESS_BZIP;
        } else {
            $comptype = Archive::COMPRESS_NONE;
        }
        return $comptype;
    }
}