diff options
author | Tom N Harris <tnharris@whoopdedo.org> | 2007-10-12 02:03:27 +0200 |
---|---|---|
committer | Tom N Harris <tnharris@whoopdedo.org> | 2007-10-12 02:03:27 +0200 |
commit | b6344591177f30487593e75a56945d9ddd3d907b (patch) | |
tree | 2db8c25a8c587ee24bf952bbfe7080bca951fa9f | |
parent | ab5679231d9d126d668c1b3538fc9594af6c3b3f (diff) | |
download | rpg-b6344591177f30487593e75a56945d9ddd3d907b.tar.gz rpg-b6344591177f30487593e75a56945d9ddd3d907b.tar.bz2 |
Reduce memory requirement for indexer
darcs-hash:20071012000327-6942e-bdef26ce258dea0229ad8b8dbbc7c089dea880ad.gz
-rw-r--r-- | inc/indexer.php | 129 |
1 files changed, 102 insertions, 27 deletions
diff --git a/inc/indexer.php b/inc/indexer.php index 0b4e60b13..12e774579 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -63,12 +63,14 @@ function wordlen($w){ * * @author Tom N Harris <tnharris@whoopdedo.org> */ -function idx_saveIndex($pre, $wlen, $idx){ +function idx_saveIndex($pre, $wlen, &$idx){ global $conf; $fn = $conf['indexdir'].'/'.$pre.$wlen; $fh = @fopen($fn.'.tmp','w'); if(!$fh) return false; - fwrite($fh,join('',$idx)); + foreach ($idx as $line) { + fwrite($fh,$line); + } fclose($fh); if($conf['fperm']) chmod($fn.'.tmp', $conf['fperm']); io_rename($fn.'.tmp', $fn.'.idx'); @@ -90,6 +92,8 @@ function idx_getIndex($pre, $wlen){ /** * Create an empty index file if it doesn't exist yet. * + * FIXME: This function isn't currently used. It will probably be removed soon. + * * @author Tom N Harris <tnharris@whoopdedo.org> */ function idx_touchIndex($pre, $wlen){ @@ -102,6 +106,77 @@ function idx_touchIndex($pre, $wlen){ } /** + * Read a line ending with \n. + * Returns false on EOF. + * + * @author Tom N Harris <tnharris@whoopdedo.org> + */ +function _freadline($fh) { + if (feof($fh)) return false; + $ln = ''; + while (($buf = fgets($fh,4096)) !== false) { + $ln .= $buf; + if (substr($buf,-1) == "\n") break; + } + if ($ln === '') return false; + if (substr($ln,-1) != "\n") $ln .= "\n"; + return $ln; +} + +/** + * Write a line to an index file. + * + * @author Tom N Harris <tnharris@whoopdedo.org> + */ +function idx_saveIndexLine($pre, $wlen, $idx, $line){ + global $conf; + if(substr($line,-1) != "\n") $line .= "\n"; + $fn = $conf['indexdir'].'/'.$pre.$wlen; + $fh = @fopen($fn.'.tmp','w'); + if(!$fh) return false; + $ih = @fopen($fn.'.idx','r'); + if ($ih) { + $ln = -1; + while (($curline = _freadline($ih)) !== false) { + if (++$ln == $idx) { + fwrite($fh, $line); + } else { + fwrite($fh, $curline); + } + } + if ($idx > $ln) { + fwrite($fh,$line); + } + fclose($ih); + } else { + fwrite($fh,$line); + } + fclose($fh); + if($conf['fperm']) chmod($fn.'.tmp', $conf['fperm']); + io_rename($fn.'.tmp', $fn.'.idx'); + return true; +} + +/** + * Read a single line from an index (if it exists). + * + * @author Tom N Harris <tnharris@whoopdedo.org> + */ +function idx_getIndexLine($pre, $wlen, $idx){ + global $conf; + $fn = $conf['indexdir'].'/'.$pre.$wlen.'.idx'; + if(!@file_exists($fn)) return ''; + $fh = @fopen($fn,'r'); + if(!$fh) return ''; + $ln = -1; + while (($line = _freadline($fh)) !== false) { + if (++$ln == $idx) break; + } + fclose($fh); + return "$line"; +} + +/** * Split a page into words * * Returns an array of word counts, false if an error occurred. @@ -245,36 +320,31 @@ function idx_addPage($page){ } // Remove obsolete index entries - $pageword_idx = idx_getIndex('pageword',''); - if ($pid<count($pageword_idx)) { - $oldwords = explode(':',trim($pageword_idx[$pid])); + $pageword_idx = trim(idx_getIndexLine('pageword','',$pid)); + if ($pageword_idx !== '') { + $oldwords = explode(':',$pageword_idx); $delwords = array_diff($oldwords, $pagewords); + $upwords = array(); foreach ($delwords as $word) { if($word=='') continue; list($wlen,$wid) = explode('*',$word); $wid = (int)$wid; - // make the disk cache work for its money - // $pagewords is sorted, so this shouldn't be a significant penalty - $index = idx_getIndex('i',$wlen); - $index[$wid] = idx_updateIndexLine($index[$wid],$pid,0); - idx_saveIndex('i',$wlen,$index); + $upwords[$wlen][] = $wid; } - if (!empty($delwords)) { - // Save the reverse index - $pageword_idx[$pid] = join(':',$pagewords)."\n"; - if(!idx_saveIndex('pageword','',$pageword_idx)){ - trigger_error("Failed to write word index", E_USER_ERROR); - return false; + foreach ($upwords as $wlen => $widx) { + $index = idx_getIndex('i',$wlen); + foreach ($widx as $wid) { + $index[$wid] = idx_updateIndexLine($index[$wid],$pid,0); } - } - } else { - // Save the reverse index - $pageword_idx[$pid] = join(':',$pagewords)."\n"; - if(!idx_saveIndex('pageword','',$pageword_idx)){ - trigger_error("Failed to write word index", E_USER_ERROR); - return false; + idx_saveIndex('i',$wlen,$index); } } + // Save the reverse index + $pageword_idx = join(':',$pagewords)."\n"; + if(!idx_saveIndexLine('pageword','',$pid,$pageword_idx)){ + trigger_error("Failed to write word index", E_USER_ERROR); + return false; + } return true; } @@ -592,13 +662,18 @@ function idx_upgradePageWords(){ } } - $pageword_idx = array(); - foreach ($pagewords as $line) - $pageword_idx[] = join(':',$line)."\n"; - if(!idx_saveIndex('pageword','',$pageword_idx)){ + $fn = $conf['indexdir'].'/pageword'; + $fh = @fopen($fn.'.tmp','w'); + if (!$fh){ trigger_error("Failed to write word index", E_USER_ERROR); return false; } + foreach ($pagewords as $line){ + fwrite($fh, join(':',$line)."\n"); + } + fclose($fh); + if($conf['fperm']) chmod($fn.'.tmp', $conf['fperm']); + io_rename($fn.'.tmp', $fn.'.idx'); return true; } |