diff options
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- | inc/indexer.php | 230 |
1 files changed, 220 insertions, 10 deletions
diff --git a/inc/indexer.php b/inc/indexer.php index f22aee3a0..e518907d7 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -102,6 +102,10 @@ function wordlen($w){ * @author Tom N Harris <tnharris@whoopdedo.org> */ class Doku_Indexer { + /** + * @var array $pidCache Cache for getPID() + */ + protected $pidCache = array(); /** * Adds the contents of a page to the fulltext index @@ -120,7 +124,7 @@ class Doku_Indexer { return "locked"; // load known documents - $pid = $this->addIndexKey('page', '', $page); + $pid = $this->getPIDNoLock($page); if ($pid === false) { $this->unlock(); return false; @@ -256,7 +260,7 @@ class Doku_Indexer { return "locked"; // load known documents - $pid = $this->addIndexKey('page', '', $page); + $pid = $this->getPIDNoLock($page); if ($pid === false) { $this->unlock(); return false; @@ -335,6 +339,109 @@ class Doku_Indexer { } /** + * Rename a page in the search index without changing the indexed content. This function doesn't check if the + * old or new name exists in the filesystem. It returns an error if the old page isn't in the page list of the + * indexer and it deletes all previously indexed content of the new page. + * + * @param string $oldpage The old page name + * @param string $newpage The new page name + * @return string|bool If the page was successfully renamed, can be a message in the case of an error + */ + public function renamePage($oldpage, $newpage) { + if (!$this->lock()) return 'locked'; + + $pages = $this->getPages(); + + $id = array_search($oldpage, $pages); + if ($id === false) { + $this->unlock(); + return 'page is not in index'; + } + + $new_id = array_search($newpage, $pages); + if ($new_id !== false) { + // make sure the page is not in the index anymore + if ($this->deletePageNoLock($newpage) !== true) { + return false; + } + + $pages[$new_id] = 'deleted:'.time().rand(0, 9999); + } + + $pages[$id] = $newpage; + + // update index + if (!$this->saveIndex('page', '', $pages)) { + $this->unlock(); + return false; + } + + // reset the pid cache + $this->pidCache = array(); + + $this->unlock(); + return true; + } + + /** + * Renames a meta value in the index. This doesn't change the meta value in the pages, it assumes that all pages + * will be updated. + * + * @param string $key The metadata key of which a value shall be changed + * @param string $oldvalue The old value that shall be renamed + * @param string $newvalue The new value to which the old value shall be renamed, can exist (then values will be merged) + * @return bool|string If renaming the value has been successful, false or error message on error. + */ + public function renameMetaValue($key, $oldvalue, $newvalue) { + if (!$this->lock()) return 'locked'; + + // change the relation references index + $metavalues = $this->getIndex($key, '_w'); + $oldid = array_search($oldvalue, $metavalues); + if ($oldid !== false) { + $newid = array_search($newvalue, $metavalues); + if ($newid !== false) { + // free memory + unset ($metavalues); + + // okay, now we have two entries for the same value. we need to merge them. + $indexline = $this->getIndexKey($key, '_i', $oldid); + if ($indexline != '') { + $newindexline = $this->getIndexKey($key, '_i', $newid); + $pagekeys = $this->getIndex($key, '_p'); + $parts = explode(':', $indexline); + foreach ($parts as $part) { + list($id, $count) = explode('*', $part); + $newindexline = $this->updateTuple($newindexline, $id, $count); + + $keyline = explode(':', $pagekeys[$id]); + // remove old meta value + $keyline = array_diff($keyline, array($oldid)); + // add new meta value when not already present + if (!in_array($newid, $keyline)) { + array_push($keyline, $newid); + } + $pagekeys[$id] = implode(':', $keyline); + } + $this->saveIndex($key, '_p', $pagekeys); + unset($pagekeys); + $this->saveIndexKey($key, '_i', $oldid, ''); + $this->saveIndexKey($key, '_i', $newid, $newindexline); + } + } else { + $metavalues[$oldid] = $newvalue; + if (!$this->saveIndex($key, '_w', $metavalues)) { + $this->unlock(); + return false; + } + } + } + + $this->unlock(); + return true; + } + + /** * Remove a page from the index * * Erases entries in all known indexes. @@ -347,10 +454,26 @@ class Doku_Indexer { if (!$this->lock()) return "locked"; + $result = $this->deletePageNoLock($page); + + $this->unlock(); + + return $result; + } + + /** + * Remove a page from the index without locking the index, only use this function if the index is already locked + * + * Erases entries in all known indexes. + * + * @param string $page a page name + * @return boolean the function completed successfully + * @author Tom N Harris <tnharris@whoopdedo.org> + */ + protected function deletePageNoLock($page) { // load known documents - $pid = $this->addIndexKey('page', '', $page); + $pid = $this->getPIDNoLock($page); if ($pid === false) { - $this->unlock(); return false; } @@ -376,7 +499,6 @@ class Doku_Indexer { } // Save the reverse index if (!$this->saveIndexKey('pageword', '', $pid, "")) { - $this->unlock(); return false; } @@ -393,6 +515,37 @@ class Doku_Indexer { $this->saveIndexKey($metaname.'_p', '', $pid, ''); } + return true; + } + + /** + * Clear the whole index + * + * @return bool If the index has been cleared successfully + */ + public function clear() { + global $conf; + + if (!$this->lock()) return false; + + @unlink($conf['indexdir'].'/page.idx'); + @unlink($conf['indexdir'].'/title.idx'); + @unlink($conf['indexdir'].'/pageword.idx'); + @unlink($conf['indexdir'].'/metadata.idx'); + $dir = @opendir($conf['indexdir']); + if($dir!==false){ + while(($f = readdir($dir)) !== false){ + if(substr($f,-4)=='.idx' && + (substr($f,0,1)=='i' || substr($f,0,1)=='w' + || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx')) + @unlink($conf['indexdir']."/$f"); + } + } + @unlink($conf['indexdir'].'/lengths.idx'); + + // clear the pid cache + $this->pidCache = array(); + $this->unlock(); return true; } @@ -454,6 +607,58 @@ class Doku_Indexer { } /** + * Get the numeric PID of a page + * + * @param string $page The page to get the PID for + * @return bool|int The page id on success, false on error + */ + public function getPID($page) { + // return PID without locking when it is in the cache + if (isset($this->pidCache[$page])) return $this->pidCache[$page]; + + if (!$this->lock()) + return false; + + // load known documents + $pid = $this->getPIDNoLock($page); + if ($pid === false) { + $this->unlock(); + return false; + } + + $this->unlock(); + return $pid; + } + + /** + * Get the numeric PID of a page without locking the index. + * Only use this function when the index is already locked. + * + * @param string $page The page to get the PID for + * @return bool|int The page id on success, false on error + */ + protected function getPIDNoLock($page) { + // avoid expensive addIndexKey operation for the most recently requested pages by using a cache + if (isset($this->pidCache[$page])) return $this->pidCache[$page]; + $pid = $this->addIndexKey('page', '', $page); + // limit cache to 10 entries by discarding the oldest element as in DokuWiki usually only the most recently + // added item will be requested again + if (count($this->pidCache) > 10) array_shift($this->pidCache); + $this->pidCache[$page] = $pid; + return $pid; + } + + /** + * Get the page id of a numeric PID + * + * @param int $pid The PID to get the page id for + * @return string The page id + */ + public function getPageFromPID($pid) { + return $this->getIndexKey('page', '', $pid); + } + + /** * Find pages in the fulltext index containing the words, * * The search words must be pre-tokenized, meaning only letters and @@ -946,7 +1151,7 @@ class Doku_Indexer { * @param string $idx name of the index * @param string $suffix subpart identifier * @param string $value line to find in the index - * @return int line number of the value in the index + * @return int|bool line number of the value in the index or false if writing the index failed * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function addIndexKey($idx, $suffix, $value) { @@ -1140,8 +1345,8 @@ class Doku_Indexer { * @author Tom N Harris <tnharris@whoopdedo.org> */ function idx_get_indexer() { - static $Indexer = null; - if (is_null($Indexer)) { + static $Indexer; + if (!isset($Indexer)) { $Indexer = new Doku_Indexer(); } return $Indexer; @@ -1223,6 +1428,12 @@ function idx_addPage($page, $verbose=false, $force=false) { return $result; } + $Indexer = idx_get_indexer(); + $pid = $Indexer->getPID($page); + if ($pid === false) { + if ($verbose) print("Indexer: getting the PID failed for $page".DOKU_LF); + return false; + } $body = ''; $metadata = array(); $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED); @@ -1230,14 +1441,13 @@ function idx_addPage($page, $verbose=false, $force=false) { $metadata['relation_references'] = array_keys($references); else $metadata['relation_references'] = array(); - $data = compact('page', 'body', 'metadata'); + $data = compact('page', 'body', 'metadata', 'pid'); $evt = new Doku_Event('INDEXER_PAGE_ADD', $data); if ($evt->advise_before()) $data['body'] = $data['body'] . " " . rawWiki($page); $evt->advise_after(); unset($evt); extract($data); - $Indexer = idx_get_indexer(); $result = $Indexer->addPageWords($page, $body); if ($result === "locked") { if ($verbose) print("Indexer: locked".DOKU_LF); |