From 03aafe1cc677b4b0b0b0ef9ed04d8ab72f3ce583 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 3 Dec 2012 00:13:01 +0100 Subject: Indexer: add getPID/getPageFromPID functions and PID to INDEXER_PAGE_ADD This allows plugins to get the PID for a page and also to get the page for a certain PID. That way plugins can build their own index that uses numeric ids. --- inc/indexer.php | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/inc/indexer.php b/inc/indexer.php index f22aee3a0..cbb817d78 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -120,7 +120,7 @@ class Doku_Indexer { return "locked"; // load known documents - $pid = $this->addIndexKey('page', '', $page); + $pid = $this->getPIDNoLock($page); if ($pid === false) { $this->unlock(); return false; @@ -256,7 +256,7 @@ class Doku_Indexer { return "locked"; // load known documents - $pid = $this->addIndexKey('page', '', $page); + $pid = $this->getPIDNoLock($page); if ($pid === false) { $this->unlock(); return false; @@ -348,7 +348,7 @@ class Doku_Indexer { return "locked"; // load known documents - $pid = $this->addIndexKey('page', '', $page); + $pid = $this->getPIDNoLock($page); if ($pid === false) { $this->unlock(); return false; @@ -453,6 +453,48 @@ class Doku_Indexer { return array_values($wordlist); } + /** + * Get the numeric PID of a page + * + * @param string $page The page to get the PID for + * @return bool|int The page id on success, false on error + */ + public function getPID($page) { + if (!$this->lock()) + return false; + + // load known documents + $pid = $this->getPIDNoLock($page); + if ($pid === false) { + $this->unlock(); + return false; + } + + $this->unlock(); + return $pid; + } + + /** + * Get the numeric PID of a page without locking the index. + * Only use this function when the index is already locked. 
+ * + * @param string $page The page to get the PID for + * @return bool|int The page id on success, false on error + */ + protected function getPIDNoLock($page) { + return $this->addIndexKey('page', '', $page); + } + + /** + * Get the page id of a numeric PID + * + * @param int $pid The PID to get the page id for + * @return string The page id + */ + public function getPageFromPID($pid) { + return $this->getIndexKey('page', '', $pid); + } + /** * Find pages in the fulltext index containing the words, * @@ -946,7 +988,7 @@ class Doku_Indexer { * @param string $idx name of the index * @param string $suffix subpart identifier * @param string $value line to find in the index - * @return int line number of the value in the index + * @return int|bool line number of the value in the index or false if writing the index failed * @author Tom N Harris */ protected function addIndexKey($idx, $suffix, $value) { @@ -1223,6 +1265,12 @@ function idx_addPage($page, $verbose=false, $force=false) { return $result; } + $Indexer = idx_get_indexer(); + $pid = $Indexer->getPID($page); + if ($pid === false) { + if ($verbose) print("Indexer: getting the PID failed for $page".DOKU_LF); + return false; + } $body = ''; $metadata = array(); $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED); @@ -1230,14 +1278,13 @@ function idx_addPage($page, $verbose=false, $force=false) { $metadata['relation_references'] = array_keys($references); else $metadata['relation_references'] = array(); - $data = compact('page', 'body', 'metadata'); + $data = compact('page', 'body', 'metadata', 'pid'); $evt = new Doku_Event('INDEXER_PAGE_ADD', $data); if ($evt->advise_before()) $data['body'] = $data['body'] . " " . 
rawWiki($page); $evt->advise_after(); unset($evt); extract($data); - $Indexer = idx_get_indexer(); $result = $Indexer->addPageWords($page, $body); if ($result === "locked") { if ($verbose) print("Indexer: locked".DOKU_LF); -- cgit v1.2.3 From 0572700e3546546abe8375a0fff7245673f2b02a Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Tue, 4 Dec 2012 15:35:00 +0100 Subject: Indexer: Add test case for the PID functions --- _test/tests/inc/indexer_pid.test.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 _test/tests/inc/indexer_pid.test.php diff --git a/_test/tests/inc/indexer_pid.test.php b/_test/tests/inc/indexer_pid.test.php new file mode 100644 index 000000000..8c58b1abd --- /dev/null +++ b/_test/tests/inc/indexer_pid.test.php @@ -0,0 +1,18 @@ + + */ +class indexer_pid_test extends DokuWikiTest { + function test_pid() { + $indexer = idx_get_indexer(); + $syntaxPID = $indexer->getPID('wiki:syntax'); + $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\''); + $dokuwikiPID = $indexer->getPID('wiki:dokuwiki'); + $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\' after getting the PID for wiki:dokuwiki'); + $this->assertEquals($syntaxPID, $indexer->getPID('wiki:syntax'), 'getPID(\'wiki:syntax\') didn\'t returned different PIDs when called twice'); + $this->assertNotEquals($syntaxPID, $dokuwikiPID, 'Same PID returned for different pages'); + $this->assertTrue(is_numeric($syntaxPID) && is_numeric($dokuwikiPID), 'PIDs are not numeric'); + } +} -- cgit v1.2.3 From 3d2ce006bd7cffab5cda27f01787d2fd66ab630e Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Tue, 4 Dec 2012 16:13:46 +0100 Subject: Indexer: Add cache for getPID() This avoids re-reading of the page index file for every getPID()-call by using a simple FIFO cache, limited to 10 items. 
In idx_addPage() and the functions that it calls, getPID() is called 3 times for the same page. --- inc/indexer.php | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/inc/indexer.php b/inc/indexer.php index cbb817d78..05b082872 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -102,6 +102,10 @@ function wordlen($w){ * @author Tom N Harris */ class Doku_Indexer { + /** + * @var array $pidCache Cache for getPID() + */ + protected $pidCache = array(); /** * Adds the contents of a page to the fulltext index @@ -460,6 +464,9 @@ class Doku_Indexer { * @return bool|int The page id on success, false on error */ public function getPID($page) { + // return PID without locking when it is in the cache + if (isset($this->pidCache[$page])) return $this->pidCache[$page]; + if (!$this->lock()) return false; @@ -482,7 +489,14 @@ class Doku_Indexer { * @return bool|int The page id on success, false on error */ protected function getPIDNoLock($page) { - return $this->addIndexKey('page', '', $page); + // avoid expensive addIndexKey operation for the most recently requested pages by using a cache + if (isset($this->pidCache[$page])) return $this->pidCache[$page]; + $pid = $this->addIndexKey('page', '', $page); + // limit cache to 10 entries by discarding the oldest element as in DokuWiki usually only the most recently + // added item will be requested again + if (count($this->pidCache) > 10) array_shift($this->pidCache); + $this->pidCache[$page] = $pid; + return $pid; } /** -- cgit v1.2.3 From 1421e5483ad1ca8780331077141e0c07b6530bc5 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Tue, 4 Dec 2012 16:15:49 +0100 Subject: Indexer: Make $Indexer in idx_get_indexer() global instead of static This avoids problems with test cases that use the indexer and the PID cache - the index is cleaned between test cases but the PID cache wasn't cleaned. Now PHPUnit can unset the global $Indexer between test cases. 
--- inc/indexer.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/indexer.php b/inc/indexer.php index 05b082872..4dfaa33fb 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1196,8 +1196,8 @@ class Doku_Indexer { * @author Tom N Harris */ function idx_get_indexer() { - static $Indexer = null; - if (is_null($Indexer)) { + global $Indexer; + if (!isset($Indexer)) { $Indexer = new Doku_Indexer(); } return $Indexer; -- cgit v1.2.3 From 3cf3c7d60c81d562c8331a377a76e86d41f8f528 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Thu, 20 Dec 2012 17:47:05 +0100 Subject: Add clear function to the indexer that deletes the whole index --- bin/indexer.php | 52 +--------------------------------------------------- inc/indexer.php | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 51 deletions(-) diff --git a/bin/indexer.php b/bin/indexer.php index f6aeb4f0e..78f470ae0 100755 --- a/bin/indexer.php +++ b/bin/indexer.php @@ -91,63 +91,13 @@ function _index($id){ _quietecho("done.\n"); } -/** - * lock the indexer system - */ -function _lock(){ - global $conf; - $lock = $conf['lockdir'].'/_indexer.lock'; - $said = false; - while(!@mkdir($lock, $conf['dmode'])){ - if(time()-@filemtime($lock) > 60*5){ - // looks like a stale lock - remove it - @rmdir($lock); - }else{ - if($said){ - _quietecho("."); - }else{ - _quietecho("Waiting for lockfile (max. 5 min)"); - $said = true; - } - sleep(15); - } - } - if($conf['dperm']) chmod($lock, $conf['dperm']); - if($said) _quietecho("\n"); -} - -/** - * unlock the indexer sytem - */ -function _unlock(){ - global $conf; - $lock = $conf['lockdir'].'/_indexer.lock'; - @rmdir($lock); -} - /** * Clear all index files */ function _clearindex(){ - global $conf; - _lock(); _quietecho("Clearing index... 
"); - io_saveFile($conf['indexdir'].'/page.idx',''); - io_saveFile($conf['indexdir'].'/title.idx',''); - io_saveFile($conf['indexdir'].'/pageword.idx',''); - io_saveFile($conf['indexdir'].'/metadata.idx',''); - $dir = @opendir($conf['indexdir']); - if($dir!==false){ - while(($f = readdir($dir)) !== false){ - if(substr($f,-4)=='.idx' && - (substr($f,0,1)=='i' || substr($f,0,1)=='w' - || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx')) - @unlink($conf['indexdir']."/$f"); - } - } - @unlink($conf['indexdir'].'/lengths.idx'); + idx_get_indexer()->clear(); _quietecho("done.\n"); - _unlock(); } function _quietecho($msg) { diff --git a/inc/indexer.php b/inc/indexer.php index 4dfaa33fb..b9eaf31d9 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -401,6 +401,38 @@ class Doku_Indexer { return true; } + /** + * Clear the whole index + * + * @return bool If the index has been cleared successfully + */ + public function clear() { + global $conf; + + if (!$this->lock()) return false; + + @unlink($conf['indexdir'].'/page.idx'); + @unlink($conf['indexdir'].'/title.idx'); + @unlink($conf['indexdir'].'/pageword.idx'); + @unlink($conf['indexdir'].'/metadata.idx'); + $dir = @opendir($conf['indexdir']); + if($dir!==false){ + while(($f = readdir($dir)) !== false){ + if(substr($f,-4)=='.idx' && + (substr($f,0,1)=='i' || substr($f,0,1)=='w' + || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx')) + @unlink($conf['indexdir']."/$f"); + } + } + @unlink($conf['indexdir'].'/lengths.idx'); + + // clear the pid cache + $this->pidCache = array(); + + $this->unlock(); + return true; + } + /** * Split the text into words for fulltext search * -- cgit v1.2.3 From 4f7083212993bf72f0b8969e6d57f587faaa5dfc Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Thu, 20 Dec 2012 17:47:39 +0100 Subject: Remove global $Indexer, clear index in the testing system instead --- _test/core/DokuWikiTest.php | 2 ++ inc/indexer.php | 2 +- 2 files changed, 3 
insertions(+), 1 deletion(-) diff --git a/_test/core/DokuWikiTest.php b/_test/core/DokuWikiTest.php index b9e151456..91eb5293b 100644 --- a/_test/core/DokuWikiTest.php +++ b/_test/core/DokuWikiTest.php @@ -30,6 +30,8 @@ abstract class DokuWikiTest extends PHPUnit_Framework_TestCase { // remove any leftovers from the last run if(is_dir(DOKU_TMP_DATA)){ + // clear indexer data and cache + idx_get_indexer()->clear(); TestUtils::rdelete(DOKU_TMP_DATA); } diff --git a/inc/indexer.php b/inc/indexer.php index b9eaf31d9..7a62345bf 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1228,7 +1228,7 @@ class Doku_Indexer { * @author Tom N Harris */ function idx_get_indexer() { - global $Indexer; + static $Indexer; if (!isset($Indexer)) { $Indexer = new Doku_Indexer(); } -- cgit v1.2.3 From bff17c53db3caca2e08837dc163e10a15f82860b Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Thu, 20 Dec 2012 17:49:19 +0100 Subject: Remove superfluous requires and variables from bin/indexer.php --- bin/indexer.php | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/bin/indexer.php b/bin/indexer.php index 78f470ae0..6f6b5d9fa 100755 --- a/bin/indexer.php +++ b/bin/indexer.php @@ -5,11 +5,6 @@ if ('cli' != php_sapi_name()) die(); ini_set('memory_limit','128M'); if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/'); require_once(DOKU_INC.'inc/init.php'); -require_once(DOKU_INC.'inc/common.php'); -require_once(DOKU_INC.'inc/pageutils.php'); -require_once(DOKU_INC.'inc/search.php'); -require_once(DOKU_INC.'inc/indexer.php'); -require_once(DOKU_INC.'inc/auth.php'); require_once(DOKU_INC.'inc/cliopts.php'); session_write_close(); @@ -67,10 +62,6 @@ function _usage() { function _update(){ global $conf; - global $INDEXER; - - $INDEXER = idx_get_indexer(); - $data = array(); _quietecho("Searching pages... 
"); search($data,$conf['datadir'],'search_allpages',array('skipacl' => true)); @@ -82,7 +73,6 @@ function _update(){ } function _index($id){ - global $INDEXER; global $CLEAR; global $QUIET; -- cgit v1.2.3