summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Hamann <michael@content-space.de> 2013-01-06 11:25:37 -0800
committer: Michael Hamann <michael@content-space.de> 2013-01-06 11:25:37 -0800
commit: fdf855d93183bf3eb0074877172dab6809d9ef7d (patch)
tree: c81cd6190534c8a284baf6a2fea9738271cdb48a
parent: b9ad156b06ca6d5530803836871777b10894f130 (diff)
parent: bff17c53db3caca2e08837dc163e10a15f82860b (diff)
download: rpg-fdf855d93183bf3eb0074877172dab6809d9ef7d.tar.gz
download: rpg-fdf855d93183bf3eb0074877172dab6809d9ef7d.tar.bz2
Merge pull request #148 from michitux/indexer
Indexer: Add the PID to INDEXER_PAGE_ADD and add getPID/getPageFromPID functions
-rw-r--r-- _test/core/DokuWikiTest.php 2
-rw-r--r-- _test/tests/inc/indexer_pid.test.php 18
-rwxr-xr-x bin/indexer.php 62
-rw-r--r-- inc/indexer.php 109
4 files changed, 122 insertions, 69 deletions
diff --git a/_test/core/DokuWikiTest.php b/_test/core/DokuWikiTest.php
index b9e151456..91eb5293b 100644
--- a/_test/core/DokuWikiTest.php
+++ b/_test/core/DokuWikiTest.php
@@ -30,6 +30,8 @@ abstract class DokuWikiTest extends PHPUnit_Framework_TestCase {
// remove any leftovers from the last run
if(is_dir(DOKU_TMP_DATA)){
+ // clear indexer data and cache
+ idx_get_indexer()->clear();
TestUtils::rdelete(DOKU_TMP_DATA);
}
diff --git a/_test/tests/inc/indexer_pid.test.php b/_test/tests/inc/indexer_pid.test.php
new file mode 100644
index 000000000..8c58b1abd
--- /dev/null
+++ b/_test/tests/inc/indexer_pid.test.php
@@ -0,0 +1,18 @@
+<?php
+/**
+ * Tests the pid functions of the indexer.
+ *
+ * @author Michael Hamann <michael@content-space.de>
+ */
+class indexer_pid_test extends DokuWikiTest {
+ function test_pid() {
+ $indexer = idx_get_indexer();
+ $syntaxPID = $indexer->getPID('wiki:syntax');
+ $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\'');
+ $dokuwikiPID = $indexer->getPID('wiki:dokuwiki');
+ $this->assertEquals('wiki:syntax', $indexer->getPageFromPID($syntaxPID), 'getPageFromPID(getPID(\'wiki:syntax\')) != \'wiki:syntax\' after getting the PID for wiki:dokuwiki');
+ $this->assertEquals($syntaxPID, $indexer->getPID('wiki:syntax'), 'getPID(\'wiki:syntax\') didn\'t returned different PIDs when called twice');
+ $this->assertNotEquals($syntaxPID, $dokuwikiPID, 'Same PID returned for different pages');
+ $this->assertTrue(is_numeric($syntaxPID) && is_numeric($dokuwikiPID), 'PIDs are not numeric');
+ }
+}
diff --git a/bin/indexer.php b/bin/indexer.php
index f6aeb4f0e..6f6b5d9fa 100755
--- a/bin/indexer.php
+++ b/bin/indexer.php
@@ -5,11 +5,6 @@ if ('cli' != php_sapi_name()) die();
ini_set('memory_limit','128M');
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
require_once(DOKU_INC.'inc/init.php');
-require_once(DOKU_INC.'inc/common.php');
-require_once(DOKU_INC.'inc/pageutils.php');
-require_once(DOKU_INC.'inc/search.php');
-require_once(DOKU_INC.'inc/indexer.php');
-require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/cliopts.php');
session_write_close();
@@ -67,10 +62,6 @@ function _usage() {
function _update(){
global $conf;
- global $INDEXER;
-
- $INDEXER = idx_get_indexer();
-
$data = array();
_quietecho("Searching pages... ");
search($data,$conf['datadir'],'search_allpages',array('skipacl' => true));
@@ -82,7 +73,6 @@ function _update(){
}
function _index($id){
- global $INDEXER;
global $CLEAR;
global $QUIET;
@@ -92,62 +82,12 @@ function _index($id){
}
/**
- * lock the indexer system
- */
-function _lock(){
- global $conf;
- $lock = $conf['lockdir'].'/_indexer.lock';
- $said = false;
- while(!@mkdir($lock, $conf['dmode'])){
- if(time()-@filemtime($lock) > 60*5){
- // looks like a stale lock - remove it
- @rmdir($lock);
- }else{
- if($said){
- _quietecho(".");
- }else{
- _quietecho("Waiting for lockfile (max. 5 min)");
- $said = true;
- }
- sleep(15);
- }
- }
- if($conf['dperm']) chmod($lock, $conf['dperm']);
- if($said) _quietecho("\n");
-}
-
-/**
- * unlock the indexer sytem
- */
-function _unlock(){
- global $conf;
- $lock = $conf['lockdir'].'/_indexer.lock';
- @rmdir($lock);
-}
-
-/**
* Clear all index files
*/
function _clearindex(){
- global $conf;
- _lock();
_quietecho("Clearing index... ");
- io_saveFile($conf['indexdir'].'/page.idx','');
- io_saveFile($conf['indexdir'].'/title.idx','');
- io_saveFile($conf['indexdir'].'/pageword.idx','');
- io_saveFile($conf['indexdir'].'/metadata.idx','');
- $dir = @opendir($conf['indexdir']);
- if($dir!==false){
- while(($f = readdir($dir)) !== false){
- if(substr($f,-4)=='.idx' &&
- (substr($f,0,1)=='i' || substr($f,0,1)=='w'
- || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx'))
- @unlink($conf['indexdir']."/$f");
- }
- }
- @unlink($conf['indexdir'].'/lengths.idx');
+ idx_get_indexer()->clear();
_quietecho("done.\n");
- _unlock();
}
function _quietecho($msg) {
diff --git a/inc/indexer.php b/inc/indexer.php
index f22aee3a0..7a62345bf 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -102,6 +102,10 @@ function wordlen($w){
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
class Doku_Indexer {
+ /**
+ * @var array $pidCache Cache for getPID()
+ */
+ protected $pidCache = array();
/**
* Adds the contents of a page to the fulltext index
@@ -120,7 +124,7 @@ class Doku_Indexer {
return "locked";
// load known documents
- $pid = $this->addIndexKey('page', '', $page);
+ $pid = $this->getPIDNoLock($page);
if ($pid === false) {
$this->unlock();
return false;
@@ -256,7 +260,7 @@ class Doku_Indexer {
return "locked";
// load known documents
- $pid = $this->addIndexKey('page', '', $page);
+ $pid = $this->getPIDNoLock($page);
if ($pid === false) {
$this->unlock();
return false;
@@ -348,7 +352,7 @@ class Doku_Indexer {
return "locked";
// load known documents
- $pid = $this->addIndexKey('page', '', $page);
+ $pid = $this->getPIDNoLock($page);
if ($pid === false) {
$this->unlock();
return false;
@@ -398,6 +402,38 @@ class Doku_Indexer {
}
/**
+ * Clear the whole index
+ *
+ * @return bool If the index has been cleared successfully
+ */
+ public function clear() {
+ global $conf;
+
+ if (!$this->lock()) return false;
+
+ @unlink($conf['indexdir'].'/page.idx');
+ @unlink($conf['indexdir'].'/title.idx');
+ @unlink($conf['indexdir'].'/pageword.idx');
+ @unlink($conf['indexdir'].'/metadata.idx');
+ $dir = @opendir($conf['indexdir']);
+ if($dir!==false){
+ while(($f = readdir($dir)) !== false){
+ if(substr($f,-4)=='.idx' &&
+ (substr($f,0,1)=='i' || substr($f,0,1)=='w'
+ || substr($f,-6)=='_w.idx' || substr($f,-6)=='_i.idx' || substr($f,-6)=='_p.idx'))
+ @unlink($conf['indexdir']."/$f");
+ }
+ }
+ @unlink($conf['indexdir'].'/lengths.idx');
+
+ // clear the pid cache
+ $this->pidCache = array();
+
+ $this->unlock();
+ return true;
+ }
+
+ /**
* Split the text into words for fulltext search
*
* TODO: does this also need &$stopwords ?
@@ -454,6 +490,58 @@ class Doku_Indexer {
}
/**
+ * Get the numeric PID of a page
+ *
+ * @param string $page The page to get the PID for
+ * @return bool|int The page id on success, false on error
+ */
+ public function getPID($page) {
+ // return PID without locking when it is in the cache
+ if (isset($this->pidCache[$page])) return $this->pidCache[$page];
+
+ if (!$this->lock())
+ return false;
+
+ // load known documents
+ $pid = $this->getPIDNoLock($page);
+ if ($pid === false) {
+ $this->unlock();
+ return false;
+ }
+
+ $this->unlock();
+ return $pid;
+ }
+
+ /**
+ * Get the numeric PID of a page without locking the index.
+ * Only use this function when the index is already locked.
+ *
+ * @param string $page The page to get the PID for
+ * @return bool|int The page id on success, false on error
+ */
+ protected function getPIDNoLock($page) {
+ // avoid expensive addIndexKey operation for the most recently requested pages by using a cache
+ if (isset($this->pidCache[$page])) return $this->pidCache[$page];
+ $pid = $this->addIndexKey('page', '', $page);
+ // limit cache to 10 entries by discarding the oldest element as in DokuWiki usually only the most recently
+ // added item will be requested again
+ if (count($this->pidCache) > 10) array_shift($this->pidCache);
+ $this->pidCache[$page] = $pid;
+ return $pid;
+ }
+
+ /**
+ * Get the page id of a numeric PID
+ *
+ * @param int $pid The PID to get the page id for
+ * @return string The page id
+ */
+ public function getPageFromPID($pid) {
+ return $this->getIndexKey('page', '', $pid);
+ }
+
+ /**
* Find pages in the fulltext index containing the words,
*
* The search words must be pre-tokenized, meaning only letters and
@@ -946,7 +1034,7 @@ class Doku_Indexer {
* @param string $idx name of the index
* @param string $suffix subpart identifier
* @param string $value line to find in the index
- * @return int line number of the value in the index
+ * @return int|bool line number of the value in the index or false if writing the index failed
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function addIndexKey($idx, $suffix, $value) {
@@ -1140,8 +1228,8 @@ class Doku_Indexer {
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
function idx_get_indexer() {
- static $Indexer = null;
- if (is_null($Indexer)) {
+ static $Indexer;
+ if (!isset($Indexer)) {
$Indexer = new Doku_Indexer();
}
return $Indexer;
@@ -1223,6 +1311,12 @@ function idx_addPage($page, $verbose=false, $force=false) {
return $result;
}
+ $Indexer = idx_get_indexer();
+ $pid = $Indexer->getPID($page);
+ if ($pid === false) {
+ if ($verbose) print("Indexer: getting the PID failed for $page".DOKU_LF);
+ return false;
+ }
$body = '';
$metadata = array();
$metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);
@@ -1230,14 +1324,13 @@ function idx_addPage($page, $verbose=false, $force=false) {
$metadata['relation_references'] = array_keys($references);
else
$metadata['relation_references'] = array();
- $data = compact('page', 'body', 'metadata');
+ $data = compact('page', 'body', 'metadata', 'pid');
$evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
if ($evt->advise_before()) $data['body'] = $data['body'] . " " . rawWiki($page);
$evt->advise_after();
unset($evt);
extract($data);
- $Indexer = idx_get_indexer();
$result = $Indexer->addPageWords($page, $body);
if ($result === "locked") {
if ($verbose) print("Indexer: locked".DOKU_LF);