summary | refs | log | tree | commit | diff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author Michael Hamann <michael@content-space.de> 2011-02-02 22:41:58 +0100
committer Michael Hamann <michael@content-space.de> 2011-02-02 22:41:58 +0100
commit 89b8c518c85b307953db1a78ae207884375c14a0 (patch)
tree 88d39e18c0e8d39a0845d25a84fdbb090f476bc9 /inc/indexer.php
parent fc756e0d4d88b37c01a9155e675a549430b00593 (diff)
parent f078bb0088870b4b68b348d546afa30a80a07e87 (diff)
download rpg-89b8c518c85b307953db1a78ae207884375c14a0.tar.gz
rpg-89b8c518c85b307953db1a78ae207884375c14a0.tar.bz2
Merge remote-tracking branch 'my-fork/master' into indexer_improvements
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 121
1 files changed, 107 insertions, 14 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 91d6842e4..3a0331302 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -320,6 +320,46 @@ class Doku_Indexer {
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
public function deletePage($page) {
+ if (!$this->_lock())
+ return "locked";
+
+ // load known documents: $pid keys every lookup below (NOTE(review): confirm _getIndexKey() accepts a page name here)
+ $pid = $this->_getIndexKey('page', '', $page);
+ if ($pid === false) {
+ $this->_unlock();
+ return false;
+ }
+
+ // Remove obsolete index entries; the pageword entry is a ':'-separated list of "wlen*wid" tokens
+ $pageword_idx = $this->_getIndexKey('pageword', '', $pid);
+ if ($pageword_idx !== '') {
+ $delwords = explode(':',$pageword_idx);
+ $upwords = array();
+ foreach ($delwords as $word) {
+ if ($word != '') {
+ list($wlen,$wid) = explode('*', $word);
+ $wid = (int)$wid;
+ $upwords[$wlen][] = $wid; // group word ids by word length so each 'i' index is loaded and saved once
+ }
+ }
+ foreach ($upwords as $wlen => $widx) {
+ $index = $this->_getIndex('i', $wlen);
+ foreach ($widx as $wid) {
+ $index[$wid] = $this->_updateTuple($index[$wid], $pid, 0); // NOTE(review): presumably a count of 0 drops $pid from the tuple - confirm
+ }
+ $this->_saveIndex('i', $wlen, $index);
+ }
+ }
+ // Save the reverse index: the page's pageword entry is overwritten with an empty string
+ if (!$this->_saveIndexKey('pageword', '', $pid, "")) {
+ $this->_unlock();
+ return false;
+ }
+
+ // XXX TODO: delete meta keys
+
+ $this->_unlock();
+ return true;
}
/**
@@ -423,7 +463,7 @@ class Doku_Indexer {
* @param string $key name of the metadata key to look for
* @param string $value search term to look for
* @param callback $func comparison function
- * @return array lists with page names, keys are query values
+ * @return array lists with page names, keys are query values if $key is array
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Michael Hamann <michael@content-space.de>
*/
@@ -436,15 +476,38 @@ class Doku_Indexer {
// the matching ids for the provided value(s)
$value_ids = array();
- if (!is_array($value)) $value = array($value);
+ if (!is_array($value))
+ $value_array = array($value);
+ else
+ $value_array =& $value;
- foreach ($value as $val) {
- if (is_null($func)) {
- if (($i = array_search($val, $words)) !== false)
- $value_ids[$i] = $val;
- } else {
+ if (!is_null($func)) {
+ foreach ($value_array as $val) {
foreach ($words as $i => $word) {
- if (call_user_func_array($func, array($word, $value)))
+ if (call_user_func_array($func, array($word, $val)))
+ $value_ids[$i] = $val;
+ }
+ }
+ } else {
+ foreach ($value_array as $val) {
+ $xval = $val;
+ $caret = false;
+ $dollar = false;
+ // check for wildcards
+ if (substr($xval, 0, 1) == '*') {
+ $xval = substr($xval, 1);
+ $caret = '^';
+ }
+ if (substr($xval, -1, 1) == '*') {
+ $xval = substr($xval, 0, -1);
+ $dollar = '$';
+ }
+ if ($caret || $dollar) {
+ $re = $caret.preg_quote($xval, '/').$dollar;
+ foreach(array_keys(preg_grep('/'.$re.'/', $words)) as $i)
+ $value_ids[$i] = $val;
+ } else {
+ if (($i = array_search($val, $words)) !== false)
$value_ids[$i] = $val;
}
}
@@ -462,6 +525,7 @@ class Doku_Indexer {
// is an array with page_id => 1, page2_id => 1 etc. so take the keys only
$result[$val] = array_keys($this->_parseTuples($page_idx, $lines[$value_id]));
}
+ if (!is_array($value)) $result = $result[$value];
return $result;
}
@@ -492,12 +556,12 @@ class Doku_Indexer {
// check for wildcards
if (substr($xword, 0, 1) == '*') {
$xword = substr($xword, 1);
- $caret = true;
+ $caret = '^';
$wlen -= 1;
}
if (substr($xword, -1, 1) == '*') {
$xword = substr($xword, 0, -1);
- $dollar = true;
+ $dollar = '$';
$wlen -= 1;
}
if ($wlen < IDX_MINWORDLENGTH && !$caret && !$dollar && !is_numeric($xword))
@@ -505,9 +569,7 @@ class Doku_Indexer {
if (!isset($tokens[$xword]))
$tokenlength[$wlen][] = $xword;
if ($caret || $dollar) {
- $re = preg_quote($xword, '/');
- if ($caret) $re = '^'.$re;
- if ($dollar) $re = $re.'$';
+ $re = $caret.preg_quote($xword, '/').$dollar;
$tokens[$xword][] = array($word, '/'.$re.'/');
if (!isset($tokenwild[$xword]))
$tokenwild[$xword] = $wlen;
@@ -926,6 +988,36 @@ function idx_addPage($page, $verbose=false) {
}
}
+ if (!page_exists($page)) {
+ if (!@file_exists($idxtag)) {
+ if ($verbose) print("Indexer: $page does not exist, ignoring".DOKU_LF);
+ return false;
+ }
+ $Indexer = idx_get_indexer();
+ $result = $Indexer->deletePage($page);
+ if ($result === "locked") {
+ if ($verbose) print("Indexer: locked".DOKU_LF);
+ return false;
+ }
+ @unlink($idxtag);
+ return $result;
+ }
+ $indexenabled = p_get_metadata($page, 'internal index', false);
+ if ($indexenabled === false) {
+ $result = false;
+ if (@file_exists($idxtag)) {
+ $Indexer = idx_get_indexer();
+ $result = $Indexer->deletePage($page);
+ if ($result === "locked") {
+ if ($verbose) print("Indexer: locked".DOKU_LF);
+ return false;
+ }
+ @unlink($idxtag);
+ }
+ if ($verbose) print("Indexer: index disabled for $page".DOKU_LF);
+ return $result;
+ }
+
$body = '';
$data = array($page, $body);
$evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
@@ -944,7 +1036,8 @@ function idx_addPage($page, $verbose=false) {
if ($result) {
$data = array('page' => $page, 'metadata' => array());
- if (($references = p_get_metadata($page, 'relation references')) !== null)
+ $data['metadata']['title'] = p_get_metadata($page, 'title', false);
+ if (($references = p_get_metadata($page, 'relation references', false)) !== null)
$data['metadata']['relation_references'] = array_keys($references);
$evt = new Doku_Event('INDEXER_METADATA_INDEX', $data);