diff options
author | Michael Hamann <michael@content-space.de> | 2011-02-02 22:41:58 +0100 |
---|---|---|
committer | Michael Hamann <michael@content-space.de> | 2011-02-02 22:41:58 +0100 |
commit | 89b8c518c85b307953db1a78ae207884375c14a0 (patch) | |
tree | 88d39e18c0e8d39a0845d25a84fdbb090f476bc9 | |
parent | fc756e0d4d88b37c01a9155e675a549430b00593 (diff) | |
parent | f078bb0088870b4b68b348d546afa30a80a07e87 (diff) | |
download | rpg-89b8c518c85b307953db1a78ae207884375c14a0.tar.gz rpg-89b8c518c85b307953db1a78ae207884375c14a0.tar.bz2 |
Merge remote-tracking branch 'my-fork/master' into indexer_improvements
-rw-r--r-- | inc/fulltext.php | 9 | ||||
-rw-r--r-- | inc/indexer.php | 121 |
2 files changed, 113 insertions, 17 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php index 805da2e37..1a525f743 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -130,7 +130,6 @@ function ft_backlinks($id){ $result = array(); $result = idx_get_indexer()->lookupKey('relation_references', $id); - $result = $result[$id]; if(!count($result)) return $result; @@ -234,8 +233,12 @@ function _ft_pageLookup(&$data){ $pages[$p_id] = p_get_first_heading($p_id, false); } } - //if ($in_title) - // $titles = $Indexer->lookupKey('title', "*$id*"); + if ($in_title) { + foreach ($Indexer->lookupKey('title', "*$id*") as $p_id) { + if (!isset($pages[$p_id])) + $pages[$p_id] = p_get_first_heading($p_id, false); + } + } } if (isset($ns)) { foreach ($page_idx as $p_id) { diff --git a/inc/indexer.php b/inc/indexer.php index 91d6842e4..3a0331302 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -320,6 +320,46 @@ class Doku_Indexer { * @author Tom N Harris <tnharris@whoopdedo.org> */ public function deletePage($page) { + if (!$this->_lock()) + return "locked"; + + // load known documents + $page_idx = $this->_getIndexKey('page', '', $page); + if ($page_idx === false) { + $this->_unlock(); + return false; + } + + // Remove obsolete index entries + $pageword_idx = $this->_getIndexKey('pageword', '', $pid); + if ($pageword_idx !== '') { + $delwords = explode(':',$pageword_idx); + $upwords = array(); + foreach ($delwords as $word) { + if ($word != '') { + list($wlen,$wid) = explode('*', $word); + $wid = (int)$wid; + $upwords[$wlen][] = $wid; + } + } + foreach ($upwords as $wlen => $widx) { + $index = $this->_getIndex('i', $wlen); + foreach ($widx as $wid) { + $index[$wid] = $this->_updateTuple($index[$wid], $pid, 0); + } + $this->_saveIndex('i', $wlen, $index); + } + } + // Save the reverse index + if (!$this->_saveIndexKey('pageword', '', $pid, "")) { + $this->_unlock(); + return false; + } + + // XXX TODO: delete meta keys + + $this->_unlock(); + return true; } /** @@ -423,7 +463,7 @@ class Doku_Indexer { * @param string $key name of the metadata key to look for * @param string $value search term to look for * @param callback $func comparison function - * @return array lists with page names, keys are query values + * @return array lists with page names, keys are query values if $key is array * @author Tom N Harris <tnharris@whoopdedo.org> * @author Michael Hamann <michael@content-space.de> */ @@ -436,15 +476,38 @@ class Doku_Indexer { // the matching ids for the provided value(s) $value_ids = array(); - if (!is_array($value)) $value = array($value); + if (!is_array($value)) + $value_array = array($value); + else + $value_array =& $value; - foreach ($value as $val) { - if (is_null($func)) { - if (($i = array_search($val, $words)) !== false) - $value_ids[$i] = $val; - } else { + if (!is_null($func)) { + foreach ($value_array as $val) { foreach ($words as $i => $word) { - if (call_user_func_array($func, array($word, $value))) + if (call_user_func_array($func, array($word, $val))) + $value_ids[$i] = $val; + } + } + } else { + foreach ($value_array as $val) { + $xval = $val; + $caret = false; + $dollar = false; + // check for wildcards + if (substr($xval, 0, 1) == '*') { + $xval = substr($xval, 1); + $caret = '^'; + } + if (substr($xval, -1, 1) == '*') { + $xval = substr($xval, 0, -1); + $dollar = '$'; + } + if ($caret || $dollar) { + $re = $caret.preg_quote($xval, '/').$dollar; + foreach(array_keys(preg_grep('/'.$re.'/', $words)) as $i) + $value_ids[$i] = $val; + } else { + if (($i = array_search($val, $words)) !== false) $value_ids[$i] = $val; } } @@ -462,6 +525,7 @@ class Doku_Indexer { // is an array with page_id => 1, page2_id => 1 etc. so take the keys only $result[$val] = array_keys($this->_parseTuples($page_idx, $lines[$value_id])); } + if (!is_array($value)) $result = $result[$value]; return $result; } @@ -492,12 +556,12 @@ class Doku_Indexer { // check for wildcards if (substr($xword, 0, 1) == '*') { $xword = substr($xword, 1); - $caret = true; + $caret = '^'; $wlen -= 1; } if (substr($xword, -1, 1) == '*') { $xword = substr($xword, 0, -1); - $dollar = true; + $dollar = '$'; $wlen -= 1; } if ($wlen < IDX_MINWORDLENGTH && !$caret && !$dollar && !is_numeric($xword)) @@ -505,9 +569,7 @@ class Doku_Indexer { if (!isset($tokens[$xword])) $tokenlength[$wlen][] = $xword; if ($caret || $dollar) { - $re = preg_quote($xword, '/'); - if ($caret) $re = '^'.$re; - if ($dollar) $re = $re.'$'; + $re = $caret.preg_quote($xword, '/').$dollar; $tokens[$xword][] = array($word, '/'.$re.'/'); if (!isset($tokenwild[$xword])) $tokenwild[$xword] = $wlen; @@ -926,6 +988,36 @@ function idx_addPage($page, $verbose=false) { } } + if (!page_exists($page)) { + if (!@file_exists($idxtag)) { + if ($verbose) print("Indexer: $page does not exist, ignoring".DOKU_LF); + return false; + } + $Indexer = idx_get_indexer(); + $result = $Indexer->deletePage($page); + if ($result === "locked") { + if ($verbose) print("Indexer: locked".DOKU_LF); + return false; + } + @unlink($idxtag); + return $result; + } + $indexenabled = p_get_metadata($page, 'internal index', false); + if ($indexenabled === false) { + $result = false; + if (@file_exists($idxtag)) { + $Indexer = idx_get_indexer(); + $result = $Indexer->deletePage($page); + if ($result === "locked") { + if ($verbose) print("Indexer: locked".DOKU_LF); + return false; + } + @unlink($idxtag); + } + if ($verbose) print("Indexer: index disabled for $page".DOKU_LF); + return $result; + } + $body = ''; $data = array($page, $body); $evt = new Doku_Event('INDEXER_PAGE_ADD', $data); @@ -944,7 +1036,8 @@ function idx_addPage($page, $verbose=false) { if ($result) { $data = array('page' => $page, 'metadata' => array()); - if (($references = p_get_metadata($page, 'relation references')) !== null) + $data['metadata']['title'] = p_get_metadata($page, 'title', false); + if (($references = p_get_metadata($page, 'relation references', false)) !== null) $data['metadata']['relation_references'] = array_keys($references); $evt = new Doku_Event('INDEXER_METADATA_INDEX', $data); |