From bbc85ee4bc98fadf89707309f923f8ae2c16f727 Mon Sep 17 00:00:00 2001
From: Tom N Harris
Date: Mon, 24 Jan 2011 02:52:10 -0500
Subject: Indexer v3 Rewrite: streamline indexing of deleted or disabled pages

---
 inc/indexer.php | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

(limited to 'inc/indexer.php')

diff --git a/inc/indexer.php b/inc/indexer.php
index 8859ada33..5f37ec46c 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -315,6 +315,46 @@ class Doku_Indexer {
      * @author Tom N Harris
      */
     public function deletePage($page) {
+        if (!$this->_lock())
+            return "locked";
+
+        // look up the page id; NOTE(review): confirm _getIndexKey yields the id expected below
+        $pid = $this->_getIndexKey('page', '', $page);
+        if ($pid === false) {
+            $this->_unlock();
+            return false;
+        }
+
+        // Remove obsolete index entries
+        $pageword_idx = $this->_getIndexKey('pageword', '', $pid);
+        if ($pageword_idx !== '') {
+            $delwords = explode(':',$pageword_idx);
+            $upwords = array();
+            foreach ($delwords as $word) {
+                if ($word != '') {
+                    list($wlen,$wid) = explode('*', $word);
+                    $wid = (int)$wid;
+                    $upwords[$wlen][] = $wid;
+                }
+            }
+            foreach ($upwords as $wlen => $widx) {
+                $index = $this->_getIndex('i', $wlen);
+                foreach ($widx as $wid) {
+                    $index[$wid] = $this->_updateTuple($index[$wid], $pid, 0);
+                }
+                $this->_saveIndex('i', $wlen, $index);
+            }
+        }
+        // Save the reverse index
+        if (!$this->_saveIndexKey('pageword', '', $pid, "")) {
+            $this->_unlock();
+            return false;
+        }
+
+        // XXX TODO: delete meta keys
+
+        $this->_unlock();
+        return true;
     }
 
     /**
@@ -921,6 +961,36 @@ function idx_addPage($page, $verbose=false) {
         }
     }
 
+    if (!page_exists($page)) {
+        if (!@file_exists($idxtag)) {
+            if ($verbose) print("Indexer: $page does not exist, ignoring".DOKU_LF);
+            return false;
+        }
+        $Indexer = idx_get_indexer();
+        $result = $Indexer->deletePage($page);
+        if ($result === "locked") {
+            if ($verbose) print("Indexer: locked".DOKU_LF);
+            return false;
+        }
+        @unlink($idxtag);
+        return $result;
+    }
+    $indexenabled = p_get_metadata($page, 'internal index', false);
+    if ($indexenabled === false) {
+        $result = false;
+        if (@file_exists($idxtag)) {
+            $Indexer = idx_get_indexer();
+            $result = $Indexer->deletePage($page);
+            if ($result === "locked") {
+                if ($verbose) print("Indexer: locked".DOKU_LF);
+                return false;
+            }
+            @unlink($idxtag);
+        }
+        if ($verbose) print("Indexer: index disabled for $page".DOKU_LF);
+        return $result;
+    }
+
     $body = '';
     $data = array($page, $body);
     $evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
@@ -939,7 +1009,8 @@ function idx_addPage($page, $verbose=false) {
 
     if ($result) {
         $data = array('page' => $page, 'metadata' => array());
-        if (($references = p_get_metadata($page, 'relation references')) !== null)
+        $data['metadata']['title'] = p_get_metadata($page, 'title', false);
+        if (($references = p_get_metadata($page, 'relation references', false)) !== null)
             $data['metadata']['relation_references'] = array_keys($references);
 
         $evt = new Doku_Event('INDEXER_METADATA_INDEX', $data);
-- 
cgit v1.2.3


From f078bb0088870b4b68b348d546afa30a80a07e87 Mon Sep 17 00:00:00 2001
From: Tom N Harris
Date: Mon, 24 Jan 2011 03:46:11 -0500
Subject: Indexer Rewrite v3: wildcards in lookupKey and automatically unwrap single result

---
 inc/indexer.php | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

(limited to 'inc/indexer.php')

diff --git a/inc/indexer.php b/inc/indexer.php
index 5f37ec46c..6af2de15d 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -458,7 +458,7 @@ class Doku_Indexer {
      * @param string    $key    name of the metadata key to look for
      * @param string    $value  search term to look for
      * @param callback  $func   comparison function
-     * @return array            lists with page names, keys are query values
+     * @return array            lists with page names, keys are query values if $value is array
      * @author Tom N Harris
      * @author Michael Hamann
      */
@@ -471,15 +471,38 @@ class Doku_Indexer {
         // the matching ids for the provided value(s)
         $value_ids = array();
 
-        if (!is_array($value)) $value = array($value);
+        if (!is_array($value))
+            $value_array = array($value);
+        else
+            $value_array =& $value;
-        foreach ($value as $val) {
-            if (is_null($func)) {
-                if (($i = array_search($val, $words)) !== false)
-                    $value_ids[$i] = $val;
-            } else {
+        if (!is_null($func)) {
+            foreach ($value_array as $val) {
                 foreach ($words as $i => $word) {
-                    if (call_user_func_array($func, array($word, $value)))
+                    if (call_user_func_array($func, array($word, $val)))
+                        $value_ids[$i] = $val;
+                }
+            }
+        } else {
+            foreach ($value_array as $val) {
+                $xval = $val;
+                $caret = '^';
+                $dollar = '$';
+                // anchored on both sides by default; a '*' drops the anchor on its side
+                if (substr($xval, 0, 1) == '*') {
+                    $xval = substr($xval, 1);
+                    $caret = '';
+                }
+                if (substr($xval, -1, 1) == '*') {
+                    $xval = substr($xval, 0, -1);
+                    $dollar = '';
+                }
+                if (!$caret || !$dollar) {
+                    $re = $caret.preg_quote($xval, '/').$dollar;
+                    foreach(array_keys(preg_grep('/'.$re.'/', $words)) as $i)
+                        $value_ids[$i] = $val;
+                } else {
+                    if (($i = array_search($val, $words)) !== false)
                         $value_ids[$i] = $val;
                 }
             }
         }
@@ -497,6 +520,7 @@ class Doku_Indexer {
             // is an array with page_id => 1, page2_id => 1 etc. so take the keys only
             $result[$val] = array_keys($this->_parseTuples($page_idx, $lines[$value_id]));
         }
+        if (!is_array($value)) $result = $result[$value];
         return $result;
     }
 
@@ -527,12 +551,12 @@ class Doku_Indexer {
             // check for wildcards
             if (substr($xword, 0, 1) == '*') {
                 $xword = substr($xword, 1);
-                $caret = true;
+                $caret = '^';
                 $wlen -= 1;
             }
             if (substr($xword, -1, 1) == '*') {
                 $xword = substr($xword, 0, -1);
-                $dollar = true;
+                $dollar = '$';
                 $wlen -= 1;
             }
             if ($wlen < IDX_MINWORDLENGTH && !$caret && !$dollar && !is_numeric($xword))
@@ -540,9 +564,7 @@ class Doku_Indexer {
             if (!isset($tokens[$xword]))
                 $tokenlength[$wlen][] = $xword;
             if ($caret || $dollar) {
-                $re = preg_quote($xword, '/');
-                if ($caret) $re = '^'.$re;
-                if ($dollar) $re = $re.'$';
+                $re = $caret.preg_quote($xword, '/').$dollar;
                 $tokens[$xword][] = array($word, '/'.$re.'/');
                 if (!isset($tokenwild[$xword]))
                     $tokenwild[$xword] = $wlen;
-- 
cgit v1.2.3