From a23ac4d7df6b3952d2d1b76686d3b439fc0a6759 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Thu, 15 Mar 2012 14:21:42 +0100 Subject: Indexer: Check for deleted pages first FS#2469 This moves the check if the page doesn't exist anymore but is still in the index before the check if the index needs to be updated as otherwise deleted pages won't be deleted from the index. --- inc/indexer.php | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 9d8d6f99b..3f56e5fe5 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1174,18 +1174,8 @@ function & idx_get_stopwords() { * @author Tom N Harris */ function idx_addPage($page, $verbose=false, $force=false) { - // check if indexing needed $idxtag = metaFN($page,'.indexed'); - if(!$force && @file_exists($idxtag)){ - if(trim(io_readFile($idxtag)) == idx_get_version()){ - $last = @filemtime($idxtag); - if($last > @filemtime(wikiFN($page))){ - if ($verbose) print("Indexer: index for $page up to date".DOKU_LF); - return false; - } - } - } - + // check if page was deleted but is still in the index if (!page_exists($page)) { if (!@file_exists($idxtag)) { if ($verbose) print("Indexer: $page does not exist, ignoring".DOKU_LF); @@ -1200,6 +1190,18 @@ function idx_addPage($page, $verbose=false, $force=false) { @unlink($idxtag); return $result; } + + // check if indexing needed + if(!$force && @file_exists($idxtag)){ + if(trim(io_readFile($idxtag)) == idx_get_version()){ + $last = @filemtime($idxtag); + if($last > @filemtime(wikiFN($page))){ + if ($verbose) print("Indexer: index for $page up to date".DOKU_LF); + return false; + } + } + } + $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED); if ($indexenabled === false) { $result = false; -- cgit v1.2.3 From c55c59e3d67b1481e86b41e4b49083c99d3659c5 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Thu, 15 Mar 2012 14:56:48 +0100 Subject: 
Fix page deletion in the indexer This fixes page deletion in the indexer and fixes a bug where empty lines were added to the _i.idx file of metadata for which no value was set (harmless, and wasn't executed anyway). --- inc/indexer.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 3f56e5fe5..6766bec25 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -351,7 +351,7 @@ class Doku_Indexer { return "locked"; // load known documents - $pid = $this->getIndexKey('page', '', $page); + $pid = $this->addIndexKey('page', '', $page); if ($pid === false) { $this->unlock(); return false; @@ -389,6 +389,7 @@ class Doku_Indexer { $val_idx = explode(':', $this->getIndexKey($metaname.'_p', '', $pid)); $meta_idx = $this->getIndex($metaname.'_i', ''); foreach ($val_idx as $id) { + if ($id === '') continue; $meta_idx[$id] = $this->updateTuple($meta_idx[$id], $pid, 0); } $this->saveIndex($metaname.'_i', '', $meta_idx); -- cgit v1.2.3 From 63703ba5bd81f50c43bc45f8bf79c514afa3ee49 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Fri, 16 Mar 2012 12:09:30 +0100 Subject: coding style updates --- inc/indexer.php | 1 - 1 file changed, 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 6766bec25..80d2651c2 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -291,7 +291,6 @@ class Doku_Indexer { $val_idx = array(); } - foreach ($values as $val) { $val = (string)$val; if ($val !== "") { -- cgit v1.2.3 From b3d1090ed94469e7a1f38c0352a8a4184a6ac846 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Wed, 25 Jul 2012 18:25:04 +0200 Subject: Fix type declarations in some PHPDoc comments --- inc/indexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 80d2651c2..d3a10f7ad 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1131,7 +1131,7 @@ class Doku_Indexer { 
/** * Create an instance of the indexer. * - * @return object a Doku_Indexer + * @return Doku_Indexer a Doku_Indexer * @author Tom N Harris */ function idx_get_indexer() { -- cgit v1.2.3 From 92a5d12663ce0d2371a16a0b894f9796f020146c Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 28 Jul 2012 10:49:13 +0200 Subject: Remove unused variables --- inc/indexer.php | 4 ---- 1 file changed, 4 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index d3a10f7ad..d8768d3ab 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -65,7 +65,6 @@ define('IDX_ASIAN', '(?:'.IDX_ASIAN1.'|'.IDX_ASIAN2.'|'.IDX_ASIAN3.')'); function idx_get_version(){ static $indexer_version = null; if ($indexer_version == null) { - global $conf; $version = INDEXER_VERSION; // DokuWiki version is included for the convenience of plugins @@ -192,7 +191,6 @@ class Doku_Indexer { * @author Tom N Harris */ protected function getPageWords($text) { - global $conf; $tokens = $this->tokenizer($text); $tokens = array_count_values($tokens); // count the frequency of each token @@ -415,8 +413,6 @@ class Doku_Indexer { * @author Andreas Gohr */ public function tokenizer($text, $wc=false) { - global $conf; - $words = array(); $wc = ($wc) ? 
'' : '\*'; $stopwords =& idx_get_stopwords(); -- cgit v1.2.3 From 9a9b579a79463369319f9613a630625a99eeded0 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 28 Jul 2012 10:50:26 +0200 Subject: Fix index histogram minlen parameter for metadata keys This includes a test for the minlen parameter for metadata keys --- inc/indexer.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index d8768d3ab..c28499d68 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -755,13 +755,15 @@ class Doku_Indexer { $val_idx = array(); foreach ($index as $wid => $line) { $freq = $this->countTuples($line); - if ($freq >= $min && (!$max || $freq <= $max) && strlen($val) >= $minlen) + if ($freq >= $min && (!$max || $freq <= $max)) $val_idx[$wid] = $freq; } if (!empty($val_idx)) { $words = $this->getIndex($metaname.'_w', ''); - foreach ($val_idx as $wid => $freq) - $result[$words[$wid]] = $freq; + foreach ($val_idx as $wid => $freq) { + if (strlen($words[$wid]) >= $minlen) + $result[$words[$wid]] = $freq; + } } } else { -- cgit v1.2.3 From e3ab6fc5cbab1aaf365e73abaa3d91c03eebdd47 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 28 Jul 2012 11:06:10 +0200 Subject: Fixed and extended PHPDoc comments and added additional @var comments --- inc/indexer.php | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index c28499d68..321940508 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -463,8 +463,8 @@ class Doku_Indexer { * in the returned list is an array with the page names as keys and the * number of times that token appears on the page as value. 
* - * @param arrayref $tokens list of words to search for - * @return array list of page names with usage counts + * @param array $tokens list of words to search for + * @return array list of page names with usage counts * @author Tom N Harris * @author Andreas Gohr */ @@ -615,9 +615,9 @@ class Doku_Indexer { * The $result parameter can be used to merge the index locations with * the appropriate query term. * - * @param arrayref $words The query terms. - * @param arrayref $result Set to word => array("length*id" ...) - * @return array Set to length => array(id ...) + * @param array $words The query terms. + * @param array $result Set to word => array("length*id" ...) + * @return array Set to length => array(id ...) * @author Tom N Harris */ protected function getIndexWords(&$words, &$result) { @@ -728,7 +728,7 @@ class Doku_Indexer { * * @param int $min bottom frequency threshold * @param int $max upper frequency limit. No limit if $max<$min - * @param int $length minimum length of words to count + * @param int $minlen minimum length of words to count * @param string $key metadata key to list. Uses the fulltext index if not given * @return array list of words as the keys and frequency as values * @author Tom N Harris @@ -852,7 +852,8 @@ class Doku_Indexer { * * @param string $idx name of the index * @param string $suffix subpart identifier - * @param arrayref $linex list of lines without LF + * @param array $lines list of lines without LF + * @return bool If saving succeeded * @author Tom N Harris */ protected function saveIndex($idx, $suffix, &$lines) { @@ -902,6 +903,7 @@ class Doku_Indexer { * @param string $suffix subpart identifier * @param int $id the line number * @param string $line line to write + * @return bool If saving succeeded * @author Tom N Harris */ protected function saveIndexKey($idx, $suffix, $id, $line) { @@ -961,6 +963,11 @@ class Doku_Indexer { return $id; } + /** + * @param string $idx The index file which should be added to the key. 
+ * @param string $suffix The suffix of the file + * @param bool $delete Unused + */ protected function cacheIndexDir($idx, $suffix, $delete=false) { global $conf; if ($idx == 'i') @@ -1263,8 +1270,8 @@ function idx_addPage($page, $verbose=false, $force=false) { * Important: No ACL checking is done here! All results are * returned, regardless of permissions * - * @param arrayref $words list of words to search for - * @return array list of pages found, associated with the search terms + * @param array $words list of words to search for + * @return array list of pages found, associated with the search terms */ function idx_lookup(&$words) { $Indexer = idx_get_indexer(); -- cgit v1.2.3 From 3e8e3ae62e43569a3dbe679e04d07744e045e472 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 28 Jul 2012 11:09:27 +0200 Subject: Fix inconsistent return values in the indexer and in getInfo() --- inc/indexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 321940508..f22aee3a0 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1036,7 +1036,7 @@ class Doku_Indexer { $fh = @fopen($cachename.'.tmp', 'w'); if (!$fh) { trigger_error("Failed to write index cache", E_USER_ERROR); - return; + return $lengths; } @fwrite($fh, implode("\n", $lengths)); @fclose($fh); -- cgit v1.2.3