From 5a1d05488ca6e176af4d195accbdca8010cf3416 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Wed, 11 Dec 2013 23:02:17 +0100 Subject: Fix indexing numeric words in page contents Since a8dba4523d2ecd09dd69a68a36673eaf5c009c57 the search index didn't properly index numeric words anymore, instead they were added as new words to the word list each time they were indexed, leading to an ever-increasing index size. --- inc/indexer.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 658fb966b..07f29b542 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -10,7 +10,7 @@ if(!defined('DOKU_INC')) die('meh.'); // Version tag used to force rebuild on upgrade -define('INDEXER_VERSION', 7); +define('INDEXER_VERSION', 8); // set the minimum token length to use in the index (note, this doesn't apply to numeric tokens) if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',2); @@ -215,6 +215,7 @@ class Doku_Indexer { foreach (array_keys($words) as $wlen) { $word_idx = $this->getIndex('w', $wlen); foreach ($words[$wlen] as $word => $freq) { + $word = (string)$word; $wid = array_search($word, $word_idx, true); if ($wid === false) { $wid = count($word_idx); -- cgit v1.2.3 From 39134585c3f7a3c02113fb844ffabe0c1398c937 Mon Sep 17 00:00:00 2001 From: Christopher Smith Date: Wed, 5 Mar 2014 22:15:25 +0000 Subject: add some braces (just style, not E_ALL) --- inc/indexer.php | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 07f29b542..7c0b8bc18 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -304,15 +304,17 @@ class Doku_Indexer { $addwords = true; } // test if value is already in the index - if (isset($val_idx[$id]) && $val_idx[$id] <= 0) + if (isset($val_idx[$id]) && $val_idx[$id] <= 0){ $val_idx[$id] = 0; - else // else add it + } else { // else add it $val_idx[$id] = 1; + } } } - if ($addwords) + if ($addwords) { $this->saveIndex($metaname.'_w', '', $metawords); + } $vals_changed = false; foreach ($val_idx as $id => $action) { if ($action == -1) { @@ -1214,14 +1216,16 @@ class Doku_Indexer { */ protected function updateTuple($line, $id, $count) { $newLine = $line; - if ($newLine !== '') + if ($newLine !== ''){ $newLine = preg_replace('/(^|:)'.preg_quote($id,'/').'\*\d*/', '', $newLine); + } $newLine = trim($newLine, ':'); if ($count) { - if (strlen($newLine) > 0) + if (strlen($newLine) > 0) { return "$id*$count:".$newLine; - else + } else { return "$id*$count".$newLine; + } } return $newLine; } -- cgit v1.2.3 From e1ecd6fde3f52e917907ec1dffc774829934d0dd Mon Sep 17 00:00:00 2001 From: Christopher Smith Date: Thu, 6 Mar 2014 20:05:31 +0000 Subject: resolve notices generated in calls to updateTuple() when $metaidx[$id] doesn't exist --- inc/indexer.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 7c0b8bc18..a167db47f 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -270,8 +270,9 @@ class Doku_Indexer { // Special handling for titles so the index file is simpler if (array_key_exists('title', $key)) { $value = $key['title']; - if (is_array($value)) + if (is_array($value)) { $value = $value[0]; + } $this->saveIndexKey('title', '', $pid, $value); unset($key['title']); } @@ -299,8 +300,10 @@ class Doku_Indexer { if ($val !== "") { $id = array_search($val, $metawords, true); if ($id === false) { + // didn't find $val, so we'll add it to the end of metawords and create a placeholder in metaidx $id = count($metawords); $metawords[$id] = $val; + $metaidx[$id] = ''; $addwords = true; } // test if value is already in the index -- cgit v1.2.3 From c6568d42d53cab0d8fef498f00a3b9f30d7403ad Mon Sep 17 00:00:00 2001 From: Christopher Smith Date: Wed, 12 Mar 2014 18:20:33 +0000 Subject: Refactor updateTuple() The existing function was difficult to follow. It worked when adding a new tuple, but not in the most sensible manner. --- inc/indexer.php | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index a167db47f..5ca2f0bb1 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1218,19 +1218,18 @@ class Doku_Indexer { * @author Tom N Harris */ protected function updateTuple($line, $id, $count) { - $newLine = $line; - if ($newLine !== ''){ - $newLine = preg_replace('/(^|:)'.preg_quote($id,'/').'\*\d*/', '', $newLine); + if ($line != ''){ + $line = preg_replace('/(^|:)'.preg_quote($id,'/').'\*\d*/', '', $line); } - $newLine = trim($newLine, ':'); + $line = trim($line, ':'); if ($count) { - if (strlen($newLine) > 0) { - return "$id*$count:".$newLine; + if ($line) { + return "$id*$count:".$line; } else { - return "$id*$count".$newLine; + return "$id*$count"; } } - return $newLine; + return $line; } /** -- cgit v1.2.3