summary | refs | log | tree | commit | diff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author: Tom N Harris <tnharris@whoopdedo.org> 2011-02-25 17:56:21 -0500
committer: Tom N Harris <tnharris@whoopdedo.org> 2011-02-25 17:56:21 -0500
commit: 675bf41fb9fe7d43646c3ff2de5a1e701818ed2c (patch)
tree: 186c32f0165261bd3fe06d3cadf911c111a6aae1 /inc/indexer.php
parent: 7233c152c0a107c0f12dbc09f5493022b264dddb (diff)
download: rpg-675bf41fb9fe7d43646c3ff2de5a1e701818ed2c.tar.gz
rpg-675bf41fb9fe7d43646c3ff2de5a1e701818ed2c.tar.bz2
Reduce memory footprint of tokenizer; make returned arrays use contiguous keys
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 16
1 files changed, 8 insertions, 8 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index eaab7736a..6913dd4e3 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -439,14 +439,14 @@ class Doku_Indexer {
$text = utf8_stripspecials($text, ' ', '\._\-:'.$wc);
$wordlist = explode(' ', $text);
- foreach ($wordlist as $word) {
+ foreach ($wordlist as $i => &$word) {
$word = (preg_match('/[^0-9A-Za-z]/u', $word)) ?
utf8_strtolower($word) : strtolower($word);
- if (!is_numeric($word) && strlen($word) < IDX_MINWORDLENGTH) continue;
- if (array_search($word, $stopwords) !== false) continue;
- $words[] = $word;
+ if ((!is_numeric($word) && strlen($word) < IDX_MINWORDLENGTH)
+ || array_search($word, $stopwords) !== false)
+ unset($wordlist[$i]);
}
- return $words;
+ return array_values($wordlist);
}
/**
@@ -707,7 +707,7 @@ class Doku_Indexer {
array_splice($page_idx, count($title_idx));
foreach ($title_idx as $i => $title)
if ($title === "") unset($page_idx[$i]);
- return $page_idx;
+ return array_values($page_idx);
}
$pages = array();
@@ -1068,7 +1068,7 @@ class Doku_Indexer {
if ($line == '') return $result;
$parts = explode(':', $line);
foreach ($parts as $tuple) {
- if ($tuple == '') continue;
+ if ($tuple === '') continue;
list($key, $cnt) = explode('*', $tuple);
if (!$cnt) continue;
$key = $keys[$key];
@@ -1087,7 +1087,7 @@ class Doku_Indexer {
$freq = 0;
$parts = explode(':', $line);
foreach ($parts as $tuple) {
- if ($tuple == '') continue;
+ if ($tuple === '') continue;
list($pid, $cnt) = explode('*', $tuple);
$freq += (int)$cnt;
}