summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Smith <chris.eureka@jalakai.co.uk> 2008-12-07 17:11:29 +0100
committer: Chris Smith <chris.eureka@jalakai.co.uk> 2008-12-07 17:11:29 +0100
commit: 33815ce27ad57e922146632ece1f6d9464db0225 (patch)
tree: d022b5e1dea05899deb606f75ba377de8a0c0b6a
parent: ab91b35c28866e6a69b28a3a57bfad235e2f79bb (diff)
download: rpg-33815ce27ad57e922146632ece1f6d9464db0225.tar.gz
download: rpg-33815ce27ad57e922146632ece1f6d9464db0225.tar.bz2
Change search index min. token length to a define (IDX_MINWORDLENGTH)
Currently the minimum token length is 3 (note: this doesn't apply to numeric tokens). The value set in inc/indexer.php can be overridden by defining IDX_MINWORDLENGTH elsewhere (e.g. conf/local.protected.php).

darcs-hash:20081207161129-f07c6-6432947fe5d74666409d1e00222eaa489374c32f.gz
-rw-r--r-- inc/indexer.php 9
1 files changed, 6 insertions, 3 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index ff2d332dc..490ba1393 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -12,6 +12,9 @@
require_once(DOKU_INC.'inc/utf8.php');
require_once(DOKU_INC.'inc/parserutils.php');
+// set the minimum token length to use in the index (note, this doesn't apply to numeric tokens)
+if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',3);
+
// Asian characters are handled as words. The following regexp defines the
// Unicode-Ranges for Asian characters
// Ranges taken from http://en.wikipedia.org/wiki/Unicode_block
@@ -472,7 +475,7 @@ function idx_getIndexWordsSorted($words,&$result){
$wild |= 2;
$wlen -= 1;
}
- if ($wlen < 3 && $wild == 0 && !is_numeric($xword)) continue;
+ if ($wlen < IDX_MINWORDLENGTH && $wild == 0 && !is_numeric($xword)) continue;
if(!isset($tokens[$xword])){
$tokenlength[$wlen][] = $xword;
}
@@ -620,14 +623,14 @@ function idx_tokenizer($string,&$stopwords,$wc=false){
$arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc));
foreach ($arr as $w) {
- if (!is_numeric($w) && strlen($w) < 3) continue;
+ if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) continue;
$w = utf8_strtolower($w);
if($stopwords && is_int(array_search("$w\n",$stopwords))) continue;
$words[] = $w;
}
}else{
$w = $string;
- if (!is_numeric($w) && strlen($w) < 3) return $words;
+ if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) return $words;
$w = strtolower($w);
if(is_int(array_search("$w\n",$stopwords))) return $words;
$words[] = $w;