summaryrefslogtreecommitdiff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author: chris <chris@jalakai.co.uk> 2006-08-31 02:34:13 +0200
committer: chris <chris@jalakai.co.uk> 2006-08-31 02:34:13 +0200
commit: 9ee93076cf04a466d2e9620bc3efe538d93e5983 (patch)
tree: b488c990f3745e4125c1799a7520f7f55a045f7d /inc/indexer.php
parent: bc3e97becac1d4920baa2ff042cdd886e1080056 (diff)
download: rpg-9ee93076cf04a466d2e9620bc3efe538d93e5983.tar.gz
rpg-9ee93076cf04a466d2e9620bc3efe538d93e5983.tar.bz2
search improvements
ft_snippet()
- make utf8 algorithm default
- add workaround for utf8_substr() limitations, bug #891
- fix some indexes which missed out on conversion to utf8 character counts
- minor improvements

idx_lookup()
- minor changes to wildcard matching code to improve performance (changes based on profiling results)

utf8
- specifically set mb_internal_encoding to utf-8 when mb_string functions will be used.

darcs-hash:20060831003413-9b6ab-712021eda3c959ffe79d8d3fe91d2c9a8acf2b58.gz
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 9
1 files changed, 7 insertions, 2 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 9af4b5b84..a2b7a0637 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -233,6 +233,8 @@ function idx_lookup($words){
if(substr($xword,0,1) == '*'){
$xword = substr($xword,1);
$wild = 1;
+ $ptn = '/'.preg_quote($xword,'/').'$/';
+# $l = -1*strlen($xword)-1;
}
if(substr($xword,-1,1) == '*'){
$xword = substr($xword,0,-1);
@@ -245,8 +247,11 @@ function idx_lookup($words){
for($wid=0; $wid<$cnt; $wid++){
$iword = $word_idx[$wid];
if( (($wild==3) && is_int(strpos($iword,$xword))) ||
- (($wild==1) && ("$xword\n" == substr($iword,(-1*strlen($xword))-1))) ||
- (($wild==2) && ($xword == substr($iword,0,strlen($xword))))
+# (($wild==1) && ("$xword\n" == substr($iword,$l))) ||
+ (($wild==1) && preg_match($ptn,$iword)) ||
+# (($wild==2) && ($xword == substr($iword,0,strlen($xword))))
+ (($wild==2) && (0 === strpos($iword,$xword)))
+
){
$wids[] = $wid;
$result[$word][] = $wid;