diff options
author | chris <chris@jalakai.co.uk> | 2006-08-31 02:34:13 +0200 |
---|---|---|
committer | chris <chris@jalakai.co.uk> | 2006-08-31 02:34:13 +0200 |
commit | 9ee93076cf04a466d2e9620bc3efe538d93e5983 (patch) | |
tree | b488c990f3745e4125c1799a7520f7f55a045f7d /inc/indexer.php | |
parent | bc3e97becac1d4920baa2ff042cdd886e1080056 (diff) | |
download | rpg-9ee93076cf04a466d2e9620bc3efe538d93e5983.tar.gz rpg-9ee93076cf04a466d2e9620bc3efe538d93e5983.tar.bz2 |
search improvements
ft_snippet()
- make utf8 algorithm default
- add workaround for utf8_substr() limitations, bug #891
- fix some indexes which missed out on conversion to utf8
character counts
- minor improvements
idx_lookup()
- minor changes to wildcard matching code to improve performance
(changes based on profiling results)
utf8
- specifically set mb_internal_encoding to utf-8 when mbstring
functions will be used.
darcs-hash:20060831003413-9b6ab-712021eda3c959ffe79d8d3fe91d2c9a8acf2b58.gz
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- | inc/indexer.php | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/inc/indexer.php b/inc/indexer.php index 9af4b5b84..a2b7a0637 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -233,6 +233,8 @@ function idx_lookup($words){ if(substr($xword,0,1) == '*'){ $xword = substr($xword,1); $wild = 1; + $ptn = '/'.preg_quote($xword,'/').'$/'; +# $l = -1*strlen($xword)-1; } if(substr($xword,-1,1) == '*'){ $xword = substr($xword,0,-1); @@ -245,8 +247,11 @@ function idx_lookup($words){ for($wid=0; $wid<$cnt; $wid++){ $iword = $word_idx[$wid]; if( (($wild==3) && is_int(strpos($iword,$xword))) || - (($wild==1) && ("$xword\n" == substr($iword,(-1*strlen($xword))-1))) || - (($wild==2) && ($xword == substr($iword,0,strlen($xword)))) +# (($wild==1) && ("$xword\n" == substr($iword,$l))) || + (($wild==1) && preg_match($ptn,$iword)) || +# (($wild==2) && ($xword == substr($iword,0,strlen($xword)))) + (($wild==2) && (0 === strpos($iword,$xword))) + ){ $wids[] = $wid; $result[$word][] = $wid; |