From 9ee93076cf04a466d2e9620bc3efe538d93e5983 Mon Sep 17 00:00:00 2001 From: chris Date: Thu, 31 Aug 2006 02:34:13 +0200 Subject: search improvements ft_snippet() - make utf8 algorithm default - add workaround for utf8_substr() limitations, bug #891 - fix some indexes which missed out on conversion to utf8 character counts - minor improvements idx_lookup() - minor changes to wildcard matching code to improve performance (changes based on profiling results) utf8 - specifically set mb_internal_encoding to utf-8 when mb_string functions will be used. darcs-hash:20060831003413-9b6ab-712021eda3c959ffe79d8d3fe91d2c9a8acf2b58.gz --- inc/indexer.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 9af4b5b84..a2b7a0637 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -233,6 +233,8 @@ function idx_lookup($words){ if(substr($xword,0,1) == '*'){ $xword = substr($xword,1); $wild = 1; + $ptn = '/'.preg_quote($xword,'/').'$/'; +# $l = -1*strlen($xword)-1; } if(substr($xword,-1,1) == '*'){ $xword = substr($xword,0,-1); @@ -245,8 +247,11 @@ function idx_lookup($words){ for($wid=0; $wid<$cnt; $wid++){ $iword = $word_idx[$wid]; if( (($wild==3) && is_int(strpos($iword,$xword))) || - (($wild==1) && ("$xword\n" == substr($iword,(-1*strlen($xword))-1))) || - (($wild==2) && ($xword == substr($iword,0,strlen($xword)))) +# (($wild==1) && ("$xword\n" == substr($iword,$l))) || + (($wild==1) && preg_match($ptn,$iword)) || +# (($wild==2) && ($xword == substr($iword,0,strlen($xword)))) + (($wild==2) && (0 === strpos($iword,$xword))) + ){ $wids[] = $wid; $result[$word][] = $wid; -- cgit v1.2.3