diff options
author | chris <chris@jalakai.co.uk> | 2006-08-25 12:40:46 +0200 |
---|---|---|
committer | chris <chris@jalakai.co.uk> | 2006-08-25 12:40:46 +0200 |
commit | bd2cb6fcf3ae405928c03419a189fba5d1a052e3 (patch) | |
tree | aa28c882e924f11401daf4499252e1011ff7bd2b /inc/fulltext.php | |
parent | 0196c1c565fe3ea9d84cc1dff01580e911716a21 (diff) | |
download | rpg-bd2cb6fcf3ae405928c03419a189fba5d1a052e3.tar.gz rpg-bd2cb6fcf3ae405928c03419a189fba5d1a052e3.tar.bz2 |
ft_snippet optimisations
This patch includes two alternative algorithms for ft_snippet(), the code
which prepares the snippets seen on the search page and which is the most
time-consuming part of producing that page.
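If you have $conf['allowdebug'] on, you can specify the search algorithm to
use by adding &_search=<algorithm> to the request, where <algorithm> is one of
orig, opt1 or opt2 (opt2 is used when no value, or an unrecognised one, is given).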
darcs-hash:20060825104046-9b6ab-942d81a43cf0f85bfd235cabf6c35dd4b20e0b71.gz
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r-- | inc/fulltext.php | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php index 7702711f5..a07adedd6 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -200,6 +200,18 @@ function ft_snippet($id,$poswords){ $poswords = preg_quote($poswords,'#'); $re = '('.str_replace(' ','|',$poswords).')'; $text = rawWiki($id); + +// extra code to allow selection of search algorithm - remove before release +global $conf; +$algorithm = ''; +if ($conf['allowdebug']) { + if (!empty($_REQUEST['_search'])) $algorithm = $_REQUEST['_search']; +} + +switch ($algorithm) { + case 'orig' : +// original code ... dokuwiki + //FIXME caseinsensitive matching doesn't work with UTF-8!? preg_match_all('#(.{0,50})'.$re.'(.{0,50})#iu',$text,$matches,PREG_SET_ORDER); @@ -214,6 +226,60 @@ function ft_snippet($id,$poswords){ if($cnt++ == 2) break; } + break; + + case 'opt1' : +// my snippet algorithm, first cut ... CS 2006-08-25 +// reduce the impact of the original regex + $matches = array(); + preg_match_all('#'.$re.'#iu',$text,$matches,PREG_OFFSET_CAPTURE|PREG_SET_ORDER); + + $cnt = 3; + $snippets = array(); + $len = strlen($text); + foreach ($matches as $match) { + list($str,$idx) = $match[0]; + if ($idx < $end) continue; + + $pre = min($idx,50); + $start = $idx - $pre; + $end = min($idx+100+strlen($str)-$pre,$len); + $snippets[] = substr($text,$start,$end-$start); + if (!(--$cnt)) break; + } + + $m = "\1"; + $snippets = preg_replace('#'.$re.'#iu',$m.'$1'.$m,$snippets); + $snippet = preg_replace('#'.$m.'([^'.$m.']*?)'.$m.'#iu','<span class="search_hit">$1</span>',hsc(join('... ',$snippets))); + + break; + + case 'opt2' : + default : +// option 2 ... 
CS 2006-08-25 +// above + reduce amount of the file searched + $match = array(); + $snippets = array(); + $offset = 0; + $len = strlen($text); + for ($cnt=3; --$cnt;) { + if (!preg_match('#'.$re.'#iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) break; + + list($str,$idx) = $match[0]; + + $pre = min($idx,50); + $start = $idx - $pre; + $end = min($idx+100+strlen($str)-$pre,$len); + $snippets[] = substr($text,$start,$end-$start); + $offset = $end; + } + $m = "\1"; + $snippets = preg_replace('#'.$re.'#iu',$m.'$1'.$m,$snippets); + $snippet = preg_replace('#'.$m.'([^'.$m.']*?)'.$m.'#iu','<span class="search_hit">$1</span>',hsc(join('... ',$snippets))); + + break; +} + return $snippet; } |