summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: chris <chris@jalakai.co.uk> 2006-08-25 12:40:46 +0200
committer: chris <chris@jalakai.co.uk> 2006-08-25 12:40:46 +0200
commitbd2cb6fcf3ae405928c03419a189fba5d1a052e3 (patch)
treeaa28c882e924f11401daf4499252e1011ff7bd2b
parent0196c1c565fe3ea9d84cc1dff01580e911716a21 (diff)
downloadrpg-bd2cb6fcf3ae405928c03419a189fba5d1a052e3.tar.gz
rpg-bd2cb6fcf3ae405928c03419a189fba5d1a052e3.tar.bz2
ft_snippet optimisations
This patch includes two alternative algorithms for ft_snippet(), the code which prepares the snippets seen on the search page and the most time-consuming part of producing that page. If you have $conf['allowdebug'] on, you can specify the search algorithm to use by adding &_search=orig, &_search=opt1 or &_search=opt2 to the request; when the parameter is missing or has any other value, opt2 is used.
darcs-hash:20060825104046-9b6ab-942d81a43cf0f85bfd235cabf6c35dd4b20e0b71.gz
-rw-r--r-- inc/fulltext.php 66
1 files changed, 66 insertions, 0 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 7702711f5..a07adedd6 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -200,6 +200,18 @@ function ft_snippet($id,$poswords){
$poswords = preg_quote($poswords,'#');
$re = '('.str_replace(' ','|',$poswords).')';
$text = rawWiki($id);
+
+// extra code to allow selection of search algorithm - remove before release
+global $conf;
+$algorithm = '';
+if ($conf['allowdebug']) {
+ if (!empty($_REQUEST['_search'])) $algorithm = $_REQUEST['_search'];
+}
+
+switch ($algorithm) {
+ case 'orig' :
+// original code ... dokuwiki
+
//FIXME caseinsensitive matching doesn't work with UTF-8!?
preg_match_all('#(.{0,50})'.$re.'(.{0,50})#iu',$text,$matches,PREG_SET_ORDER);
@@ -214,6 +226,60 @@ function ft_snippet($id,$poswords){
if($cnt++ == 2) break;
}
+ break;
+
+ case 'opt1' :
+// my snippet algorithm, first cut ... CS 2006-08-25
+// reduce the impact of the original regex
+ $matches = array();
+ preg_match_all('#'.$re.'#iu',$text,$matches,PREG_OFFSET_CAPTURE|PREG_SET_ORDER);
+
+ $cnt = 3;
+ $snippets = array();
+ $len = strlen($text);
+ foreach ($matches as $match) {
+ list($str,$idx) = $match[0];
+ if ($idx < $end) continue;
+
+ $pre = min($idx,50);
+ $start = $idx - $pre;
+ $end = min($idx+100+strlen($str)-$pre,$len);
+ $snippets[] = substr($text,$start,$end-$start);
+ if (!(--$cnt)) break;
+ }
+
+ $m = "\1";
+ $snippets = preg_replace('#'.$re.'#iu',$m.'$1'.$m,$snippets);
+ $snippet = preg_replace('#'.$m.'([^'.$m.']*?)'.$m.'#iu','<span class="search_hit">$1</span>',hsc(join('... ',$snippets)));
+
+ break;
+
+ case 'opt2' :
+ default :
+// option 2 ... CS 2006-08-25
+// above + reduce amount of the file searched
+ $match = array();
+ $snippets = array();
+ $offset = 0;
+ $len = strlen($text);
+ for ($cnt=3; --$cnt;) {
+ if (!preg_match('#'.$re.'#iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) break;
+
+ list($str,$idx) = $match[0];
+
+ $pre = min($idx,50);
+ $start = $idx - $pre;
+ $end = min($idx+100+strlen($str)-$pre,$len);
+ $snippets[] = substr($text,$start,$end-$start);
+ $offset = $end;
+ }
+ $m = "\1";
+ $snippets = preg_replace('#'.$re.'#iu',$m.'$1'.$m,$snippets);
+ $snippet = preg_replace('#'.$m.'([^'.$m.']*?)'.$m.'#iu','<span class="search_hit">$1</span>',hsc(join('... ',$snippets)));
+
+ break;
+}
+
return $snippet;
}