summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchris <chris@jalakai.co.uk>2006-08-25 15:47:30 +0200
committerchris <chris@jalakai.co.uk>2006-08-25 15:47:30 +0200
commit95a12943a8ef44b4c16f72c9c1c7f5e28b1d60c4 (patch)
tree15af33ca6551768cc23ea32249eee64b66f8df8a
parentbd2cb6fcf3ae405928c03419a189fba5d1a052e3 (diff)
downloadrpg-95a12943a8ef44b4c16f72c9c1c7f5e28b1d60c4.tar.gz
rpg-95a12943a8ef44b4c16f72c9c1c7f5e28b1d60c4.tar.bz2
update to previous ft_snippet() patch, improve snippet text selection
darcs-hash:20060825134730-9b6ab-086ee0647af39c4398cf1726324d8215722a39db.gz
-rw-r--r--inc/fulltext.php37
1 files changed, 31 insertions, 6 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index a07adedd6..3943bdae4 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -245,7 +245,7 @@ switch ($algorithm) {
$start = $idx - $pre;
$end = min($idx+100+strlen($str)-$pre,$len);
$snippets[] = substr($text,$start,$end-$start);
- if (!(--$cnt)) break;
+ if (!($cnt--)) break;
}
$m = "\1";
@@ -262,16 +262,41 @@ switch ($algorithm) {
$snippets = array();
$offset = 0;
$len = strlen($text);
- for ($cnt=3; --$cnt;) {
+ for ($cnt=3; $cnt--;) {
if (!preg_match('#'.$re.'#iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) break;
list($str,$idx) = $match[0];
- $pre = min($idx,50);
+ // establish context, 100 characters surrounding the match string
+ // first look to see if we can go 100 either side,
+ // then drop to 50 adding any excess if the other side can't go to 50.
+ $pre = min($idx-$offset,100);
+ $post = min($len-$idx-strlen($str),100);
+
+ if ($pre>50 && $post>50) {
+ $pre = $post = 50;
+ } else if ($pre>50) {
+ $pre = min($pre,100-$post);
+ } else if ($post>50) {
+ $post = min($post, 100-$pre);
+ }
+
+ // establish context start and end points, try to append to previous context if possible
$start = $idx - $pre;
- $end = min($idx+100+strlen($str)-$pre,$len);
- $snippets[] = substr($text,$start,$end-$start);
- $offset = $end;
+ $append = ($start < $end) ? $end : false; // still the end of the previous context snippet
+ $end = $idx + strlen($str) + $post; // now set it to the end of this context
+
+ if ($append) {
+ $snippets[count($snippets)-1] .= substr($text,$append,$end-$append);
+ } else {
+ $snippets[] = substr($text,$start,$end-$start);
+ }
+
+ // set $offset for next match attempt
+ // substract strlen to avoid splitting a potential search success, this is an approximation as the
+ // search pattern may match strings of varying length and it will fail if the context snippet
+ // boundary breaks a matching string longer than the current match
+ $offset = $end - strlen($str);
}
$m = "\1";
$snippets = preg_replace('#'.$re.'#iu',$m.'$1'.$m,$snippets);