From ef3e3cddddc43156f53958411f3396c9ef5dec60 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 15 Feb 2014 14:55:51 +0100 Subject: Only use the whole text as search snippet when it's the first match --- inc/fulltext.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/fulltext.php b/inc/fulltext.php index bd8e6b866..fdaa1a19b 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -333,7 +333,7 @@ function ft_snippet($id,$highlight){ $pre = min($pre,100-$post); } else if ($post>50) { $post = min($post, 100-$pre); - } else { + } else if ($offset == 0) { // both are less than 50, means the context is the whole string // make it so and break out of this loop - there is no need for the // complex snippet calculations -- cgit v1.2.3 From 43d58b76484489688582f407d73054a3d4ddcba1 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 15 Feb 2014 15:34:03 +0100 Subject: Fix the snippet search to continue after the previous match --- inc/fulltext.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inc/fulltext.php b/inc/fulltext.php index fdaa1a19b..87b5a7370 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -354,12 +354,12 @@ function ft_snippet($id,$highlight){ } // set $offset for next match attempt - // substract strlen to avoid splitting a potential search success, - // this is an approximation as the search pattern may match strings - // of varying length and it will fail if the context snippet - // boundary breaks a matching string longer than the current match - $utf8_offset = $utf8_idx + $post; - $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post)); + // continue matching after the current match + // if the current match is not the longest possible match starting at the current offset + // this prevents further matching of this snippet but for possible matches of length + // smaller than match length + context (at least 50 characters) this match is part of the context + $utf8_offset = $utf8_idx + $utf8_len; + $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$utf8_len)); $offset = utf8_correctIdx($text,$offset); } -- cgit v1.2.3