From ef3e3cddddc43156f53958411f3396c9ef5dec60 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 15 Feb 2014 14:55:51 +0100 Subject: Only use the whole text as search snippet when it's the first match --- inc/fulltext.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index bd8e6b866..fdaa1a19b 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -333,7 +333,7 @@ function ft_snippet($id,$highlight){ $pre = min($pre,100-$post); } else if ($post>50) { $post = min($post, 100-$pre); - } else { + } else if ($offset == 0) { // both are less than 50, means the context is the whole string // make it so and break out of this loop - there is no need for the // complex snippet calculations -- cgit v1.2.3 From 43d58b76484489688582f407d73054a3d4ddcba1 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 15 Feb 2014 15:34:03 +0100 Subject: Fix the snippet search to continue after the previous match --- inc/fulltext.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index fdaa1a19b..87b5a7370 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -354,12 +354,12 @@ function ft_snippet($id,$highlight){ } // set $offset for next match attempt - // substract strlen to avoid splitting a potential search success, - // this is an approximation as the search pattern may match strings - // of varying length and it will fail if the context snippet - // boundary breaks a matching string longer than the current match - $utf8_offset = $utf8_idx + $post; - $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post)); + // continue matching after the current match + // if the current match is not the longest possible match starting at the current offset + // this prevents further matching of this snippet but for possible matches of length + // smaller than match length + context (at least 50 characters) this match is part of the context + $utf8_offset = $utf8_idx + $utf8_len; + $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$utf8_len)); $offset = utf8_correctIdx($text,$offset); } -- cgit v1.2.3 From a7e8b43e07d1665d3fdff8bde3a9850c0228efaf Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 15 Feb 2014 15:47:24 +0100 Subject: Add FULLTEXT_PHRASE_MATCH event for allowing plugins to match phrases Our index doesn't support phrase searches so we are searching for the pages that contain all words of the phrase and then search again in the content of the pages. As plugins can also add additional text to the index this event allows plugins to do phrase matching in their content. --- inc/fulltext.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index bd8e6b866..cc6742e5d 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -72,8 +72,20 @@ function _ft_pageSearch(&$data) { $pages = end($stack); $pages_matched = array(); foreach(array_keys($pages) as $id){ - $text = utf8_strtolower(rawWiki($id)); - if (strpos($text, $phrase) !== false) { + $evdata = array( + 'id' => $id, + 'phrase' => $phrase, + 'text' => rawWiki($id) + ); + $evt = new Doku_Event('FULLTEXT_PHRASE_MATCH',$evdata); + if ($evt->advise_before() && $evt->result !== true) { + $text = utf8_strtolower($evdata['text']); + if (strpos($text, $phrase) !== false) { + $evt->result = true; + } + } + $evt->advise_after(); + if ($evt->result === true) { $pages_matched[$id] = 0; // phrase: always 0 hit } } -- cgit v1.2.3