summary | refs | log | tree | commit | diff
path: root/inc/fulltext.php
diff options
context:
space:
mode:
author: Christopher Smith <chris@jalakai.co.uk> 2014-03-02 21:00:08 +0000
committer: Christopher Smith <chris@jalakai.co.uk> 2014-03-02 21:00:08 +0000
commit: f327a5f0a9ef72e6dd79d9d200f81c330aa4903c (patch)
tree: 2d2a398218964e3bee0876385f9c8786a3baf8de /inc/fulltext.php
parent: 446b5b5934799f1f906ea8903b1e96c981f1c1b2 (diff)
parent: 709fd92548efedbd4b4e5693097165d1dff072e4 (diff)
download: rpg-f327a5f0a9ef72e6dd79d9d200f81c330aa4903c.tar.gz
rpg-f327a5f0a9ef72e6dd79d9d200f81c330aa4903c.tar.bz2
Merge branch 'master' into FS#2388
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r-- inc/fulltext.php 30
1 files changed, 21 insertions, 9 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index bd8e6b866..dd918f214 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -72,8 +72,20 @@ function _ft_pageSearch(&$data) {
$pages = end($stack);
$pages_matched = array();
foreach(array_keys($pages) as $id){
- $text = utf8_strtolower(rawWiki($id));
- if (strpos($text, $phrase) !== false) {
+ $evdata = array(
+ 'id' => $id,
+ 'phrase' => $phrase,
+ 'text' => rawWiki($id)
+ );
+ $evt = new Doku_Event('FULLTEXT_PHRASE_MATCH',$evdata);
+ if ($evt->advise_before() && $evt->result !== true) {
+ $text = utf8_strtolower($evdata['text']);
+ if (strpos($text, $phrase) !== false) {
+ $evt->result = true;
+ }
+ }
+ $evt->advise_after();
+ if ($evt->result === true) {
$pages_matched[$id] = 0; // phrase: always 0 hit
}
}
@@ -333,7 +345,7 @@ function ft_snippet($id,$highlight){
$pre = min($pre,100-$post);
} else if ($post>50) {
$post = min($post, 100-$pre);
- } else {
+ } else if ($offset == 0) {
// both are less than 50, means the context is the whole string
// make it so and break out of this loop - there is no need for the
// complex snippet calculations
@@ -354,12 +366,12 @@ function ft_snippet($id,$highlight){
}
// set $offset for next match attempt
- // substract strlen to avoid splitting a potential search success,
- // this is an approximation as the search pattern may match strings
- // of varying length and it will fail if the context snippet
- // boundary breaks a matching string longer than the current match
- $utf8_offset = $utf8_idx + $post;
- $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
+ // continue matching directly after the end of the current match.
+ // if the current match is not the longest possible match starting at the current offset,
+ // that longer match will not be found again for this snippet; however, any such match shorter
+ // than match length + context (at least 50 characters) is already part of this snippet's context
+ $utf8_offset = $utf8_idx + $utf8_len;
+ $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$utf8_len));
$offset = utf8_correctIdx($text,$offset);
}