From 26eb848c36688e459e5dfc46cdc68d87ed2bda0c Mon Sep 17 00:00:00 2001
From: Gina Haeussge <gina@foosel.net>
Date: Sat, 2 Oct 2010 18:04:09 +0200
Subject: FS#1353: Only highlight isolated occurrences of search term, not those
 where it's part of another term. Word boundaries are now respected.

---
 inc/fulltext.php | 9 ++++++++-
 inc/html.php     | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'inc')

diff --git a/inc/fulltext.php b/inc/fulltext.php
index e90205e9c..7dae183c8 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -313,7 +313,7 @@ function ft_snippet($id,$highlight){
         $len = utf8_strlen($text);
 
         // build a regexp from the phrases to highlight
-        $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')';
+        $re1 = '('.join('|',array_map('_ft_snippet_re_preprocess', array_map('preg_quote_cb',array_filter((array) $highlight)))).')';
         $re2 = "$re1.{0,75}(?!\\1)$re1";
         $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
 
@@ -386,6 +386,13 @@ function ft_snippet($id,$highlight){
     return $evdata['snippet'];
 }
 
+/**
+ * Wraps a search term (already passed through preg_quote_cb by ft_snippet()) in \b word-boundary assertions, so it is matched only where it is delimited by word boundaries and not inside a longer word.
+ */
+function _ft_snippet_re_preprocess($term) {
+    return '\b'.$term.'\b';
+}
+
 /**
  * Combine found documents and sum up their scores
  *
diff --git a/inc/html.php b/inc/html.php
index add559971..e1478b0a8 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -285,13 +285,20 @@ function html_draft(){
  */
 function html_hilight($html,$phrases){
     $phrases = array_filter((array) $phrases);
-    $regex = join('|',array_map('preg_quote_cb',$phrases));
+    $regex = join('|',array_map('_html_hilight_re_preprocess', array_map('preg_quote_cb',$phrases)));
 
     if ($regex === '') return $html;
     $html = preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html);
     return $html;
 }
 
+/**
+ * Wraps a search phrase (already passed through preg_quote_cb by html_hilight()) in \b word-boundary assertions, so it is highlighted only where it is delimited by word boundaries and not inside a longer word.
+ */
+function _html_hilight_re_preprocess($term) {
+    return '\b'.$term.'\b';
+}
+
 /**
  * Callback used by html_hilight()
  *
-- 
cgit v1.2.3