From 26eb848c36688e459e5dfc46cdc68d87ed2bda0c Mon Sep 17 00:00:00 2001 From: Gina Haeussge Date: Sat, 2 Oct 2010 18:04:09 +0200 Subject: FS#1353: Only highlight isolated occurrences of search term, not those where it's part of another term. Word boundaries are now respected. --- inc/fulltext.php | 9 ++++++++- inc/html.php | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'inc') diff --git a/inc/fulltext.php b/inc/fulltext.php index e90205e9c..7dae183c8 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -313,7 +313,7 @@ function ft_snippet($id,$highlight){ $len = utf8_strlen($text); // build a regexp from the phrases to highlight - $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')'; + $re1 = '('.join('|',array_map('_ft_snippet_re_preprocess', array_map('preg_quote_cb',array_filter((array) $highlight)))).')'; $re2 = "$re1.{0,75}(?!\\1)$re1"; $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1"; @@ -386,6 +386,13 @@ function ft_snippet($id,$highlight){ return $evdata['snippet']; } +/** + * Wraps a search term in regex boundary checks. + */ +function _ft_snippet_re_preprocess($term) { + return '\b'.$term.'\b'; +} + /** * Combine found documents and sum up their scores * diff --git a/inc/html.php b/inc/html.php index add559971..e1478b0a8 100644 --- a/inc/html.php +++ b/inc/html.php @@ -285,13 +285,20 @@ function html_draft(){ */ function html_hilight($html,$phrases){ $phrases = array_filter((array) $phrases); - $regex = join('|',array_map('preg_quote_cb',$phrases)); + $regex = join('|',array_map('_html_hilight_re_preprocess', array_map('preg_quote_cb',$phrases))); if ($regex === '') return $html; $html = preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html); return $html; } +/** + * Wraps a search term in regex boundary checks. + */ +function _html_hilight_re_preprocess($term) { + return '\b'.$term.'\b'; +} + /** * Callback used by html_hilight() * -- cgit v1.2.3