From 4f0030dd466f56b3dc0c864656fb1bf0e76d2932 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 6 Feb 2011 19:07:31 +0100 Subject: ignore soft-hyphens for search FS#2049 This makes it possible to find words that include soft-hyphens. However, search highlighting will not work and I have no idea how to make it work. --- inc/indexer.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 9cf079261..526c8db05 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -221,7 +221,14 @@ function idx_getPageWords($page){ list($page,$body) = $data; - $body = strtr($body, "\r\n\t", ' '); + $body = strtr($body, + array( + "\r" => ' ', + "\n" => ' ', + "\t" => ' ', + "\xC2\xAD" => '', //soft-hyphen + ) + ); $tokens = explode(' ', $body); $tokens = array_count_values($tokens); // count the frequency of each token -- cgit v1.2.3