summaryrefslogtreecommitdiff
path: root/inc/indexer.php
diff options
context:
space:
mode:
authorAndreas Gohr <andi@splitbrain.org>2011-02-06 19:07:31 +0100
committerAndreas Gohr <andi@splitbrain.org>2011-02-06 19:11:19 +0100
commit4f0030dd466f56b3dc0c864656fb1bf0e76d2932 (patch)
tree09291d0635244f84e673b8336a26f44d4d92d754 /inc/indexer.php
parent412b5df14aaa2104af3d82e77380c5321cd94389 (diff)
downloadrpg-4f0030dd466f56b3dc0c864656fb1bf0e76d2932.tar.gz
rpg-4f0030dd466f56b3dc0c864656fb1bf0e76d2932.tar.bz2
ignore soft-hyphens for search FS#2049
This makes it possible to find words that include soft-hyphens. However, search highlighting will not work and I have no idea how to make it work.
Diffstat (limited to 'inc/indexer.php')
-rw-r--r--inc/indexer.php9
1 files changed, 8 insertions, 1 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 9cf079261..526c8db05 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -221,7 +221,14 @@ function idx_getPageWords($page){
list($page,$body) = $data;
- $body = strtr($body, "\r\n\t", ' ');
+ $body = strtr($body,
+ array(
+ "\r" => ' ',
+ "\n" => ' ',
+ "\t" => ' ',
+ "\xC2\xAD" => '', //soft-hyphen
+ )
+ );
$tokens = explode(' ', $body);
$tokens = array_count_values($tokens); // count the frequency of each token