summaryrefslogtreecommitdiff
path: root/inc
diff options
context:
space:
mode:
authorAndreas Gohr <andi@splitbrain.org>2012-07-28 16:57:15 +0200
committerAndreas Gohr <andi@splitbrain.org>2012-07-28 16:57:15 +0200
commit84e581a6fc319435b4b46835b63cb4dccf05195b (patch)
tree803c9e7a55764bcf1cec9d5bb1527b467d88060a /inc
parent0483f97f57369aa0b4ba028a881a78fda540d2a9 (diff)
downloadrpg-84e581a6fc319435b4b46835b63cb4dccf05195b.tar.gz
rpg-84e581a6fc319435b4b46835b63cb4dccf05195b.tar.bz2
fix word boundary matching on broken platforms FS#2440
Seems like matching \b on Unicode strings is unreliable across different platforms (Debian). Using Unicode class lookaheads/lookbehinds seems to work though.
Diffstat (limited to 'inc')
-rw-r--r--inc/fulltext.php11
1 files changed, 8 insertions, 3 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 8f4db111d..eab8850dc 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -394,19 +394,24 @@ function ft_snippet_re_preprocess($term) {
return $term;
}
+ // unicode word boundaries
+ // see http://stackoverflow.com/a/2449017/172068
+ $BL = '(?<!\pL)';
+ $BR = '(?!\pL)';
+
if(substr($term,0,2) == '\\*'){
$term = substr($term,2);
}else{
- $term = '\b'.$term;
+ $term = $BL.$term;
}
if(substr($term,-2,2) == '\\*'){
$term = substr($term,0,-2);
}else{
- $term = $term.'\b';
+ $term = $term.$BR;
}
- if($term == '\b' || $term == '\b\b') $term = '';
+ if($term == $BL || $term == $BR || $term == $BL.$BR) $term = '';
return $term;
}