diff options
author | Andreas Gohr <andi@splitbrain.org> | 2012-07-28 16:57:15 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2012-07-28 16:57:15 +0200 |
commit | 84e581a6fc319435b4b46835b63cb4dccf05195b (patch) | |
tree | 803c9e7a55764bcf1cec9d5bb1527b467d88060a /inc/fulltext.php | |
parent | 0483f97f57369aa0b4ba028a881a78fda540d2a9 (diff) | |
download | rpg-84e581a6fc319435b4b46835b63cb4dccf05195b.tar.gz rpg-84e581a6fc319435b4b46835b63cb4dccf05195b.tar.bz2 |
fix word boundary matching on broken platforms FS#2440
Seems like matching \b on unicode strings is unreliable across different
platforms (Debian). Using Unicode class lookaheads/lookbehinds seems to work
though.
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r-- | inc/fulltext.php | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php index 8f4db111d..eab8850dc 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -394,19 +394,24 @@ function ft_snippet_re_preprocess($term) { return $term; } + // unicode word boundaries + // see http://stackoverflow.com/a/2449017/172068 + $BL = '(?<!\pL)'; + $BR = '(?!\pL)'; + if(substr($term,0,2) == '\\*'){ $term = substr($term,2); }else{ - $term = '\b'.$term; + $term = $BL.$term; } if(substr($term,-2,2) == '\\*'){ $term = substr($term,0,-2); }else{ - $term = $term.'\b'; + $term = $term.$BR; } - if($term == '\b' || $term == '\b\b') $term = ''; + if($term == $BL || $term == $BR || $term == $BL.$BR) $term = ''; return $term; } |