summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--_test/cases/inc/utf8_substr.test.php13
-rw-r--r--inc/fulltext.php5
2 files changed, 15 insertions, 3 deletions
diff --git a/_test/cases/inc/utf8_substr.test.php b/_test/cases/inc/utf8_substr.test.php
index 92bc65fe3..5cb93034a 100644
--- a/_test/cases/inc/utf8_substr.test.php
+++ b/_test/cases/inc/utf8_substr.test.php
@@ -26,5 +26,18 @@ class utf8_substr_test extends UnitTestCase {
}
}
+ function test2_bug891() {
+ // checks utf8_substr() on a very long multi-byte string - case format: in, offset, length, out
+ $tests = array();
+ // $str is 66000 copies of the multi-byte character 'в' followed by '@@': 66002 characters in total
+ $str = str_pad('',strlen('в')*66000,'в').'@@';
+ $tests[] = array($str, 65600, 1, 'в'); // a single character taken from deep inside the string
+ $tests[] = array($str,0,66002,$str); // all 66002 characters must be requested to get the whole string back
+
+ foreach($tests as $test){
+ $this->assertEqual(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
+ }
+ }
+
}
//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/fulltext.php b/inc/fulltext.php
index fa3ec05d2..280ba0c89 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -333,7 +333,6 @@ switch ($algorithm) {
// establish context, 100 bytes surrounding the match string
// first look to see if we can go 100 either side,
// then drop to 50 adding any excess if the other side can't go to 50,
- // NOTE: these are byte adjustments and will have to be corrected for utf-8
$pre = min($utf8_idx-$utf8_offset,100);
$post = min($len-$utf8_idx-$utf8_len,100);
@@ -371,9 +370,9 @@ switch ($algorithm) {
} else {
// code for strings too large for utf8_substr
// use a larger context number as it's bytes, not characters
- $pre = 70;
+ // no need to check for short pre, $idx is nearly 64k
$post = min(strlen($text)-$idx-strlen($str), 70);
- if ($post < 70) { $pre = 70 - $post; }
+ $pre = ($post < 70) ? 140 - $post : 70;
$start = utf8_correctIdx($text,$idx - $pre);
$end = utf8_correctIdx($text, $idx + strlen($str) + $post);