2 files changed, 15 insertions, 3 deletions
diff --git a/_test/cases/inc/utf8_substr.test.php b/_test/cases/inc/utf8_substr.test.php
index 92bc65fe3..5cb93034a 100644
--- a/_test/cases/inc/utf8_substr.test.php
+++ b/_test/cases/inc/utf8_substr.test.php
@@ -26,5 +26,18 @@ class utf8_substr_test extends UnitTestCase {
         }
     }
 
+    function test2_bug891() {
+        // bug 891 regression: utf8_substr on a very long string - format: in, offset, length, out
+        $tests   = array();
+        // 66000 multibyte characters plus a two character ASCII tail = 66002 characters
+        $str = str_repeat('в',66000).'@@';
+        $tests[] = array($str, 65600, 1, 'в');
+        $tests[] = array($str,0,66002,$str);
+        // the second case asks for all 66002 characters, so the whole string must come back
+        foreach($tests as $test){
+            $this->assertEqual(utf8_substr($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
 }
 //Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/fulltext.php b/inc/fulltext.php
index fa3ec05d2..280ba0c89 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -333,7 +333,6 @@ switch ($algorithm) {
       // establish context, 100 bytes surrounding the match string
       // first look to see if we can go 100 either side,
       // then drop to 50 adding any excess if the other side can't go to 50,
-      // NOTE: these are byte adjustments and will have to be corrected for utf-8
         $pre = min($utf8_idx-$utf8_offset,100);
         $post = min($len-$utf8_idx-$utf8_len,100);
 
@@ -371,9 +370,9 @@ switch ($algorithm) {
       } else {
         // code for strings too large for utf8_substr
         // use a larger context number as its bytes not characters
-        $pre = 70;
+        // $pre needs no lower bound check: $idx is nearly 64k here, so $idx - $pre cannot go negative
         $post = min(strlen($text)-$idx-strlen($str), 70);
-        if ($post < 70) { $pre = 70 - $post; }
+        $pre = ($post < 70) ?  140 - $post : 70;
 
         $start = utf8_correctIdx($text,$idx - $pre);
         $end = utf8_correctIdx($text, $idx + strlen($str) + $post);