summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: chris <chris@jalakai.co.uk> 2006-08-27 17:32:54 +0200
committer: chris <chris@jalakai.co.uk> 2006-08-27 17:32:54 +0200
commit: f50163d1d772ac773f6d03ed049e4a0329674db9 (patch)
tree: 6444af39f7037eec981df00a1e01822064d5b074
parent: ced0762eb24c25ac27438d5bef2a6ff434e55dca (diff)
download: rpg-f50163d1d772ac773f6d03ed049e4a0329674db9.tar.gz
rpg-f50163d1d772ac773f6d03ed049e4a0329674db9.tar.bz2
utf8_correctIdx bounds checking and more unittests
darcs-hash:20060827153254-9b6ab-3c76fde7cb5534ca12628e9aa6e6d59d9bb02f45.gz
-rw-r--r-- _test/cases/inc/utf8_correctidx.test.php 65
-rw-r--r-- inc/utf8.php 20
2 files changed, 71 insertions, 14 deletions
diff --git a/_test/cases/inc/utf8_correctidx.test.php b/_test/cases/inc/utf8_correctidx.test.php
index 1e7abf04a..d95ce9ae0 100644
--- a/_test/cases/inc/utf8_correctidx.test.php
+++ b/_test/cases/inc/utf8_correctidx.test.php
@@ -6,15 +6,68 @@ require_once DOKU_INC.'inc/utf8.php';
class utf8_correctidx_test extends UnitTestCase {
- function test1(){
+ function test_singlebyte(){
// we test multiple cases here - format: in, offset, length, out
$tests = array();
- $tests[] = array('живπά우리をあöä',1,false,0);
- $tests[] = array('живπά우리をあöä',2,false,2);
- $tests[] = array('живπά우리をあöä',1,true,2);
- $tests[] = array('живπά우리をあöä',0,false,0);
- $tests[] = array('живπά우리をあöä',2,true,2);
+ // single byte, should return current index
+ $tests[] = array('aaживπά우리をあöä',0,false,0);
+ $tests[] = array('aaживπά우리をあöä',1,false,1);
+ $tests[] = array('aaживπά우리をあöä',1,true,1);
+
+ foreach($tests as $test){
+ $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+ }
+ }
+
+ function test_twobyte(){
+ // we test multiple cases here - format: in, offset, length, out
+ $tests = array();
+
+ // two byte, should move to boundary, expect even number
+ $tests[] = array('aaживπά우리をあöä',2,false,2);
+ $tests[] = array('aaживπά우리をあöä',3,false,2);
+ $tests[] = array('aaживπά우리をあöä',4,false,4);
+
+ $tests[] = array('aaживπά우리をあöä',2,true,2);
+ $tests[] = array('aaживπά우리をあöä',3,true,4);
+ $tests[] = array('aaживπά우리をあöä',4,true,4);
+
+ foreach($tests as $test){
+ $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+ }
+ }
+
+ function test_threebyte(){
+ // we test multiple cases here - format: in, offset, length, out
+ $tests = array();
+
+ // three byte, should move to boundary 10 or 13
+ $tests[] = array('aaживπά우리をあöä',10,false,10);
+ $tests[] = array('aaживπά우리をあöä',11,false,10);
+ $tests[] = array('aaживπά우리をあöä',12,false,10);
+ $tests[] = array('aaживπά우리をあöä',13,false,13);
+
+ $tests[] = array('aaживπά우리をあöä',10,true,10);
+ $tests[] = array('aaживπά우리をあöä',11,true,13);
+ $tests[] = array('aaживπά우리をあöä',12,true,13);
+ $tests[] = array('aaживπά우리をあöä',13,true,13);
+
+ foreach($tests as $test){
+ $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+ }
+ }
+
+ function test_bounds(){
+ // we test multiple cases here - format: in, offset, length, out
+ $tests = array();
+
+ // bounds checking
+ $tests[] = array('aaживπά우리をあöä',-2,false,0);
+ $tests[] = array('aaживπά우리をあöä',128,false,29);
+
+ $tests[] = array('aaживπά우리をあöä',-2,true,0);
+ $tests[] = array('aaживπά우리をあöä',128,true,29);
foreach($tests as $test){
$this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
diff --git a/inc/utf8.php b/inc/utf8.php
index 0323bed4b..ef056bfa4 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -776,15 +776,19 @@ function utf8_bad_replace($str, $replace = '') {
* @author chris smith <chris@jalakai.co.uk>
*/
function utf8_correctIdx(&$str,$i,$next=false) {
-
+
+ if ($i <= 0) return 0;
+
+ $limit = strlen($str);
+ if ($i>=$limit) return $limit;
+
if ($next) {
- $limit = strlen($str);
- while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
- } else {
- while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
- }
-
- return $i;
+ while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
+ } else {
+ while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
+ }
+
+ return $i;
}
// only needed if no mb_string available