diff options
author | chris <chris@jalakai.co.uk> | 2006-08-27 17:32:54 +0200 |
---|---|---|
committer | chris <chris@jalakai.co.uk> | 2006-08-27 17:32:54 +0200 |
commit | f50163d1d772ac773f6d03ed049e4a0329674db9 (patch) | |
tree | 6444af39f7037eec981df00a1e01822064d5b074 | |
parent | ced0762eb24c25ac27438d5bef2a6ff434e55dca (diff) | |
download | rpg-f50163d1d772ac773f6d03ed049e4a0329674db9.tar.gz rpg-f50163d1d772ac773f6d03ed049e4a0329674db9.tar.bz2 |
utf8_correctIdx bounds checking and more unittests
darcs-hash:20060827153254-9b6ab-3c76fde7cb5534ca12628e9aa6e6d59d9bb02f45.gz
-rw-r--r-- | _test/cases/inc/utf8_correctidx.test.php | 65 | ||||
-rw-r--r-- | inc/utf8.php | 20 |
2 files changed, 71 insertions, 14 deletions
```diff
diff --git a/_test/cases/inc/utf8_correctidx.test.php b/_test/cases/inc/utf8_correctidx.test.php
index 1e7abf04a..d95ce9ae0 100644
--- a/_test/cases/inc/utf8_correctidx.test.php
+++ b/_test/cases/inc/utf8_correctidx.test.php
@@ -6,15 +6,68 @@ require_once DOKU_INC.'inc/utf8.php';
 
 class utf8_correctidx_test extends UnitTestCase {
 
-    function test1(){
+    function test_singlebyte(){
         // we test multiple cases here - format: in, offset, length, out
         $tests = array();
 
-        $tests[] = array('живπά우리をあöä',1,false,0);
-        $tests[] = array('живπά우리をあöä',2,false,2);
-        $tests[] = array('живπά우리をあöä',1,true,2);
-        $tests[] = array('живπά우리をあöä',0,false,0);
-        $tests[] = array('живπά우리をあöä',2,true,2);
+        // single byte, should return current index
+        $tests[] = array('aaживπά우리をあöä',0,false,0);
+        $tests[] = array('aaживπά우리をあöä',1,false,1);
+        $tests[] = array('aaживπά우리をあöä',1,true,1);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+    function test_twobyte(){
+        // we test multiple cases here - format: in, offset, length, out
+        $tests = array();
+
+        // two byte, should move to boundary, expect even number
+        $tests[] = array('aaживπά우리をあöä',2,false,2);
+        $tests[] = array('aaживπά우리をあöä',3,false,2);
+        $tests[] = array('aaживπά우리をあöä',4,false,4);
+
+        $tests[] = array('aaживπά우리をあöä',2,true,2);
+        $tests[] = array('aaживπά우리をあöä',3,true,4);
+        $tests[] = array('aaживπά우리をあöä',4,true,4);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+    function test_threebyte(){
+        // we test multiple cases here - format: in, offset, length, out
+        $tests = array();
+
+        // three byte, should move to boundary 10 or 13
+        $tests[] = array('aaживπά우리をあöä',10,false,10);
+        $tests[] = array('aaживπά우리をあöä',11,false,10);
+        $tests[] = array('aaживπά우리をあöä',12,false,10);
+        $tests[] = array('aaживπά우리をあöä',13,false,13);
+
+        $tests[] = array('aaживπά우리をあöä',10,true,10);
+        $tests[] = array('aaживπά우리をあöä',11,true,13);
+        $tests[] = array('aaживπά우리をあöä',12,true,13);
+        $tests[] = array('aaживπά우리をあöä',13,true,13);
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+    function test_bounds(){
+        // we test multiple cases here - format: in, offset, length, out
+        $tests = array();
+
+        // bounds checking
+        $tests[] = array('aaживπά우리をあöä',-2,false,0);
+        $tests[] = array('aaживπά우리をあöä',128,false,29);
+
+        $tests[] = array('aaживπά우리をあöä',-2,true,0);
+        $tests[] = array('aaживπά우리をあöä',128,true,29);
 
         foreach($tests as $test){
             $this->assertEqual(utf8_correctIdx($test[0],$test[1],$test[2]),$test[3]);
diff --git a/inc/utf8.php b/inc/utf8.php
index 0323bed4b..ef056bfa4 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -776,15 +776,19 @@ function utf8_bad_replace($str, $replace = '') {
  * @author chris smith <chris@jalakai.co.uk>
  */
 function utf8_correctIdx(&$str,$i,$next=false) {
-
+
+  if ($i <= 0) return 0;
+
+  $limit = strlen($str);
+  if ($i>=$limit) return $limit;
+
   if ($next) {
-    $limit = strlen($str);
-    while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
-  } else {
-    while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
-  }
-
-  return $i;
+    while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
+  } else {
+    while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
+  }
+
+  return $i;
 }
 
 // only needed if no mb_string available
```