From 1abfaba47f3c1c4352eb728afcd4b6871b7bb4a0 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Tue, 21 Feb 2006 22:26:05 +0100 Subject: fixes for utf-8 to/from unicode conversion The functions utf8_to_unicode and unicode_to_utf8 didn't work correctly with some 3- and 4-byte strings. This replaces those functions with two more sophisticated ones. It also adds unit tests for them. darcs-hash:20060221212605-7ad00-7bfefe8c9615d5a7f3b33c279ce79d4200d4778c.gz --- _test/cases/inc/utf8_unicode.test.php | 60 +++++++++++++++++++++++++++++++++++ _test/cases/inc/utf8_utf16be.test.php | 2 +- 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 _test/cases/inc/utf8_unicode.test.php (limited to '_test') diff --git a/_test/cases/inc/utf8_unicode.test.php b/_test/cases/inc/utf8_unicode.test.php new file mode 100644 index 000000000..453aad216 --- /dev/null +++ b/_test/cases/inc/utf8_unicode.test.php @@ -0,0 +1,60 @@ +assertEqual(utf8_to_unicode($in),$out); + } + + function test_from_2byte(){ + $in = "\xc3\xbc"; + $out = array(252); + $this->assertEqual(utf8_to_unicode($in),$out); + } + + function test_from_3byte(){ + $in = "\xe2\x99\x8a"; + $out = array(9802); + $this->assertEqual(utf8_to_unicode($in),$out); + } + + function test_from_4byte(){ + $in = "\xf4\x80\x80\x81"; + $out = array(1048577); + $this->assertEqual(utf8_to_unicode($in),$out); + } + + function test_to_1byte(){ + $out = 'a'; + $in = array(97); + $this->assertEqual(unicode_to_utf8($in),$out); + } + + function test_to_2byte(){ + $out = "\xc3\xbc"; + $in = array(252); + $this->assertEqual(unicode_to_utf8($in),$out); + } + + function test_to_3byte(){ + $out = "\xe2\x99\x8a"; + $in = array(9802); + $this->assertEqual(unicode_to_utf8($in),$out); + } + + function test_to_4byte(){ + $out = "\xf4\x80\x80\x81"; + $in = array(1048577); + $this->assertEqual(unicode_to_utf8($in),$out); + } + +} + +//Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/_test/cases/inc/utf8_utf16be.test.php 
b/_test/cases/inc/utf8_utf16be.test.php index 2d79ca35a..3a346da51 100644 --- a/_test/cases/inc/utf8_utf16be.test.php +++ b/_test/cases/inc/utf8_utf16be.test.php @@ -3,7 +3,7 @@ require_once DOKU_INC.'inc/utf8.php'; // use no mbstring help here -define('UTF8_NOMBSTRING',1); +if(!defined('UTF8_NOMBSTRING')) define('UTF8_NOMBSTRING',1); class utf8_utf16be_test extends UnitTestCase { // some chars from various code regions -- cgit v1.2.3