diff options
author | Andreas Gohr <andi@splitbrain.org> | 2005-10-17 22:04:31 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2005-10-17 22:04:31 +0200 |
commit | 15fa0b4f1d7eb4b260ff5b8025c56fce3681148e (patch) | |
tree | eb71f504982f49e497ada40bbbac35b24383a72a | |
parent | faa28b7fc56238d32739d949fd2df3bffe33b1ba (diff) | |
download | rpg-15fa0b4f1d7eb4b260ff5b8025c56fce3681148e.tar.gz rpg-15fa0b4f1d7eb4b260ff5b8025c56fce3681148e.tar.bz2 |
removed mb_string requirement in JSON.php #592
darcs-hash:20051017200431-7ad00-c9ed74326a44b078cc516e08515375d9d0fba811.gz
-rw-r--r-- | _test/cases/inc/utf8_utf16be.test.php | 28 | ||||
-rw-r--r-- | inc/JSON.php | 24 | ||||
-rw-r--r-- | inc/utf8.php | 31 |
3 files changed, 74 insertions, 9 deletions
diff --git a/_test/cases/inc/utf8_utf16be.test.php b/_test/cases/inc/utf8_utf16be.test.php new file mode 100644 index 000000000..2d79ca35a --- /dev/null +++ b/_test/cases/inc/utf8_utf16be.test.php @@ -0,0 +1,28 @@ +<?php + +require_once DOKU_INC.'inc/utf8.php'; + +// use no mbstring help here +define('UTF8_NOMBSTRING',1); + +class utf8_utf16be_test extends UnitTestCase { + // some chars from various code regions + var $utf8 = '鈩ℵŁöx'; + var $utf16 = "\x92\x29\x21\x35\x1\x41\x0\xf6\x0\x78"; + + /** + * Convert from UTF-8 to UTF-16BE + */ + function test_to16be(){ + $this->assertEqual(utf8_to_utf16be($this->utf8), $this->utf16); + } + + /** + * Convert from UTF-16BE to UTF-8 + */ + function test_from16be(){ + $this->assertEqual(utf16be_to_utf8($this->utf16),$this->utf8); + } +} + +//Setup VIM: ex: et ts=2 enc=utf-8 : diff --git a/inc/JSON.php b/inc/JSON.php index 6345743fc..2958d3419 100644 --- a/inc/JSON.php +++ b/inc/JSON.php @@ -57,6 +57,10 @@ * @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198 */ +// for DokuWiki +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/'); +require_once(DOKU_INC.'inc/utf8.php'); + /** * Marker constant for JSON::decode(), used to flag stack state */ @@ -191,7 +195,8 @@ class JSON // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c+1})); $c+=1; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -202,7 +207,8 @@ class JSON ord($var{$c+1}), ord($var{$c+2})); $c+=2; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -214,7 +220,8 @@ class JSON ord($var{$c+2}), ord($var{$c+3})); $c+=3; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); 
+ //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -227,7 +234,8 @@ class JSON ord($var{$c+3}), ord($var{$c+4})); $c+=4; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; @@ -241,7 +249,8 @@ class JSON ord($var{$c+4}), ord($var{$c+5})); $c+=5; - $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8'); + $utf16 = utf8_to_utf16be($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; } @@ -411,7 +420,8 @@ class JSON // single, escaped unicode character $utf16 = chr(hexdec(substr($chrs, ($c+2), 2))) . chr(hexdec(substr($chrs, ($c+4), 2))); - $utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16'); + //$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16'); + $utf8 .= utf16be_to_utf8($utf16); $c+=5; } elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) { @@ -612,4 +622,4 @@ class JSON } -?>
\ No newline at end of file +?> diff --git a/inc/utf8.php b/inc/utf8.php index 79baf2df7..7e82e7cd2 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -377,7 +377,7 @@ function utf8_tohtml ($str) { * @link http://www.randomchaos.com/document.php?source=php_and_unicode * @see unicode_to_utf8() */ -function utf8_to_unicode( $str ) { +function utf8_to_unicode( &$str ) { $unicode = array(); $values = array(); $lookingFor = 1; @@ -408,7 +408,7 @@ function utf8_to_unicode( $str ) { * @link http://www.randomchaos.com/document.php?source=php_and_unicode * @see utf8_to_unicode() */ -function unicode_to_utf8( $str ) { +function unicode_to_utf8( &$str ) { $utf8 = ''; foreach( $str as $unicode ) { if ( $unicode < 128 ) { @@ -426,6 +426,33 @@ function unicode_to_utf8( $str ) { } /** + * UTF-8 to UTF-16BE conversion. + * + * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits + */ +function utf8_to_utf16be(&$str, $bom = false) { + $out = $bom ? "\xFE\xFF" : ''; + if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding')) + return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8'); + + $uni = utf8_to_unicode($str); + foreach($uni as $cp){ + $out .= pack('n',$cp); + } + return $out; +} + +/** + * UTF-16BE to UTF-8 conversion. + * + * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits + */ +function utf16be_to_utf8(&$str) { + $uni = unpack('n*',$str); + return unicode_to_utf8($uni); +} + +/** * UTF-8 Case lookup table * * This lookuptable defines the upper case letters to their correspponding |