summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- _test/cases/inc/utf8_utf16be.test.php 28
-rw-r--r-- inc/JSON.php 24
-rw-r--r-- inc/utf8.php 31
3 files changed, 74 insertions, 9 deletions
diff --git a/_test/cases/inc/utf8_utf16be.test.php b/_test/cases/inc/utf8_utf16be.test.php
new file mode 100644
index 000000000..2d79ca35a
--- /dev/null
+++ b/_test/cases/inc/utf8_utf16be.test.php
@@ -0,0 +1,28 @@
+<?php
+
+require_once DOKU_INC.'inc/utf8.php'; // provides utf8_to_utf16be() and utf16be_to_utf8()
+
+// force the pure-PHP conversion path: utf8_to_utf16be() skips mbstring when this constant is defined
+define('UTF8_NOMBSTRING',1);
+
+class utf8_utf16be_test extends UnitTestCase {
+ // fixture: the characters U+9229, U+2135, U+0141, U+00F6 and U+0078 as UTF-8 text
+ var $utf8 = '鈩ℵŁöx';
+ var $utf16 = "\x92\x29\x21\x35\x1\x41\x0\xf6\x0\x78"; // the same five characters as big-endian 16-bit units, no BOM
+
+ /**
+ * utf8_to_utf16be() applied to the UTF-8 fixture must equal the UTF-16BE fixture
+ */
+ function test_to16be(){
+ $this->assertEqual(utf8_to_utf16be($this->utf8), $this->utf16);
+ }
+
+ /**
+ * utf16be_to_utf8() applied to the UTF-16BE fixture must equal the UTF-8 fixture
+ */
+ function test_from16be(){
+ $this->assertEqual(utf16be_to_utf8($this->utf16),$this->utf8);
+ }
+}
+
+//Setup VIM: ex: et ts=2 enc=utf-8 :
diff --git a/inc/JSON.php b/inc/JSON.php
index 6345743fc..2958d3419 100644
--- a/inc/JSON.php
+++ b/inc/JSON.php
@@ -57,6 +57,10 @@
* @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
*/
+// for DokuWiki
+if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
+require_once(DOKU_INC.'inc/utf8.php');
+
/**
* Marker constant for JSON::decode(), used to flag stack state
*/
@@ -191,7 +195,8 @@ class JSON
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c, ord($var{$c+1}));
$c+=1;
- $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ $utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
@@ -202,7 +207,8 @@ class JSON
ord($var{$c+1}),
ord($var{$c+2}));
$c+=2;
- $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ $utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
@@ -214,7 +220,8 @@ class JSON
ord($var{$c+2}),
ord($var{$c+3}));
$c+=3;
- $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ $utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
@@ -227,7 +234,8 @@ class JSON
ord($var{$c+3}),
ord($var{$c+4}));
$c+=4;
- $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ $utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
@@ -241,7 +249,8 @@ class JSON
ord($var{$c+4}),
ord($var{$c+5}));
$c+=5;
- $utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
+ $utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
}
@@ -411,7 +420,8 @@ class JSON
// single, escaped unicode character
$utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
. chr(hexdec(substr($chrs, ($c+4), 2)));
- $utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
+ //$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
+ $utf8 .= utf16be_to_utf8($utf16);
$c+=5;
} elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) {
@@ -612,4 +622,4 @@ class JSON
}
-?> \ No newline at end of file
+?>
diff --git a/inc/utf8.php b/inc/utf8.php
index 79baf2df7..7e82e7cd2 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -377,7 +377,7 @@ function utf8_tohtml ($str) {
* @link http://www.randomchaos.com/document.php?source=php_and_unicode
* @see unicode_to_utf8()
*/
-function utf8_to_unicode( $str ) {
+function utf8_to_unicode( &$str ) {
$unicode = array();
$values = array();
$lookingFor = 1;
@@ -408,7 +408,7 @@ function utf8_to_unicode( $str ) {
* @link http://www.randomchaos.com/document.php?source=php_and_unicode
* @see utf8_to_unicode()
*/
-function unicode_to_utf8( $str ) {
+function unicode_to_utf8( &$str ) {
$utf8 = '';
foreach( $str as $unicode ) {
if ( $unicode < 128 ) {
@@ -426,6 +426,33 @@ function unicode_to_utf8( $str ) {
}
/**
+ * Convert a UTF-8 string to UTF-16BE; when $bom is true the result starts with the FE FF byte order mark.
+ *
+ * With mbstring unavailable or UTF8_NOMBSTRING defined, each code point is packed as one 16-bit unit (effectively UCS-2, no surrogate pairs).
+ */
+function utf8_to_utf16be(&$str, $bom = false) {
+ $out = $bom ? "\xFE\xFF" : ''; // optional big-endian byte order mark
+ if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding'))
+ return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8'); // mbstring path, used unless explicitly disabled
+
+ $uni = utf8_to_unicode($str); // fallback: decode to a list of code points
+ foreach($uni as $cp){
+ $out .= pack('n',$cp); // 'n' = unsigned 16-bit big-endian; larger values do not fit in one unit
+ }
+ return $out;
+}
+
+/**
+ * UTF-16BE to UTF-8 conversion.
+ *
+ * Every 16-bit unit is handed to unicode_to_utf8() as one code point (really UCS-2); mbstring is not used and a leading BOM is not stripped
+ */
+function utf16be_to_utf8(&$str) {
+ $uni = unpack('n*',$str); // big-endian unsigned 16-bit values; the resulting array is keyed from 1
+ return unicode_to_utf8($uni);
+}
+
+/**
* UTF-8 Case lookup table
*
* This lookup table defines the upper case letters to their corresponding