diff options
Diffstat (limited to 'inc/utf8.php')
-rw-r--r-- | inc/utf8.php | 29 |
1 files changed, 28 insertions, 1 deletions
/**
 * Encodes multi-byte UTF-8 characters as numeric HTML entities
 *
 * ASCII bytes are copied through unchanged. Every well-formed 2-, 3- or
 * 4-byte UTF-8 sequence is replaced by its decimal character reference
 * (e.g. "\xC3\xA9" becomes "&#233;"). Bytes that do not form a complete
 * sequence (stray continuation bytes, invalid lead bytes, a sequence cut
 * off by the end of the string or followed by a non-continuation byte)
 * are copied through unchanged instead of being misdecoded.
 *
 * No check is made for overlong encodings or surrogate code points.
 *
 * @author <vpribish at shopping dot com>
 * @link   http://www.php.net/manual/en/function.utf8-decode.php
 *
 * @param  string $str UTF-8 encoded input
 * @return string      input with multi-byte characters replaced by &#NNN; entities
 */
function utf8_tohtml ($str) {
    $ret  = '';
    $max  = strlen($str);
    $last = 0; // index of the first byte that has not been copied to $ret yet

    for ($i = 0; $i < $max; $i++) {
        $lead = ord($str[$i]);

        // plain ASCII is copied in bulk once the next entity (or the end) is reached
        if ($lead < 0x80) continue;

        // the lead byte's prefix gives the sequence length and its payload bits
        if (($lead & 0xE0) == 0xC0) {        // 110x xxxx
            $len = 2;
            $cp  = $lead & 0x1F;
        } elseif (($lead & 0xF0) == 0xE0) {  // 1110 xxxx
            $len = 3;
            $cp  = $lead & 0x0F;
        } elseif (($lead & 0xF8) == 0xF0) {  // 1111 0xxx
            $len = 4;
            $cp  = $lead & 0x07;
        } else {
            continue; // stray continuation byte or invalid lead byte
        }

        // never read past the end of the string on a truncated sequence
        if ($i + $len > $max) continue;

        // each continuation byte (10xx xxxx) contributes six more bits
        $valid = true;
        for ($j = 1; $j < $len; $j++) {
            $next = ord($str[$i + $j]);
            if (($next & 0xC0) != 0x80) {
                $valid = false;
                break;
            }
            $cp = ($cp << 6) | ($next & 0x3F);
        }
        if (!$valid) continue;

        // flush the untouched bytes seen so far, then the entity itself
        $ret .= substr($str, $last, $i - $last) . '&#' . $cp . ';';

        $i   += $len - 1; // skip the continuation bytes just consumed
        $last = $i + 1;
    }

    // append whatever follows the last entity
    return $ret . substr($str, $last);
}