summaryrefslogtreecommitdiff
path: root/inc/utf8.php
diff options
context:
space:
mode:
authorandi <andi@splitbrain.org>2005-06-11 11:29:16 +0200
committerandi <andi@splitbrain.org>2005-06-11 11:29:16 +0200
commitea2eed85b6afedcf37443a4953bade0186b9aef2 (patch)
tree7f602365221428bfe8aee8a08e5cd0be367ade88 /inc/utf8.php
parent9af6e647b56056230f069dbb99935ae7402210fd (diff)
downloadrpg-ea2eed85b6afedcf37443a4953bade0186b9aef2.tar.gz
rpg-ea2eed85b6afedcf37443a4953bade0186b9aef2.tar.bz2
spellchecker fixes for Konqueror
Konqueror seems to ignore the charset
darcs-hash:20050611092916-9977f-311b99e63b66a83c9f9022c468ba61d0687822c2.gz
Diffstat (limited to 'inc/utf8.php')
-rw-r--r-- inc/utf8.php 29
1 files changed, 28 insertions, 1 deletions
diff --git a/inc/utf8.php b/inc/utf8.php
index 70b16f1a5..7d0f0a266 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -190,7 +190,7 @@ function utf8_ltrim($str,$charlist=''){
}
/**
- * Unicode aware replacement for ltrim()
+ * Unicode aware replacement for rtrim()
*
* @author Andreas Gohr <andi@splitbrain.org>
* @see rtrim()
@@ -342,6 +342,33 @@ function utf8_strpos($haystack, $needle,$offset=0) {
}
/**
+ * Encodes UTF-8 characters to HTML entities
+ *
+ * Every well-formed multi-byte UTF-8 sequence (two, three or four bytes)
+ * is replaced by a decimal numeric character reference (&#NNN;). ASCII
+ * bytes are copied through unchanged. Bytes that do not start a complete
+ * sequence (stray continuation bytes, a lead byte followed by a
+ * non-continuation byte, or a sequence cut off by the end of the string)
+ * are copied through unchanged as well.
+ *
+ * @author <vpribish at shopping dot com>
+ * @link http://www.php.net/manual/en/function.utf8-decode.php
+ * @param string $str UTF-8 encoded input
+ * @return string input with multi-byte characters replaced by entities
+ */
+function utf8_tohtml ($str) {
+    $ret  = '';
+    $max  = strlen($str);
+    $last = 0; // start of the pending run of bytes that are copied verbatim
+    for ($i = 0; $i < $max; $i++) {
+        $lead = ord($str[$i]);
+        // Work out how many continuation bytes the lead byte announces and
+        // keep only its payload bits.
+        if (($lead & 0xE0) == 0xC0) {        // 110x xxxx: two byte sequence
+            $extra = 1;
+            $cp    = $lead & 0x1F;
+        } elseif (($lead & 0xF0) == 0xE0) {  // 1110 xxxx: three byte sequence
+            $extra = 2;
+            $cp    = $lead & 0x0F;
+        } elseif (($lead & 0xF8) == 0xF0) {  // 1111 0xxx: four byte sequence
+            $extra = 3;
+            $cp    = $lead & 0x07;
+        } else {
+            continue; // ASCII or a stray byte: stays in the verbatim run
+        }
+        // A sequence cut off by the end of the string is left untouched
+        // instead of reading past the end of the buffer.
+        if ($i + $extra >= $max) {
+            continue;
+        }
+        // Each continuation byte (10xx xxxx) contributes six payload bits.
+        $valid = true;
+        for ($k = 1; $k <= $extra; $k++) {
+            $byte = ord($str[$i + $k]);
+            if (($byte & 0xC0) != 0x80) {
+                $valid = false;
+                break;
+            }
+            $cp = ($cp << 6) | ($byte & 0x3F);
+        }
+        if (!$valid) {
+            continue;
+        }
+        // Flush the verbatim bytes passed so far, then emit the entity.
+        $ret .= substr($str, $last, $i - $last) . '&#' . $cp . ';';
+        $i   += $extra;      // skip the continuation bytes just consumed
+        $last = $i + 1;
+    }
+    return $ret . substr($str, $last); // append the last batch of verbatim bytes
+}
+
+/**
* This function returns any UTF-8 encoded text as a list of
* Unicode values:
*