summary refs log tree commit diff
path: root/inc
diff options
context:
space:
mode:
author andi <andi@splitbrain.org> 2005-01-23 12:34:32 +0100
committer andi <andi@splitbrain.org> 2005-01-23 12:34:32 +0100
commit f29bd553260d53e5606fe7c57abcb9a29ad03b7c (patch)
tree 7928bace6ba438da6227e705fb53c31eef0bf9f7 /inc
parent 49c713a33bc794629ec41fbddf2c29ebd124e926 (diff)
download rpg-f29bd553260d53e5606fe7c57abcb9a29ad03b7c.tar.gz
rpg-f29bd553260d53e5606fe7c57abcb9a29ad03b7c.tar.bz2
utf8_check
darcs-hash:20050123113432-9977f-c9ffd2cf315325fa6abc04323e767875d0f11d84.gz
Diffstat (limited to 'inc')
-rw-r--r-- inc/utf8.php 22
1 files changed, 22 insertions, 0 deletions
diff --git a/inc/utf8.php b/inc/utf8.php
index d06cfc58c..c985f48e8 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -29,6 +29,28 @@ function utf8_decodeFN($file){
return $file;
}
+/**
+ * Checks whether a string is structurally well-formed UTF-8
+ *
+ * The string is walked byte by byte. A byte below 0x80 stands alone; a lead
+ * byte of the form 110bbbbb up to 1111110b announces 1 to 5 continuation
+ * bytes, each of which must look like 10bbbbbb. Anything else (a stray
+ * continuation byte, 0xFE, 0xFF, or a sequence cut off by the end of the
+ * string) makes the check fail. An empty string passes because the loop
+ * never runs.
+ *
+ * NOTE(review): only the bit patterns are tested, never the decoded value,
+ * so overlong forms, surrogates, values above U+10FFFF and the 5 and 6 byte
+ * forms dropped by RFC 3629 are all accepted. Use a stricter validator
+ * where that matters.
+ *
+ * @author <bmorel@ssi.fr>
+ * @link http://www.php.net/manual/en/function.utf8-encode.php
+ *
+ * @param string $Str the byte string to test
+ * @return bool true if every byte fits the patterns above, false otherwise
+ */
+function utf8_check($Str) {
+ for ($i=0; $i<strlen($Str); $i++) { # $i is also advanced by the inner loop below
+ if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb - single byte, nothing follows
+ elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb - lead byte, 1 continuation byte expected
+ elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb - lead byte, 2 continuation bytes expected
+ elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb - lead byte, 3 continuation bytes expected
+ elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb - lead byte, 4 continuation bytes expected
+ elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b - lead byte, 5 continuation bytes expected
+ else return false; # Does not match any model (stray 10bbbbbb, 0xFE or 0xFF)
+ for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
+ if ((++$i == strlen($Str)) || ((ord($Str[$i]) & 0xC0) != 0x80)) # step to next byte; fail if the string ended or the byte is not 10bbbbbb
+ return false;
+ }
+ }
+ return true;
+}
/**
* This is a unicode aware replacement for strtolower()