diff options
author | andi <andi@splitbrain.org> | 2005-01-23 12:34:32 +0100 |
---|---|---|
committer | andi <andi@splitbrain.org> | 2005-01-23 12:34:32 +0100 |
commit | f29bd553260d53e5606fe7c57abcb9a29ad03b7c (patch) | |
tree | 7928bace6ba438da6227e705fb53c31eef0bf9f7 /inc | |
parent | 49c713a33bc794629ec41fbddf2c29ebd124e926 (diff) | |
download | rpg-f29bd553260d53e5606fe7c57abcb9a29ad03b7c.tar.gz rpg-f29bd553260d53e5606fe7c57abcb9a29ad03b7c.tar.bz2 |
utf8_check
darcs-hash:20050123113432-9977f-c9ffd2cf315325fa6abc04323e767875d0f11d84.gz
Diffstat (limited to 'inc')
-rw-r--r-- | inc/utf8.php | 22 |
1 files changed, 22 insertions, 0 deletions
/**
 * Checks whether a string is well-formed UTF-8
 *
 * The string is scanned byte by byte. Every lead byte must be followed by
 * the right number of continuation bytes, and the first continuation byte
 * must lie in the range that rules out overlong encodings, UTF-16
 * surrogates (U+D800..U+DFFF) and code points above U+10FFFF. The obsolete
 * 5 and 6 byte forms are rejected as well.
 *
 * Pure ASCII and the empty string are valid UTF-8 and yield true.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 *
 * @param  string $Str the byte string to examine
 * @return bool   true if $Str is well-formed UTF-8, false otherwise
 */
function utf8_check($Str) {
    // Length is loop invariant, so compute it only once
    $len = strlen($Str);

    for ($i = 0; $i < $len; $i++) {
        $lead = ord($Str[$i]);

        // 0bbbbbbb - plain ASCII, nothing more to verify
        if ($lead < 0x80) continue;

        // $n is the number of continuation bytes that have to follow,
        // $lo/$hi is the allowed range for the FIRST of them. The range is
        // narrower than 0x80..0xBF where needed to exclude invalid forms.
        $lo = 0x80;
        $hi = 0xBF;
        if ($lead >= 0xC2 && $lead <= 0xDF) {
            // 110bbbbb - 0xC0 and 0xC1 could only encode overlong ASCII
            $n = 1;
        } elseif ($lead === 0xE0) {
            // 1110bbbb - second byte below 0xA0 would be overlong
            $n  = 2;
            $lo = 0xA0;
        } elseif ($lead === 0xED) {
            // second byte above 0x9F would encode a UTF-16 surrogate
            $n  = 2;
            $hi = 0x9F;
        } elseif (($lead & 0xF0) === 0xE0) {
            // remaining three byte leads: 0xE1..0xEC, 0xEE, 0xEF
            $n = 2;
        } elseif ($lead === 0xF0) {
            // 11110bbb - second byte below 0x90 would be overlong
            $n  = 3;
            $lo = 0x90;
        } elseif ($lead >= 0xF1 && $lead <= 0xF3) {
            $n = 3;
        } elseif ($lead === 0xF4) {
            // second byte above 0x8F would exceed U+10FFFF
            $n  = 3;
            $hi = 0x8F;
        } else {
            // stray continuation byte, 0xC0/0xC1, 0xF5..0xFF
            return false;
        }

        for ($j = 0; $j < $n; $j++) {
            // sequence is cut off by the end of the string
            if (++$i === $len) return false;

            $byte = ord($Str[$i]);
            if ($byte < $lo || $byte > $hi) return false;

            // only the first continuation byte is range restricted
            $lo = 0x80;
            $hi = 0xBF;
        }
    }

    return true;
}