diff options
-rw-r--r-- | inc/utf8.php | 22 |
1 files changed, 22 insertions, 0 deletions
/**
 * Checks whether a string is well-formed UTF-8
 *
 * Walks the string byte by byte and verifies that every lead byte is
 * followed by the right number of continuation bytes. In addition to the
 * plain bit-pattern test this rejects everything RFC 3629 declares
 * ill-formed: overlong encodings (lead bytes 0xC0/0xC1, 0xE0 followed by
 * 0x80-0x9F, 0xF0 followed by 0x80-0x8F), UTF-16 surrogates (0xED followed
 * by 0xA0-0xBF), code points above U+10FFFF (0xF4 followed by 0x90-0xBF,
 * lead bytes 0xF5-0xF7) and the obsolete 5 and 6 byte forms (0xF8-0xFD).
 *
 * An empty string and pure 7-bit ASCII are considered valid.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 * @param  string $Str the byte string to examine
 * @return bool   true if $Str is well-formed UTF-8, false otherwise
 */
function utf8_check($Str) {
    // The length never changes while scanning, so compute it only once
    $len = strlen($Str);

    for ($i = 0; $i < $len; $i++) {
        $lead = ord($Str[$i]);

        // 0bbbbbbb - plain ASCII, nothing follows
        if ($lead < 0x80) continue;

        // Allowed range for the FIRST continuation byte. Some lead bytes
        // narrow it to exclude overlongs, surrogates and > U+10FFFF.
        $min = 0x80;
        $max = 0xBF;

        if ($lead >= 0xC2 && $lead <= 0xDF) {
            // 110bbbbb - 0xC0 and 0xC1 would only encode overlong ASCII
            $n = 1;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            // 1110bbbb
            $n = 2;
            if ($lead === 0xE0) {
                $min = 0xA0; // below this it is an overlong 2 byte value
            } elseif ($lead === 0xED) {
                $max = 0x9F; // above this are the surrogates U+D800-U+DFFF
            }
        } elseif ($lead >= 0xF0 && $lead <= 0xF4) {
            // 11110bbb
            $n = 3;
            if ($lead === 0xF0) {
                $min = 0x90; // below this it is an overlong 3 byte value
            } elseif ($lead === 0xF4) {
                $max = 0x8F; // above this the code point exceeds U+10FFFF
            }
        } else {
            // Stray continuation byte (0x80-0xBF), overlong lead (0xC0,
            // 0xC1) or a lead byte outside of the Unicode range (0xF5-0xFF)
            return false;
        }

        // n continuation bytes have to follow
        for ($j = 0; $j < $n; $j++) {
            if (++$i >= $len) return false; // sequence is truncated

            $byte = ord($Str[$i]);
            if ($byte < $min || $byte > $max) return false;

            // Only the first continuation byte has a restricted range,
            // all further ones just have to match 10bbbbbb
            $min = 0x80;
            $max = 0xBF;
        }
    }

    return true;
}