diff options
author | Guy Brand <gb@unistra.fr> | 2015-08-10 10:03:27 +0200 |
---|---|---|
committer | Guy Brand <gb@unistra.fr> | 2015-08-10 10:03:27 +0200 |
commit | 53a57d16b9c741bb44099fd93bf79efa06796341 (patch) | |
tree | 24a90a50afe9325926c8ebaa2ed90f9fa093e5b9 /inc/utf8.php | |
parent | cf6e6645c31a9f185cef3fb9452fb188882ede47 (diff) | |
parent | a060d9973e7c1d5051f2cc426937881826e4972e (diff) | |
download | rpg-53a57d16b9c741bb44099fd93bf79efa06796341.tar.gz rpg-53a57d16b9c741bb44099fd93bf79efa06796341.tar.bz2 |
Merge branch master into stable
Diffstat (limited to 'inc/utf8.php')
-rw-r--r-- | inc/utf8.php | 129 |
1 files changed, 95 insertions, 34 deletions
diff --git a/inc/utf8.php b/inc/utf8.php index c944667f7..2b6a0c498 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -43,6 +43,9 @@ if(!function_exists('utf8_isASCII')){ * Checks if a string contains 7bit ASCII only * * @author Andreas Haerter <andreas.haerter@dev.mail-node.com> + * + * @param string $str + * @return bool */ function utf8_isASCII($str){ return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1); @@ -56,6 +59,9 @@ if(!function_exists('utf8_strip')){ * Returns a pure ASCII7 string * * @author Andreas Gohr <andi@splitbrain.org> + * + * @param string $str + * @return string */ function utf8_strip($str){ $ascii = ''; @@ -75,6 +81,9 @@ if(!function_exists('utf8_check')){ * * @author <bmorel@ssi.fr> * @link http://www.php.net/manual/en/function.utf8-encode.php + * + * @param string $Str + * @return bool */ function utf8_check($Str) { $len = strlen($Str); @@ -105,6 +114,7 @@ if(!function_exists('utf8_basename')){ * * @see basename() * @link https://bugs.php.net/bug.php?id=37738 + * * @param string $path A path * @param string $suffix If the name component ends in suffix this will also be cut off * @return string @@ -134,6 +144,9 @@ if(!function_exists('utf8_strlen')){ * @author <chernyshevsky at hotmail dot com> * @see strlen() * @see utf8_decode() + * + * @param string $string + * @return int */ function utf8_strlen($string){ return strlen(utf8_decode($string)); @@ -148,10 +161,11 @@ if(!function_exists('utf8_substr')){ * * @author Harry Fuecks <hfuecks@gmail.com> * @author Chris Smith <chris@jalakai.co.uk> + * * @param string $str * @param int $offset number of UTF-8 characters offset (from left) * @param int $length (optional) length in UTF-8 characters from offset - * @return mixed string or false if failure + * @return string */ function utf8_substr($str, $offset, $length = null) { if(UTF8_MBSTRING){ @@ -250,6 +264,14 @@ if(!function_exists('utf8_substr_replace')){ * * @author Andreas Gohr <andi@splitbrain.org> * @see substr_replace() + * + * @param string $string input string + * @param string $replacement the replacement + * @param int $start the replacing will begin at the start'th offset into string. + * @param int $length If given and is positive, it represents the length of the portion of string which is + * to be replaced. If length is zero then this function will have the effect of inserting + * replacement into string at the given start offset. + * @return string */ function utf8_substr_replace($string, $replacement, $start , $length=0 ){ $ret = ''; @@ -266,6 +288,7 @@ if(!function_exists('utf8_ltrim')){ * * @author Andreas Gohr <andi@splitbrain.org> * @see ltrim() + * * @param string $str * @param string $charlist * @return string @@ -286,6 +309,7 @@ if(!function_exists('utf8_rtrim')){ * * @author Andreas Gohr <andi@splitbrain.org> * @see rtrim() + * * @param string $str * @param string $charlist * @return string @@ -306,6 +330,7 @@ if(!function_exists('utf8_trim')){ * * @author Andreas Gohr <andi@splitbrain.org> * @see trim() + * * @param string $str * @param string $charlist * @return string @@ -326,10 +351,17 @@ if(!function_exists('utf8_strtolower')){ * @author Leo Feyer <leo@typolight.org> * @see strtolower() * @see utf8_strtoupper() + * + * @param string $string + * @return string */ function utf8_strtolower($string){ - if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8'); - + if(UTF8_MBSTRING) { + if (class_exists("Normalizer", $autoload = false)) + return normalizer::normalize(mb_strtolower($string,'utf-8')); + else + return (mb_strtolower($string,'utf-8')); + } global $UTF8_UPPER_TO_LOWER; return strtr($string,$UTF8_UPPER_TO_LOWER); } @@ -344,6 +376,9 @@ if(!function_exists('utf8_strtoupper')){ * @author Leo Feyer <leo@typolight.org> * @see strtoupper() * @see utf8_strtoupper() + * + * @param string $string + * @return string */ function utf8_strtoupper($string){ if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8'); @@ -359,7 +394,8 @@ if(!function_exists('utf8_ucfirst')){ * Make a string's first character uppercase * * @author Harry Fuecks - * @param string + * + * @param string $str * @return string with first character as upper case (if applicable) */ function utf8_ucfirst($str){ @@ -381,9 +417,10 @@ if(!function_exists('utf8_ucwords')){ * Uppercase the first character of each word in a string * * @author Harry Fuecks - * @param string - * @return string with first char of each word uppercase * @see http://www.php.net/ucwords + * + * @param string $str + * @return string with first char of each word uppercase */ function utf8_ucwords($str) { // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches; @@ -399,10 +436,11 @@ if(!function_exists('utf8_ucwords')){ * You don't need to call this yourself * * @author Harry Fuecks - * @param array $matches matches corresponding to a single word - * @return string with first char of the word in uppercase * @see utf8_ucwords * @see utf8_strtoupper + * + * @param array $matches matches corresponding to a single word + * @return string with first char of the word in uppercase */ function utf8_ucwords_callback($matches) { $leadingws = $matches[2]; @@ -420,6 +458,10 @@ if(!function_exists('utf8_deaccent')){ * letters. Default is to deaccent both cases ($case = 0) * * @author Andreas Gohr <andi@splitbrain.org> + * + * @param string $string + * @param int $case + * @return string */ function utf8_deaccent($string,$case=0){ if($case <= 0){ @@ -439,6 +481,9 @@ if(!function_exists('utf8_romanize')){ * Romanize a non-latin string * * @author Andreas Gohr <andi@splitbrain.org> + * + * @param string $string + * @return string */ function utf8_romanize($string){ if(utf8_isASCII($string)) return $string; //nothing to do @@ -456,6 +501,7 @@ if(!function_exists('utf8_stripspecials')){ * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) * * @author Andreas Gohr <andi@splitbrain.org> + * * @param string $string The UTF8 string to strip of special chars * @param string $repl Replace special with this string * @param string $additional Additional chars to strip (used in regexp char class) @@ -480,9 +526,10 @@ if(!function_exists('utf8_strpos')){ * * @author Leo Feyer <leo@typolight.org> * @see strpos() - * @param string - * @param string - * @param integer + * + * @param string $haystack + * @param string $needle + * @param integer $offset * @return integer */ function utf8_strpos($haystack, $needle, $offset=0){ @@ -512,6 +559,9 @@ if(!function_exists('utf8_tohtml')){ * @author Tom N Harris <tnharris@whoopdedo.org> * @author <vpribish at shopping dot com> * @link http://www.php.net/manual/en/function.utf8-decode.php + * + * @param string $str + * @return string */ function utf8_tohtml ($str) { $ret = ''; @@ -542,6 +592,7 @@ if(!function_exists('utf8_unhtml')){ * what it should be -> "&&#38;" * * @author Tom N Harris <tnharris@whoopdedo.org> + * * @param string $str UTF-8 encoded string * @param boolean $entities Flag controlling decoding of named entities. * @return string UTF-8 encoded string with numeric (and named) entities replaced. @@ -564,7 +615,7 @@ if(!function_exists('utf8_decode_numeric')){ * Decodes numeric HTML entities to their correct UTF-8 characters * * @param $ent string A numeric entity - * @return string + * @return string|false */ function utf8_decode_numeric($ent) { switch ($ent[2]) { @@ -597,10 +648,10 @@ if(!class_exists('utf8_entity_decoder')){ } /** - * Wrapper aorund unicode_to_utf8() + * Wrapper around unicode_to_utf8() * - * @param $c string - * @return mixed + * @param string $c + * @return string|false */ function makeutf8($c) { return unicode_to_utf8(array(ord($c))); @@ -609,8 +660,8 @@ if(!class_exists('utf8_entity_decoder')){ /** * Decodes any HTML entity to it's correct UTF-8 char equivalent * - * @param $ent string An entity - * @return string + * @param string $ent An entity + * @return string|false */ function decode($ent) { if ($ent[1] == '#') { @@ -640,12 +691,13 @@ if(!function_exists('utf8_to_unicode')){ * * @author <hsivonen@iki.fi> * @author Harry Fuecks <hfuecks@gmail.com> - * @param string $str UTF-8 encoded string - * @param boolean $strict Check for invalid sequences? - * @return mixed array of unicode code points or false if UTF-8 invalid * @see unicode_to_utf8 * @link http://hsivonen.iki.fi/php-utf8/ * @link http://sourceforge.net/projects/phputf8/ + * + * @param string $str UTF-8 encoded string + * @param boolean $strict Check for invalid sequences? + * @return mixed array of unicode code points or false if UTF-8 invalid */ function utf8_to_unicode($str,$strict=false) { $mState = 0; // cached expected number of octets after the current octet @@ -815,7 +867,8 @@ if(!function_exists('unicode_to_utf8')){ * * @param array $arr of unicode code points representing a string * @param boolean $strict Check for invalid sequences? - * @return mixed UTF-8 string or false if array contains invalid code points + * @return string|false UTF-8 string or false if array contains invalid code points + * * @author <hsivonen@iki.fi> * @author Harry Fuecks <hfuecks@gmail.com> * @see utf8_to_unicode @@ -896,6 +949,10 @@ if(!function_exists('utf8_to_utf16be')){ * UTF-8 to UTF-16BE conversion. * * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits + * + * @param string $str + * @param bool $bom + * @return string */ function utf8_to_utf16be(&$str, $bom = false) { $out = $bom ? "\xFE\xFF" : ''; @@ -914,6 +971,9 @@ if(!function_exists('utf16be_to_utf8')){ * UTF-8 to UTF-16BE conversion. * * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits + * + * @param string $str + * @return false|string */ function utf16be_to_utf8(&$str) { $uni = unpack('n*',$str); @@ -933,6 +993,7 @@ if(!function_exists('utf8_bad_replace')){ * * @author Harry Fuecks <hfuecks@gmail.com> * @see http://www.w3.org/International/questions/qa-forms-utf-8 + * * @param string $str to search * @param string $replace to replace bad bytes with (defaults to '?') - use ASCII * @return string @@ -967,8 +1028,8 @@ if(!function_exists('utf8_correctIdx')){ /** * adjust a byte index into a utf8 string to a utf8 character boundary * - * @param $str string utf8 character string - * @param $i int byte index into $str + * @param string $str utf8 character string + * @param int $i byte index into $str * @param $next bool direction to search for boundary, * false = up (current character) * true = down (next character) @@ -1009,11 +1070,11 @@ if(!UTF8_MBSTRING){ "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q", "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G", "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ", - "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", + "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", "ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", - "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", - "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", - "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", + "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", + "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", + "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", @@ -1088,11 +1149,11 @@ if(!UTF8_MBSTRING){ "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", - "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", + "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", - "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", - "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", - "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", + "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", + "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", + "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", @@ -1298,11 +1359,11 @@ global $UTF8_SPECIAL_CHARS2; if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~
�'. '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. - '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. + '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. - '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. + '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. - '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. + '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. |