summaryrefslogtreecommitdiff
path: root/inc/utf8.php
diff options
context:
space:
mode:
authorandi <andi@splitbrain.org>2005-01-23 16:52:39 +0100
committerandi <andi@splitbrain.org>2005-01-23 16:52:39 +0100
commit2f95495925c5ddfb2e3e7120c43e709ae0c50ae3 (patch)
tree530dd955192a6918a7355e961b9d1b7eddd575b3 /inc/utf8.php
parentf29bd553260d53e5606fe7c57abcb9a29ad03b7c (diff)
downloadrpg-2f95495925c5ddfb2e3e7120c43e709ae0c50ae3.tar.gz
rpg-2f95495925c5ddfb2e3e7120c43e709ae0c50ae3.tar.bz2
utf8 replacements for strpos and strlen
darcs-hash:20050123155239-9977f-2ddc1e19ccf48579c71382e8933166a86ee750a4.gz
Diffstat (limited to 'inc/utf8.php')
-rw-r--r--inc/utf8.php52
1 files changed, 52 insertions, 0 deletions
diff --git a/inc/utf8.php b/inc/utf8.php
index c985f48e8..6e015177b 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -52,6 +52,24 @@ function utf8_check($Str) {
return true;
}
+
+/**
+ * Unicode aware replacement for strlen()
+ *
+ * Counts characters rather than bytes. The mb_string extension does the
+ * work when mb_strlen() exists and UTF8_NOMBSTRING has not been defined;
+ * otherwise the string is decoded with utf8_to_unicode() and the decoded
+ * values are counted.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    strlen()
+ * @param  string $string UTF-8 encoded input
+ * @return int    number of characters in $string
+ */
+function utf8_strlen($string){
+  $useMbstring = function_exists('mb_strlen') && !defined('UTF8_NOMBSTRING');
+  if($useMbstring){
+    return mb_strlen($string,'utf-8');
+  }
+
+  // no mb_string: count the entries of the decoded list instead
+  return count(utf8_to_unicode($string));
+}
+
+
/**
* This is a unicode aware replacement for strtolower()
*
@@ -119,6 +137,40 @@ function utf8_deaccent($string,$case=0){
}
/**
+ * This is a Unicode aware replacement for strpos()
+ *
+ * Uses mb_string extension if available. Without it, both strings are
+ * decoded with utf8_to_unicode() and compared value by value, so the
+ * offset and the returned position are counted in characters, not bytes.
+ *
+ * In the fallback an empty needle is never found and a negative offset
+ * starts the search at the beginning of the haystack.
+ *
+ * @author Scott Michael Reynen <scott@randomchaos.com>
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
+ * @see    strpos()
+ * @param  string $haystack UTF-8 encoded string to search in
+ * @param  string $needle   UTF-8 encoded string to search for
+ * @param  int    $offset   character position at which the search starts
+ * @return int|false position of the first match, false if there is none
+ */
+function utf8_strpos($haystack, $needle,$offset=0) {
+  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
+    return mb_strpos($haystack,$needle,$offset,'utf-8');
+
+  $haystack = utf8_to_unicode($haystack);
+  $needle   = utf8_to_unicode($needle);
+  $hlen     = count($haystack);
+  $nlen     = count($needle);
+
+  // nothing to look for: report "not found" instead of reading $needle[0]
+  if($nlen == 0) return false;
+
+  // last start index at which the whole needle still fits; stopping here
+  // keeps $haystack[$position + $i] inside the array
+  $last = $hlen - $nlen;
+
+  for($position = max(0,(int) $offset); $position <= $last; $position++){
+    for($i = 0; $i < $nlen; $i++){
+      if($needle[$i] != $haystack[$position + $i]) break;
+    }
+    // the inner loop ran to completion: every value of the needle matched
+    if($i == $nlen) return $position;
+  }
+  return false;
+}
+
+/**
* This function will take any UTF-8 encoded text and return it as
* a list of Unicode values:
*