summaryrefslogtreecommitdiff
path: root/inc/utf8.php
diff options
context:
space:
mode:
author: Andreas Gohr <andi@splitbrain.org> 2006-04-03 21:25:37 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2006-04-03 21:25:37 +0200
commit: 10f09f2a66400f77b4696f973c4c526424e44bc1 (patch)
tree: 70f7f944e1709c11b6bebf8804b31cdffdeb33ba /inc/utf8.php
parent: 42c7abd6efcdc604f41b9f5b6440b7b9abfd2bf1 (diff)
download: rpg-10f09f2a66400f77b4696f973c4c526424e44bc1.tar.gz
rpg-10f09f2a66400f77b4696f973c4c526424e44bc1.tar.bz2
better utf8_substr function
darcs-hash:20060403192537-7ad00-72b129ce494066bce491821a0396db7576873ec2.gz
Diffstat (limited to 'inc/utf8.php')
-rw-r--r-- inc/utf8.php 66
1 files changed, 54 insertions, 12 deletions
diff --git a/inc/utf8.php b/inc/utf8.php
index 9ef17936c..18b09c29f 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -109,22 +109,64 @@ function utf8_strlen($string){
}
/**
- * Unicode aware replacement for substr()
+ * UTF-8 aware alternative to substr
*
- * @author lmak at NOSPAM dot iti dot gr
- * @link http://www.php.net/manual/en/function.substr.php
- * @see substr()
+ * Return part of a string given character offset (and optionally length)
+ * Note: supports use of negative offsets and lengths but will be slower
+ * when doing so
+ *
+ * @author Harry Fuecks <hfuecks@gmail.com>
+ * @param string  $str    the UTF-8 encoded string to take a part of
+ * @param integer $offset number of UTF-8 characters offset (from left)
+ * @param integer $length (optional) length in UTF-8 characters from offset
+ * @return mixed string or FALSE if failure
*/
-function utf8_substr($str,$start,$length=null){
- preg_match_all("/./u", $str, $ar);
-
- if($length != null) {
- return join("",array_slice($ar[0],$start,$length));
- } else {
- return join("",array_slice($ar[0],$start));
- }
+function utf8_substr($str, $offset, $length = null) {
+    // Both values end up inside a regex quantifier: force integers, keep null.
+    $offset = (int) $offset;
+    if ( $length !== null ) $length = (int) $length;
+
+    // Fast path: native mbstring unless absent or disabled via UTF8_NOMBSTRING.
+    if(!defined('UTF8_NOMBSTRING') && function_exists('mb_substr')){
+        if( $length === null ){
+            // Oversized length means "to the end"; old PHP reads null as 0.
+            return mb_substr($str, $offset, mb_strlen($str, 'UTF-8'), 'UTF-8');
+        }
+        return mb_substr($str, $offset, $length, 'UTF-8');
+    }
+
+    if ( $offset >= 0 && ( $length === null || $length >= 0 ) ) {
+        if ( $length === null ) {
+            $quantifier = '*';
+        } else {
+            $strlen = strlen(utf8_decode($str));
+            if ( $offset > $strlen ) {
+                return '';
+            }
+            // Asking for more than remains: take everything after the offset.
+            $quantifier = ( $offset + $length > $strlen ) ? '*' : '{'.$length.'}';
+        }
+
+        // Skip $offset characters, capture the run; /s lets . match newlines.
+        $pattern = '/^.{'.$offset.'}(.'.$quantifier.')/us';
+        if ( preg_match($pattern, $str, $matches) && isset($matches[1]) ) {
+            return $matches[1];
+        }
+        return false;
+    }
+
+    // Negative offset/length: split into characters and let array_slice count.
+    // From: http://www.php.net/manual/en/function.substr.php#44838
+    if ( preg_match_all('/./us', $str, $ar) === false ) {
+        return false; // PCRE error, e.g. malformed UTF-8 input
+    }
+    if ( $length === null ) {
+        return join('', array_slice($ar[0], $offset));
+    }
+    return join('', array_slice($ar[0], $offset, $length));
+}
+
/**
* Unicode aware replacement for substr_replace()
*