summaryrefslogtreecommitdiff
path: root/includes/unicode.inc
diff options
context:
space:
mode:
authorDries Buytaert <dries@buytaert.net>2008-12-31 11:01:49 +0000
committerDries Buytaert <dries@buytaert.net>2008-12-31 11:01:49 +0000
commit82727ed8aae6dcd49b47e1893282ee36697cc9a2 (patch)
treedd00b656ddf1888ce0ebcdc9c00925c72c85d521 /includes/unicode.inc
parentba9f2041f62401efbf726c264533c103db555170 (diff)
downloadbrdo-82727ed8aae6dcd49b47e1893282ee36697cc9a2.tar.gz
brdo-82727ed8aae6dcd49b47e1893282ee36697cc9a2.tar.bz2
- Patch #352359 by Damien Tournoud: provided unit tests for the unicode library and fixed some bugs.
Diffstat (limited to 'includes/unicode.inc')
-rw-r--r--includes/unicode.inc47
1 files changed, 29 insertions, 18 deletions
diff --git a/includes/unicode.inc b/includes/unicode.inc
index 2912cde97..d9b81f062 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -471,13 +471,13 @@ function drupal_substr($text, $start, $length = NULL) {
}
else {
$strlen = strlen($text);
- // Find the starting byte offset
+ // Find the starting byte offset.
$bytes = 0;
if ($start > 0) {
// Count all the continuation bytes from the start until we have found
- // $start characters
+ // $start characters or the end of the string.
$bytes = -1; $chars = -1;
- while ($bytes < $strlen && $chars < $start) {
+ while ($bytes < $strlen - 1 && $chars < $start) {
$bytes++;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
@@ -487,7 +487,7 @@ function drupal_substr($text, $start, $length = NULL) {
}
elseif ($start < 0) {
// Count all the continuation bytes from the end until we have found
- // abs($start) characters
+ // abs($start) characters.
$start = abs($start);
$bytes = $strlen; $chars = 0;
while ($bytes > 0 && $chars < $start) {
@@ -500,37 +500,48 @@ function drupal_substr($text, $start, $length = NULL) {
}
$istart = $bytes;
- // Find the ending byte offset
+ // Find the ending byte offset.
if ($length === NULL) {
- $bytes = $strlen - 1;
+ $iend = $strlen;
}
elseif ($length > 0) {
// Count all the continuation bytes from the starting index until we have
- // found $length + 1 characters. Then backtrack one byte.
- $bytes = $istart; $chars = 0;
- while ($bytes < $strlen && $chars < $length) {
- $bytes++;
- $c = ord($text[$bytes]);
+ // found $length characters or reached the end of the string, then
+ // backtrace one byte.
+ $iend = $istart - 1; $chars = -1;
+ while ($iend < $strlen - 1 && $chars < $length) {
+ $iend++;
+ $c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
- $bytes--;
+ // Backtrace one byte if the end of the string was not reached.
+ if ($iend < $strlen - 1) {
+ $iend--;
+ }
}
elseif ($length < 0) {
// Count all the continuation bytes from the end until we have found
- // abs($length) characters
+ // abs($length) characters, then backtrace one byte.
$length = abs($length);
- $bytes = $strlen - 1; $chars = 0;
- while ($bytes >= 0 && $chars < $length) {
- $c = ord($text[$bytes]);
+ $iend = $strlen; $chars = 0;
+ while ($iend > 0 && $chars < $length) {
+ $iend--;
+ $c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
- $bytes--;
}
+ // Backtrace one byte if we are not at the beginning of the string.
+ if ($iend > 0) {
+ $iend--;
+ }
+ }
+ else {
+ // $length == 0, return an empty string.
+ $iend = $istart - 1;
}
- $iend = $bytes;
return substr($text, $istart, max(0, $iend - $istart + 1));
}