From d8bd6cf97c7afce67b0b99817ccc139c65bc1c63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Hojtsy?=
Date: Thu, 20 Dec 2007 08:46:01 +0000
Subject: #200185 by Pancho: truncate_utf8() was abused as if it was an UTF-8 character based string function, so align our code to that. Can be renamed to drupal_truncate_chars() in Drupal 7.

---
 includes/unicode.inc | 58 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 12 deletions(-)

(limited to 'includes/unicode.inc')

diff --git a/includes/unicode.inc b/includes/unicode.inc
index a9c417404..c58e81bb5 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -203,28 +203,62 @@ function drupal_convert_to_utf8($data, $encoding) {
  *   The string to truncate.
  * @param $len
  *   An upper limit on the returned string length.
+ * @return
+ *   The truncated string.
+ */
+function drupal_truncate_bytes($string, $len) {
+  if (strlen($string) <= $len) {
+    return $string;
+  }
+  if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) {
+    return substr($string, 0, $len);
+  }
+  while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0) {};
+  return substr($string, 0, $len);
+}
+
+/**
+ * Truncate a UTF-8-encoded string safely to a number of characters.
+ *
+ * @param $string
+ *   The string to truncate.
+ * @param $len
+ *   An upper limit on the returned string length.
  * @param $wordsafe
- *   Flag to truncate at nearest space. Defaults to FALSE.
+ *   Flag to truncate at last space within the upper limit. Defaults to FALSE.
+ * @param $dots
+ *   Flag to add trailing dots. Defaults to FALSE.
  * @return
  *   The truncated string.
  */
 function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {
-  $slen = strlen($string);
-  if ($slen <= $len) {
+
+  if (drupal_strlen($string) <= $len) {
     return $string;
   }
+
+  if ($dots) {
+    $len -= 4;
+  }
+
   if ($wordsafe) {
-    $end = $len;
-    while (($string[--$len] != ' ') && ($len > 0)) {};
-    if ($len == 0) {
-      $len = $end;
+    $string = drupal_substr($string, 0, $len + 1); // leave one more character
+    if ($last_space = strrpos($string, ' ')) { // space exists AND is not on position 0
+      $string = substr($string, 0, $last_space);
+    }
+    else {
+      $string = drupal_substr($string, 0, $len);
     }
   }
-  if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) {
-    return substr($string, 0, $len) . ($dots ? ' ...' : '');
+  else {
+    $string = drupal_substr($string, 0, $len);
   }
-  while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0) {};
-  return substr($string, 0, $len) . ($dots ? ' ...' : '');
+
+  if ($dots) {
+    $string .= ' ...';
+  }
+
+  return $string;
 }
 
 /**
@@ -248,7 +282,7 @@ function mime_header_encode($string) {
   $len = strlen($string);
   $output = '';
   while ($len > 0) {
-    $chunk = truncate_utf8($string, $chunk_size);
+    $chunk = drupal_truncate_bytes($string, $chunk_size);
     $output .= ' =?UTF-8?B?'. base64_encode($chunk) ."?=\n";
     $c = strlen($chunk);
     $string = substr($string, $c);
-- 
cgit v1.2.3