summary refs log tree commit diff
path: root/includes/unicode.inc
diff options
context:
space:
mode:
author Gábor Hojtsy <gabor@hojtsy.hu> 2007-12-20 08:46:01 +0000
committer Gábor Hojtsy <gabor@hojtsy.hu> 2007-12-20 08:46:01 +0000
commit d8bd6cf97c7afce67b0b99817ccc139c65bc1c63 (patch)
tree a4b1ba030730a4f32b8b0294b79fed01b6e44964 /includes/unicode.inc
parent 3645f032335bee9b20ae86c5e05d01efbfa0680b (diff)
download brdo-d8bd6cf97c7afce67b0b99817ccc139c65bc1c63.tar.gz
brdo-d8bd6cf97c7afce67b0b99817ccc139c65bc1c63.tar.bz2
#200185 by Pancho: truncate_utf8() was abused as if it were a UTF-8 character-based string function, so align our code with that usage. It can be renamed to drupal_truncate_chars() in Drupal 7.
Diffstat (limited to 'includes/unicode.inc')
-rw-r--r-- includes/unicode.inc 58
1 file changed, 46 insertions, 12 deletions
diff --git a/includes/unicode.inc b/includes/unicode.inc
index a9c417404..c58e81bb5 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -203,28 +203,62 @@ function drupal_convert_to_utf8($data, $encoding) {
* The string to truncate.
* @param $len
* An upper limit on the returned string length.
+ * @return
+ * The truncated string.
+ */
+function drupal_truncate_bytes($string, $len) {
+ if (strlen($string) <= $len) {
+ return $string;
+ }
+ if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) {
+ return substr($string, 0, $len);
+ }
+ while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0) {};
+ return substr($string, 0, $len);
+}
+
+/**
+ * Truncate a UTF-8-encoded string safely to a number of characters.
+ *
+ * @param $string
+ * The string to truncate.
+ * @param $len
+ * An upper limit on the returned string length.
* @param $wordsafe
- * Flag to truncate at nearest space. Defaults to FALSE.
+ * Flag to truncate at last space within the upper limit. Defaults to FALSE.
+ * @param $dots
+ * Flag to add trailing dots. Defaults to FALSE.
* @return
* The truncated string.
*/
function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {
- $slen = strlen($string);
- if ($slen <= $len) {
+
+ if (drupal_strlen($string) <= $len) {
return $string;
}
+
+ if ($dots) {
+ $len -= 4;
+ }
+
if ($wordsafe) {
- $end = $len;
- while (($string[--$len] != ' ') && ($len > 0)) {};
- if ($len == 0) {
- $len = $end;
+ $string = drupal_substr($string, 0, $len + 1); // leave one more character
+ if ($last_space = strrpos($string, ' ')) { // space exists AND is not on position 0
+ $string = substr($string, 0, $last_space);
+ }
+ else {
+ $string = drupal_substr($string, 0, $len);
}
}
- if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) {
- return substr($string, 0, $len) . ($dots ? ' ...' : '');
+ else {
+ $string = drupal_substr($string, 0, $len);
}
- while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0) {};
- return substr($string, 0, $len) . ($dots ? ' ...' : '');
+
+ if ($dots) {
+ $string .= ' ...';
+ }
+
+ return $string;
}
/**
@@ -248,7 +282,7 @@ function mime_header_encode($string) {
$len = strlen($string);
$output = '';
while ($len > 0) {
- $chunk = truncate_utf8($string, $chunk_size);
+ $chunk = drupal_truncate_bytes($string, $chunk_size);
$output .= ' =?UTF-8?B?'. base64_encode($chunk) ."?=\n";
$c = strlen($chunk);
$string = substr($string, $c);