diff options
-rw-r--r-- | includes/unicode.inc | 47 | ||||
-rw-r--r-- | modules/simpletest/tests/unicode.test | 214 |
2 files changed, 243 insertions, 18 deletions
diff --git a/includes/unicode.inc b/includes/unicode.inc index 2912cde97..d9b81f062 100644 --- a/includes/unicode.inc +++ b/includes/unicode.inc @@ -471,13 +471,13 @@ function drupal_substr($text, $start, $length = NULL) { } else { $strlen = strlen($text); - // Find the starting byte offset + // Find the starting byte offset. $bytes = 0; if ($start > 0) { // Count all the continuation bytes from the start until we have found - // $start characters + // $start characters or the end of the string. $bytes = -1; $chars = -1; - while ($bytes < $strlen && $chars < $start) { + while ($bytes < $strlen - 1 && $chars < $start) { $bytes++; $c = ord($text[$bytes]); if ($c < 0x80 || $c >= 0xC0) { @@ -487,7 +487,7 @@ function drupal_substr($text, $start, $length = NULL) { } elseif ($start < 0) { // Count all the continuation bytes from the end until we have found - // abs($start) characters + // abs($start) characters. $start = abs($start); $bytes = $strlen; $chars = 0; while ($bytes > 0 && $chars < $start) { @@ -500,37 +500,48 @@ function drupal_substr($text, $start, $length = NULL) { } $istart = $bytes; - // Find the ending byte offset + // Find the ending byte offset. if ($length === NULL) { - $bytes = $strlen - 1; + $iend = $strlen; } elseif ($length > 0) { // Count all the continuation bytes from the starting index until we have - // found $length + 1 characters. Then backtrack one byte. - $bytes = $istart; $chars = 0; - while ($bytes < $strlen && $chars < $length) { - $bytes++; - $c = ord($text[$bytes]); + // found $length characters or reached the end of the string, then + // backtrack one byte. + $iend = $istart - 1; $chars = -1; + while ($iend < $strlen - 1 && $chars < $length) { + $iend++; + $c = ord($text[$iend]); if ($c < 0x80 || $c >= 0xC0) { $chars++; } } - $bytes--; + // Backtrack one byte if the end of the string was not reached. 
+ if ($iend < $strlen - 1) { + $iend--; + } } elseif ($length < 0) { // Count all the continuation bytes from the end until we have found - // abs($length) characters + // abs($length) characters, then backtrack one byte. $length = abs($length); - $bytes = $strlen - 1; $chars = 0; - while ($bytes >= 0 && $chars < $length) { - $c = ord($text[$bytes]); + $iend = $strlen; $chars = 0; + while ($iend > 0 && $chars < $length) { + $iend--; + $c = ord($text[$iend]); if ($c < 0x80 || $c >= 0xC0) { $chars++; } - $bytes--; } + // Backtrack one byte if we are not at the beginning of the string. + if ($iend > 0) { + $iend--; + } + } + else { + // $length == 0, return an empty string. + $iend = $istart - 1; } - $iend = $bytes; return substr($text, $istart, max(0, $iend - $istart + 1)); } diff --git a/modules/simpletest/tests/unicode.test b/modules/simpletest/tests/unicode.test new file mode 100644 index 000000000..8970fca25 --- /dev/null +++ b/modules/simpletest/tests/unicode.test @@ -0,0 +1,214 @@ +<?php +// $Id $ + +/** + * @file + * Various unicode handling tests. + */ + +/** + * Test unicode handling features implemented in unicode.inc. + */ +class UnicodeUnitTest extends DrupalWebTestCase { + + /** + * Whether to run the extended version of the tests (including non latin1 characters). + * + * @var boolean + */ + protected $extendedMode = FALSE; + + function getInfo() { + return array( + 'name' => t('Unicode handling'), + 'description' => t('Tests Drupal Unicode handling.'), + 'group' => t('System'), + ); + } + + /** + * Test full unicode features implemented using the mbstring extension. + */ + function testMbStringUnicode() { + global $multibyte; + + // mbstring was not detected on this installation, there is no way to test + // multibyte features. Treat that as an exception. 
+ if ($multibyte == UNICODE_SINGLEBYTE) { + $this->error(t('Unable to test Multibyte features: mbstring extension was not detected.')); + } + + $multibyte = UNICODE_MULTIBYTE; + + $this->extendedMode = TRUE; + $this->pass(t('Testing in mbstring mode')); + + $this->helperTestStrToLower(); + $this->helperTestStrToUpper(); + $this->helperTestUcFirst(); + $this->helperTestStrLen(); + $this->helperTestSubStr(); + } + + /** + * Test emulated unicode features. + */ + function testEmulatedUnicode() { + global $multibyte; + + $multibyte = UNICODE_SINGLEBYTE; + + $this->extendedMode = FALSE; + + $this->pass(t('Testing in emulated (best-effort) mode')); + + $this->helperTestStrToLower(); + $this->helperTestStrToUpper(); + $this->helperTestUcFirst(); + $this->helperTestStrLen(); + $this->helperTestSubStr(); + } + + function helperTestStrToLower() { + $testcase = array( + 'tHe QUIcK bRoWn' => 'the quick brown', + 'FrançAIS is ÜBER-åwesome' => 'français is über-åwesome', + ); + if ($this->extendedMode) { + $testcase['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'] = 'αβγδεζηθικλμνξοσὠ'; + } + + foreach ($testcase as $input => $output) { + $this->assertEqual(drupal_strtolower($input), $output, t('%input is lowercased as %output', array('%input' => $input, '%output' => $output))); + } + } + + function helperTestStrToUpper() { + $testcase = array( + 'tHe QUIcK bRoWn' => 'THE QUICK BROWN', + 'FrançAIS is ÜBER-åwesome' => 'FRANÇAIS IS ÜBER-ÅWESOME', + ); + if ($this->extendedMode) { + $testcase['αβγδεζηθικλμνξοσὠ'] = 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'; + } + + foreach ($testcase as $input => $output) { + $this->assertEqual(drupal_strtoupper($input), $output, t('%input is uppercased as %output', array('%input' => $input, '%output' => $output))); + } + } + + function helperTestUcFirst() { + $testcase = array( + 'tHe QUIcK bRoWn' => 'THe QUIcK bRoWn', + 'françAIS' => 'FrançAIS', + 'über' => 'Über', + 'åwesome' => 'Åwesome' + ); + if ($this->extendedMode) { + $testcase['σion'] = 'Σion'; + } + + foreach ($testcase as $input => 
$output) { + $this->assertEqual(drupal_ucfirst($input), $output, t('%input is ucfirst-ed as %output', array('%input' => $input, '%output' => $output))); + } + } + + function helperTestStrLen() { + $testcase = array( + 'tHe QUIcK bRoWn' => 15, + 'ÜBER-åwesome' => 12, + ); + + foreach ($testcase as $input => $output) { + $this->assertEqual(drupal_strlen($input), $output, t('%input length is %output', array('%input' => $input, '%output' => $output))); + } + } + + function helperTestSubStr() { + $testcase = array( + // 012345678901234567890123 + array('frànçAIS is über-åwesome', 0, 1, + 'f'), + array('frànçAIS is über-åwesome', 0, 8, + 'frànçAIS'), + array('frànçAIS is über-åwesome', 0, 100, + 'frànçAIS is über-åwesome'), + array('frànçAIS is über-åwesome', 4, 4, + 'çAIS'), + array('frànçAIS is über-åwesome', 1, 0, + ''), + array('frànçAIS is über-åwesome', 100, 0, + ''), + array('frànçAIS is über-åwesome', -4, 2, + 'so'), + array('frànçAIS is über-åwesome', -7, 10, + 'åwesome'), + array('frànçAIS is über-åwesome', 5, -10, + 'AIS is üb'), + + ); + + foreach ($testcase as $test) { + list($input, $start, $length, $output) = $test; + $this->assertEqual(drupal_substr($input, $start, $length), $output, t('%input substring-ed at offset %offset for %length characters is %output', array('%input' => $input, '%offset' => $start, '%length' => $length, '%output' => $output))); + } + } + + /** + * Test decode_entities(). 
+ */ + function testDecodeEntities() { + $testcase = array( + 'Drupal' => 'Drupal', + '<script>' => '<script>', + '<script>' => '<script>', + '&lt;script&gt;' => '<script>', + '"' => '"', + '"' => '"', + '&#34;' => '"', + '"' => '"', + '&quot;' => '"', + "'" => "'", + ''' => "'", + '&#39;' => ''', + '©' => '©', + '©' => '©', + '©' => '©', + '→' => '→', + '→' => '→', + '➼' => '➼', + '➼' => '➼', + ); + foreach ($testcase as $input => $output) { + $this->assertEqual(decode_entities($input), $output, t('Make sure the decoded entity of @input is @output', array('@input' => $input, '@output' => $output))); + } + } + + function testDecodeEntitiesExclusion() { + $testcase = array( + 'Drupal' => 'Drupal', + '<script>' => '<script>', + '<script>' => '<script>', + '&lt;script&gt;' => '&lt;script&gt;', + '"' => '"', + '"' => '"', + '&#34;' => '&#34;', + '"' => '"', + '&quot;' => '&quot;', + "'" => "'", + ''' => "'", + '&#39;' => '&#39;', + '©' => '©', + '©' => '©', + '©' => '©', + '→' => '→', + '→' => '→', + '➼' => '➼', + '➼' => '➼', + ); + $exclude = array('<', '&', '"'); + foreach ($testcase as $input => $output) { + $this->assertIdentical(decode_entities($input, $exclude), $output, t('Make sure the decoded entity of %input, excluding %excludes, is %output', array('%input' => $input, '%excludes' => implode(',', $exclude), '%output' => $output))); + } + } +} |