summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDries Buytaert <dries@buytaert.net>2008-12-31 11:01:49 +0000
committerDries Buytaert <dries@buytaert.net>2008-12-31 11:01:49 +0000
commit82727ed8aae6dcd49b47e1893282ee36697cc9a2 (patch)
treedd00b656ddf1888ce0ebcdc9c00925c72c85d521
parentba9f2041f62401efbf726c264533c103db555170 (diff)
downloadbrdo-82727ed8aae6dcd49b47e1893282ee36697cc9a2.tar.gz
brdo-82727ed8aae6dcd49b47e1893282ee36697cc9a2.tar.bz2
- Patch #352359 by Damien Tournoud: provided unit tests for the unicode library and fixed some bugs.
-rw-r--r-- includes/unicode.inc 47
-rw-r--r-- modules/simpletest/tests/unicode.test 214
2 files changed, 243 insertions, 18 deletions
diff --git a/includes/unicode.inc b/includes/unicode.inc
index 2912cde97..d9b81f062 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -471,13 +471,13 @@ function drupal_substr($text, $start, $length = NULL) {
}
else {
$strlen = strlen($text);
- // Find the starting byte offset
+ // Find the starting byte offset.
$bytes = 0;
if ($start > 0) {
// Count all the continuation bytes from the start until we have found
- // $start characters
+ // $start characters or the end of the string.
$bytes = -1; $chars = -1;
- while ($bytes < $strlen && $chars < $start) {
+ while ($bytes < $strlen - 1 && $chars < $start) {
$bytes++;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
@@ -487,7 +487,7 @@ function drupal_substr($text, $start, $length = NULL) {
}
elseif ($start < 0) {
// Count all the continuation bytes from the end until we have found
- // abs($start) characters
+ // abs($start) characters.
$start = abs($start);
$bytes = $strlen; $chars = 0;
while ($bytes > 0 && $chars < $start) {
@@ -500,37 +500,48 @@ function drupal_substr($text, $start, $length = NULL) {
}
$istart = $bytes;
- // Find the ending byte offset
+ // Find the ending byte offset.
if ($length === NULL) {
- $bytes = $strlen - 1;
+ $iend = $strlen;
}
elseif ($length > 0) {
// Count all the continuation bytes from the starting index until we have
- // found $length + 1 characters. Then backtrack one byte.
- $bytes = $istart; $chars = 0;
- while ($bytes < $strlen && $chars < $length) {
- $bytes++;
- $c = ord($text[$bytes]);
+ // found $length characters or reached the end of the string, then
+ // backtrace one byte.
+ $iend = $istart - 1; $chars = -1;
+ while ($iend < $strlen - 1 && $chars < $length) {
+ $iend++;
+ $c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
- $bytes--;
+ // Backtrace one byte if the end of the string was not reached.
+ if ($iend < $strlen - 1) {
+ $iend--;
+ }
}
elseif ($length < 0) {
// Count all the continuation bytes from the end until we have found
- // abs($length) characters
+ // abs($length) characters, then backtrace one byte.
$length = abs($length);
- $bytes = $strlen - 1; $chars = 0;
- while ($bytes >= 0 && $chars < $length) {
- $c = ord($text[$bytes]);
+ $iend = $strlen; $chars = 0;
+ while ($iend > 0 && $chars < $length) {
+ $iend--;
+ $c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
- $bytes--;
}
+ // Backtrace one byte if we are not at the beginning of the string.
+ if ($iend > 0) {
+ $iend--;
+ }
+ }
+ else {
+ // $length == 0, return an empty string.
+ $iend = $istart - 1;
}
- $iend = $bytes;
return substr($text, $istart, max(0, $iend - $istart + 1));
}
diff --git a/modules/simpletest/tests/unicode.test b/modules/simpletest/tests/unicode.test
new file mode 100644
index 000000000..8970fca25
--- /dev/null
+++ b/modules/simpletest/tests/unicode.test
@@ -0,0 +1,214 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * Various unicode handling tests.
+ */
+
+/**
+ * Test unicode handling features implemented in unicode.inc.
+ *
+ * The string helpers are run twice: once with the global $multibyte flag
+ * forced to UNICODE_MULTIBYTE and once with it forced to UNICODE_SINGLEBYTE
+ * (presumably this flag selects the mbstring or the emulated code path inside
+ * the drupal_*() string functions; confirm against unicode.inc).
+ */
+class UnicodeUnitTest extends DrupalWebTestCase {
+
+  /**
+   * Whether to run the extended version of the tests (including non latin1 characters).
+   *
+   * @var boolean
+   */
+  protected $extendedMode = FALSE;
+
+  /**
+   * Describe this test case.
+   *
+   * @return
+   *   An array with the translated 'name', 'description' and 'group' of the
+   *   test case.
+   */
+  function getInfo() {
+    return array(
+      'name' => t('Unicode handling'),
+      'description' => t('Tests Drupal Unicode handling.'),
+      'group' => t('System'),
+    );
+  }
+
+  /**
+   * Test full unicode features implemented using the mbstring extension.
+   */
+  function testMbStringUnicode() {
+    global $multibyte;
+
+    // mbstring was not detected on this installation, there is no way to test
+    // multibyte features. Treat that as an exception and stop here: forcing
+    // UNICODE_MULTIBYTE anyway would run the helpers against a mode that was
+    // not detected on this installation.
+    if ($multibyte == UNICODE_SINGLEBYTE) {
+      $this->error(t('Unable to test Multibyte features: mbstring extension was not detected.'));
+      return;
+    }
+
+    $this->helperRunInMode(UNICODE_MULTIBYTE, TRUE, t('Testing in mbstring mode'));
+  }
+
+  /**
+   * Test emulated unicode features.
+   */
+  function testEmulatedUnicode() {
+    $this->helperRunInMode(UNICODE_SINGLEBYTE, FALSE, t('Testing in emulated (best-effort) mode'));
+  }
+
+  /**
+   * Run every string helper with the global $multibyte flag forced to a mode.
+   *
+   * @param $mode
+   *   The value to assign to the global $multibyte flag while the helpers run
+   *   (UNICODE_MULTIBYTE or UNICODE_SINGLEBYTE).
+   * @param $extended
+   *   Whether the helpers should also run their extended (non latin1) cases.
+   * @param $message
+   *   Already translated message recorded as a pass before the helpers run.
+   */
+  protected function helperRunInMode($mode, $extended, $message) {
+    global $multibyte;
+
+    // Remember the current mode so the forced value does not leak into
+    // whatever runs after this test.
+    $previous_mode = $multibyte;
+
+    $multibyte = $mode;
+    $this->extendedMode = $extended;
+    $this->pass($message);
+
+    $this->helperTestStrToLower();
+    $this->helperTestStrToUpper();
+    $this->helperTestUcFirst();
+    $this->helperTestStrLen();
+    $this->helperTestSubStr();
+
+    $multibyte = $previous_mode;
+  }
+
+  /**
+   * Check drupal_strtolower() against a map of input => expected output.
+   *
+   * A Greek sample is added when the extended mode is enabled.
+   */
+  function helperTestStrToLower() {
+    $testcase = array(
+      'tHe QUIcK bRoWn' => 'the quick brown',
+      'FrançAIS is ÜBER-åwesome' => 'français is über-åwesome',
+    );
+    if ($this->extendedMode) {
+      $testcase['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'] = 'αβγδεζηθικλμνξοσὠ';
+    }
+
+    foreach ($testcase as $input => $output) {
+      $this->assertEqual(drupal_strtolower($input), $output, t('%input is lowercased as %output', array('%input' => $input, '%output' => $output)));
+    }
+  }
+
+  /**
+   * Check drupal_strtoupper() against a map of input => expected output.
+   *
+   * A Greek sample is added when the extended mode is enabled.
+   */
+  function helperTestStrToUpper() {
+    $testcase = array(
+      'tHe QUIcK bRoWn' => 'THE QUICK BROWN',
+      'FrançAIS is ÜBER-åwesome' => 'FRANÇAIS IS ÜBER-ÅWESOME',
+    );
+    if ($this->extendedMode) {
+      $testcase['αβγδεζηθικλμνξοσὠ'] = 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ';
+    }
+
+    foreach ($testcase as $input => $output) {
+      $this->assertEqual(drupal_strtoupper($input), $output, t('%input is uppercased as %output', array('%input' => $input, '%output' => $output)));
+    }
+  }
+
+  /**
+   * Check drupal_ucfirst() against a map of input => expected output.
+   *
+   * A Greek sample is added when the extended mode is enabled.
+   */
+  function helperTestUcFirst() {
+    $testcase = array(
+      'tHe QUIcK bRoWn' => 'THe QUIcK bRoWn',
+      'françAIS' => 'FrançAIS',
+      'über' => 'Über',
+      'åwesome' => 'Åwesome',
+    );
+    if ($this->extendedMode) {
+      $testcase['σion'] = 'Σion';
+    }
+
+    foreach ($testcase as $input => $output) {
+      $this->assertEqual(drupal_ucfirst($input), $output, t('%input is ucfirst-ed as %output', array('%input' => $input, '%output' => $output)));
+    }
+  }
+
+  /**
+   * Check drupal_strlen() against a map of input => expected length.
+   *
+   * The expected values are character counts: the second sample contains two
+   * non-ASCII characters and is still expected to measure 12.
+   */
+  function helperTestStrLen() {
+    $testcase = array(
+      'tHe QUIcK bRoWn' => 15,
+      'ÜBER-åwesome' => 12,
+    );
+
+    foreach ($testcase as $input => $output) {
+      $this->assertEqual(drupal_strlen($input), $output, t('%input length is %output', array('%input' => $input, '%output' => $output)));
+    }
+  }
+
+  /**
+   * Check drupal_substr() against a list of cases.
+   *
+   * Each case is array(input, start, length, expected output). Start and
+   * length are character offsets, and negative values are covered as well.
+   */
+  function helperTestSubStr() {
+    $testcase = array(
+      //     012345678901234567890123
+      array('frànçAIS is über-åwesome', 0, 1,
+        'f'),
+      array('frànçAIS is über-åwesome', 0, 8,
+        'frànçAIS'),
+      array('frànçAIS is über-åwesome', 0, 100,
+        'frànçAIS is über-åwesome'),
+      array('frànçAIS is über-åwesome', 4, 4,
+        'çAIS'),
+      array('frànçAIS is über-åwesome', 1, 0,
+        ''),
+      array('frànçAIS is über-åwesome', 100, 0,
+        ''),
+      array('frànçAIS is über-åwesome', -4, 2,
+        'so'),
+      array('frànçAIS is über-åwesome', -7, 10,
+        'åwesome'),
+      array('frànçAIS is über-åwesome', 5, -10,
+        'AIS is üb'),
+    );
+
+    foreach ($testcase as $test) {
+      list($input, $start, $length, $output) = $test;
+      $this->assertEqual(drupal_substr($input, $start, $length), $output, t('%input substring-ed at offset %offset for %length characters is %output', array('%input' => $input, '%offset' => $start, '%length' => $length, '%output' => $output)));
+    }
+  }
+
+  /**
+   * Test decode_entities().
+   */
+  function testDecodeEntities() {
+    $testcase = array(
+      'Drupal' => 'Drupal',
+      '<script>' => '<script>',
+      '&lt;script&gt;' => '<script>',
+      '&amp;lt;script&amp;gt;' => '&lt;script&gt;',
+      '"' => '"',
+      '&#34;' => '"',
+      '&amp;#34;' => '&#34;',
+      '&quot;' => '"',
+      '&amp;quot;' => '&quot;',
+      "'" => "'",
+      '&#39;' => "'",
+      '&amp;#39;' => '&#39;',
+      '©' => '©',
+      '&copy;' => '©',
+      '&#169;' => '©',
+      '→' => '→',
+      '&#8594;' => '→',
+      '➼' => '➼',
+      '&#10172;' => '➼',
+    );
+    foreach ($testcase as $input => $output) {
+      $this->assertEqual(decode_entities($input), $output, t('Make sure the decoded entity of @input is @output', array('@input' => $input, '@output' => $output)));
+    }
+  }
+
+  /**
+   * Test decode_entities() with an exclusion list.
+   *
+   * Entities that stand for '<', '&' or '"' are expected to be left encoded,
+   * while every other entity is still expected to be decoded.
+   */
+  function testDecodeEntitiesExclusion() {
+    $testcase = array(
+      'Drupal' => 'Drupal',
+      '<script>' => '<script>',
+      '&lt;script&gt;' => '&lt;script>',
+      '&amp;lt;script&amp;gt;' => '&amp;lt;script&amp;gt;',
+      '"' => '"',
+      '&#34;' => '&#34;',
+      '&amp;#34;' => '&amp;#34;',
+      '&quot;' => '&quot;',
+      '&amp;quot;' => '&amp;quot;',
+      "'" => "'",
+      '&#39;' => "'",
+      '&amp;#39;' => '&amp;#39;',
+      '©' => '©',
+      '&copy;' => '©',
+      '&#169;' => '©',
+      '→' => '→',
+      '&#8594;' => '→',
+      '➼' => '➼',
+      '&#10172;' => '➼',
+    );
+    $exclude = array('<', '&', '"');
+    foreach ($testcase as $input => $output) {
+      $this->assertIdentical(decode_entities($input, $exclude), $output, t('Make sure the decoded entity of %input, excluding %excludes, is %output', array('%input' => $input, '%excludes' => implode(',', $exclude), '%output' => $output)));
+    }
+  }
+}