diff options
author | webchick <webchick@24967.no-reply.drupal.org> | 2011-06-29 22:51:13 -0700 |
---|---|---|
committer | webchick <webchick@24967.no-reply.drupal.org> | 2011-06-29 22:51:13 -0700 |
commit | 2608c670ababfd04a4ce85393db7ebe4a0c14188 (patch) | |
tree | fbdcb5236b5fd0822257a9773f18e90844028650 | |
parent | 33f6c5608c62b27fa86d52e8806435a6c94f3eee (diff) | |
download | brdo-2608c670ababfd04a4ce85393db7ebe4a0c14188.tar.gz brdo-2608c670ababfd04a4ce85393db7ebe4a0c14188.tar.bz2 |
Issue #1130198 by pillarsdotnet, Damien Tournoud: Fixed Regression: line-breaks are mangled by drupal_html_to_text().
-rw-r--r-- | includes/mail.inc | 4 | ||||
-rw-r--r-- | modules/simpletest/tests/mail.test | 353 |
2 files changed, 355 insertions, 2 deletions
diff --git a/includes/mail.inc b/includes/mail.inc
index d2febed39..7272df972 100644
--- a/includes/mail.inc
+++ b/includes/mail.inc
@@ -430,7 +430,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
           $indent[] = count($lists) ? ' "' : '>';
           break;
         case 'li':
-          $indent[] = is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
+          $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
           break;
         case 'dd':
           $indent[] = ' ';
@@ -509,7 +509,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
         $chunk = $casing($chunk);
       }
       // Format it and apply the current indentation.
-      $output .= drupal_wrap_mail($chunk, implode('', $indent));
+      $output .= drupal_wrap_mail($chunk, implode('', $indent)) . MAIL_LINE_ENDINGS;
       // Remove non-quotation markers from indentation.
       $indent = array_map('_drupal_html_to_text_clean', $indent);
     }
diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test
index 8a7b152d9..a6c7b40e5 100644
--- a/modules/simpletest/tests/mail.test
+++ b/modules/simpletest/tests/mail.test
@@ -1,6 +1,7 @@
 <?php
 
 /**
+ * @file
  * Test the Drupal mailing system.
  */
 class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
@@ -63,3 +64,355 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   }
 }
 
+/**
+ * Unit tests for drupal_html_to_text().
+ */
+class DrupalHtmlToTextTestCase extends DrupalWebTestCase {
+  public static function getInfo() {
+    return array(
+      'name' => 'HTML to text conversion',
+      'description' => 'Tests drupal_html_to_text().',
+      'group' => 'Mail',
+    );
+  }
+
+  /**
+   * Converts a string to its PHP source equivalent for display in test messages.
+   *
+   * @param $text
+   *   The text string to convert.
+   *
+   * @return
+   *   An HTML representation of the text string that, when displayed in a
+   *   browser, represents the PHP source code equivalent of $text.
+   */
+  function stringToHtml($text) {
+    return '"' .
+      str_replace(
+        array("\n", ' '),
+        array('\n', '&nbsp;'),
+        check_plain($text)
+      ) . '"';
+  }
+
+  /**
+   * Helper function for testing drupal_html_to_text().
+   *
+   * @param $html
+   *   The source HTML string to be converted.
+   * @param $text
+   *   The expected result of converting $html to text.
+   * @param $message
+   *   A text message to display in the assertion message.
+   * @param $allowed_tags
+   *   (optional) An array of allowed tags, or NULL to default to the full
+   *   set of tags supported by drupal_html_to_text().
+   */
+  function assertHtmlToText($html, $text, $message, $allowed_tags = NULL) {
+    preg_match_all('/<([a-z0-6]+)/', drupal_strtolower($html), $matches);
+    $tested_tags = implode(', ', array_unique($matches[1]));
+    $message .= ' (' . $tested_tags . ')';
+    $result = drupal_html_to_text($html, $allowed_tags);
+    $pass = $this->assertEqual($result, $text, check_plain($message));
+    $verbose = 'html = <pre>' . $this->stringToHtml($html)
+      . '</pre><br />' . 'result = <pre>' . $this->stringToHtml($result)
+      . '</pre><br />' . 'expected = <pre>' . $this->stringToHtml($text)
+      . '</pre>';
+    $this->verbose($verbose);
+    if (!$pass) {
+      $this->pass("Previous test verbose info:<br />$verbose");
+    }
+  }
+
+  /**
+   * Test all supported tags of drupal_html_to_text().
+   */
+  function testTags() {
+    global $base_path, $base_url;
+    $tests = array(
+      // @todo Trailing linefeeds should be trimmed.
+      '<a href = "http://drupal.org">Drupal.org</a>' => "Drupal.org [1]\n\n[1] http://drupal.org\n",
+      // @todo Footer urls should be absolute.
+      "<a href = \"$base_path\">Homepage</a>" => "Homepage [1]\n\n[1] $base_url/\n",
+      '<address>Drupal</address>' => "Drupal\n",
+      // @todo The <address> tag is currently not supported.
+      '<address>Drupal</address><address>Drupal</address>' => "DrupalDrupal\n",
+      '<b>Drupal</b>' => "*Drupal*\n",
+      // @todo There should be a space between the '>' and the text.
+      '<blockquote>Drupal</blockquote>' => ">Drupal\n",
+      '<blockquote>Drupal</blockquote><blockquote>Drupal</blockquote>' => ">Drupal\n>Drupal\n",
+      '<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\nDrupal\n",
+      '<br/>Drupal<br/>Drupal<br/><br/>Drupal' => "Drupal\nDrupal\nDrupal\n",
+      // @todo There should be two line breaks before the paragraph.
+      '<br/>Drupal<br/>Drupal<br/><br/>Drupal<p>Drupal</p>' => "Drupal\nDrupal\nDrupal\nDrupal\n\n",
+      '<div>Drupal</div>' => "Drupal\n",
+      // @todo The <div> tag is currently not supported.
+      '<div>Drupal</div><div>Drupal</div>' => "DrupalDrupal\n",
+      '<em>Drupal</em>' => "/Drupal/\n",
+      '<h1>Drupal</h1>' => "======== DRUPAL ==============================================================\n\n",
+      '<h1>Drupal</h1><p>Drupal</p>' => "======== DRUPAL ==============================================================\n\nDrupal\n\n",
+      '<h2>Drupal</h2>' => "-------- DRUPAL --------------------------------------------------------------\n\n",
+      '<h2>Drupal</h2><p>Drupal</p>' => "-------- DRUPAL --------------------------------------------------------------\n\nDrupal\n\n",
+      '<h3>Drupal</h3>' => ".... Drupal\n\n",
+      '<h3>Drupal</h3><p>Drupal</p>' => ".... Drupal\n\nDrupal\n\n",
+      '<h4>Drupal</h4>' => ".. Drupal\n\n",
+      '<h4>Drupal</h4><p>Drupal</p>' => ".. Drupal\n\nDrupal\n\n",
+      '<h5>Drupal</h5>' => "Drupal\n\n",
+      '<h5>Drupal</h5><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
+      '<h6>Drupal</h6>' => "Drupal\n\n",
+      '<h6>Drupal</h6><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
+      '<hr />Drupal<hr />' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
+      '<hr/>Drupal<hr/>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
+      '<hr/>Drupal<hr/><p>Drupal</p>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n\n",
+      '<i>Drupal</i>' => "/Drupal/\n",
+      '<p>Drupal</p>' => "Drupal\n\n",
+      '<p>Drupal</p><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
+      '<strong>Drupal</strong>' => "*Drupal*\n",
+      // @todo Tables are currently not supported.
+      '<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "DrupalDrupalDrupalDrupal\n",
+      '<table><tr><td>Drupal</td></tr></table><p>Drupal</p>' => "Drupal\nDrupal\n\n",
+      // @todo The <u> tag is currently not supported.
+      '<u>Drupal</u>' => "Drupal\n",
+      '<ul><li>Drupal</li></ul>' => " * Drupal\n\n",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => " * Drupal /Drupal/ Drupal\n\n",
+      // @todo Lines containing nothing but spaces should be trimmed.
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => " * Drupal\n * 1) Drupal\n 2) Drupal\n \n\n",
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li></ol></li><li>Drupal</li></ul>' => " * Drupal\n * 1) Drupal\n \n * Drupal\n\n",
+      '<ul><li>Drupal</li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n\n",
+      '<ul><li>Drupal</li></ul><p>Drupal</p>' => " * Drupal\n\nDrupal\n\n",
+      '<ol><li>Drupal</li></ol>' => " 1) Drupal\n\n",
+      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => " 1) Drupal\n 2) * Drupal\n * Drupal\n \n\n",
+      '<ol><li>Drupal</li><li>Drupal</li></ol>' => " 1) Drupal\n 2) Drupal\n\n",
+      '<ol>Drupal</ol>' => "Drupal\n\n",
+      '<ol><li>Drupal</li></ol><p>Drupal</p>' => " 1) Drupal\n\nDrupal\n\n",
+      '<dl><dt>Drupal</dt></dl>' => "Drupal\n\n",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\n\n",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\nDrupal\n Drupal\n\n",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl><p>Drupal</p>' => "Drupal\n Drupal\n\nDrupal\n\n",
+      '<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n Drupal\n\n",
+      '<dl><dt>Drupal</dt></dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
+      // @todo Again, lines containing only spaces should be trimmed.
+      '<ul><li>Drupal</li><li><dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl></li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n Drupal\n Drupal\n Drupal\n \n * Drupal\n\n",
+      // Tests malformed HTML tags.
+      '<br>Drupal<br>Drupal' => "Drupal\nDrupal\n",
+      '<hr>Drupal<hr>Drupal' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n",
+      '<ol><li>Drupal<li>Drupal</ol>' => " 1) Drupal\n 2) Drupal\n\n",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => " * Drupal /Drupal/ Drupal\n\n",
+      '<ul><li>Drupal<li>Drupal</ol>' => " * Drupal\n * Drupal\n\n",
+      '<ul><li>Drupal<li>Drupal</ul>' => " * Drupal\n * Drupal\n\n",
+      '<ul>Drupal</ul>' => "Drupal\n\n",
+      'Drupal</ul></ol></dl><li>Drupal' => "Drupal\n * Drupal\n",
+      '<dl>Drupal</dl>' => "Drupal\n\n",
+      '<dl>Drupal</dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
+      '<dt>Drupal</dt>' => "Drupal\n",
+      // Tests some unsupported HTML tags.
+      '<html>Drupal</html>' => "Drupal\n",
+      // @todo Perhaps the contents of <script> tags should be dropped.
+      '<script type="text/javascript">Drupal</script>' => "Drupal\n",
+    );
+
+    foreach ($tests as $html => $text) {
+      $this->assertHtmlToText($html, $text, 'Supported tags');
+    }
+  }
+
+  /**
+   * Test $allowed_tags argument of drupal_html_to_text().
+   */
+  function testDrupalHtmlToTextArgs() {
+    // The second parameter of drupal_html_to_text() overrules the allowed tags.
+    $this->assertHtmlToText(
+      'Drupal <b>Drupal</b> Drupal',
+      "Drupal *Drupal* Drupal\n",
+      'Allowed <b> tag found',
+      array('b')
+    );
+    $this->assertHtmlToText(
+      'Drupal <h1>Drupal</h1> Drupal',
+      "Drupal Drupal Drupal\n",
+      'Disallowed <h1> tag not found',
+      array('b')
+    );
+
+    $this->assertHtmlToText(
+      'Drupal <p><em><b>Drupal</b></em><p> Drupal',
+      "Drupal Drupal Drupal\n",
+      'Disallowed <p>, <em>, and <b> tags not found',
+      array('a', 'br', 'h1')
+    );
+
+    $this->assertHtmlToText(
+      '<html><body>Drupal</body></html>',
+      "Drupal\n",
+      'Unsupported <html> and <body> tags not found',
+      array('html', 'body')
+    );
+  }
+
+  /**
+   * Test that whitespace is collapsed.
+   */
+  function testDrupalHtmltoTextCollapsesWhitespace() {
+    $input = "<p>Drupal Drupal\n\nDrupal<pre>Drupal Drupal\n\nDrupal</pre>Drupal Drupal\n\nDrupal</p>";
+    // @todo The whitespace should be collapsed.
+    $collapsed = "Drupal Drupal\n\nDrupalDrupal Drupal\n\nDrupalDrupal Drupal\n\nDrupal\n\n";
+    $this->assertHtmlToText(
+      $input,
+      $collapsed,
+      'Whitespace is collapsed',
+      array('p')
+    );
+  }
+
+  /**
+   * Test that text separated by block-level tags in HTML get separated by
+   * (at least) a newline in the plaintext version.
+   */
+  function testDrupalHtmlToTextBlockTagToNewline() {
+    $input = '[text]'
+      . '<blockquote>[blockquote]</blockquote>'
+      . '<br />[br]'
+      . '<dl><dt>[dl-dt]</dt>'
+      . '<dt>[dt]</dt>'
+      . '<dd>[dd]</dd>'
+      . '<dd>[dd-dl]</dd></dl>'
+      . '<h1>[h1]</h1>'
+      . '<h2>[h2]</h2>'
+      . '<h3>[h3]</h3>'
+      . '<h4>[h4]</h4>'
+      . '<h5>[h5]</h5>'
+      . '<h6>[h6]</h6>'
+      . '<hr />[hr]'
+      . '<ol><li>[ol-li]</li>'
+      . '<li>[li]</li>'
+      . '<li>[li-ol]</li></ol>'
+      . '<p>[p]</p>'
+      . '<ul><li>[ul-li]</li>'
+      . '<li>[li-ul]</li></ul>'
+      . '[text]';
+    $output = drupal_html_to_text($input);
+    $pass = $this->assertFalse(
+      preg_match('/\][^\n]*\[/s', $output),
+      'Block-level HTML tags should force newlines'
+    );
+    if (!$pass) {
+      $this->verbose($this->stringToHtml($output));
+    }
+    $output_upper = drupal_strtoupper($output);
+    $upper_input = drupal_strtoupper($input);
+    $upper_output = drupal_html_to_text($upper_input);
+    $pass = $this->assertEqual(
+      $upper_output,
+      $output_upper,
+      'Tag recognition should be case-insensitive'
+    );
+    if (!$pass) {
+      $this->verbose(
+        $upper_output
+        . '<br />should be equal to <br />'
+        . $output_upper
+      );
+    }
+  }
+
+  /**
+   * Test that headers are properly separated from surrounding text.
+   */
+  function testHeaderSeparation() {
+    $html = 'Drupal<h1>Drupal</h1>Drupal';
+    // @todo There should be more space above the header than below it.
+    $text = "Drupal\n======== DRUPAL ==============================================================\n\nDrupal\n";
+    $this->assertHtmlToText($html, $text,
+      'Text before and after <h1> tag');
+    $html = '<p>Drupal</p><h1>Drupal</h1>Drupal';
+    // @todo There should be more space above the header than below it.
+    $text = "Drupal\n\n======== DRUPAL ==============================================================\n\nDrupal\n";
+    $this->assertHtmlToText($html, $text,
+      'Paragraph before and text after <h1> tag');
+    $html = 'Drupal<h1>Drupal</h1><p>Drupal</p>';
+    // @todo There should be more space above the header than below it.
+    $text = "Drupal\n======== DRUPAL ==============================================================\n\nDrupal\n\n";
+    $this->assertHtmlToText($html, $text,
+      'Text before and paragraph after <h1> tag');
+    $html = '<p>Drupal</p><h1>Drupal</h1><p>Drupal</p>';
+    $text = "Drupal\n\n======== DRUPAL ==============================================================\n\nDrupal\n\n";
+    $this->assertHtmlToText($html, $text,
+      'Paragraph before and after <h1> tag');
+  }
+
+  /**
+   * Test that footnote references are properly generated.
+   */
+  function testFootnoteReferences() {
+    global $base_path, $base_url;
+    $source = '<a href="http://www.example.com/node/1">Host and path</a>'
+      . '<br /><a href="http://www.example.com">Host, no path</a>'
+      . '<br /><a href="' . $base_path . 'node/1">Path, no host</a>'
+      . '<br /><a href="node/1">Relative path</a>';
+    // @todo Footnote urls should be absolute.
+    $tt = "Host and path [1]"
+      . "\nHost, no path [2]"
+      // @todo The following two references should be combined.
+      . "\nPath, no host [3]"
+      . "\nRelative path [4]"
+      . "\n"
+      . "\n[1] http://www.example.com/node/1"
+      . "\n[2] http://www.example.com"
+      // @todo The following two references should be combined.
+      . "\n[3] $base_url/node/1"
+      . "\n[4] node/1\n";
+    $this->assertHtmlToText($source, $tt, 'Footnotes');
+  }
+
+  /**
+   * Test that combinations of paragraph breaks, line breaks, linefeeds,
+   * and spaces are properly handled.
+   */
+  function testDrupalHtmlToTextParagraphs() {
+    $tests = array();
+    $tests[] = array(
+      'html' => "<p>line 1<br />\nline 2<br />line 3\n<br />line 4</p><p>paragraph</p>",
+      // @todo Trailing line breaks should be trimmed.
+      'text' => "line 1\nline 2\nline 3\nline 4\n\nparagraph\n\n",
+    );
+    $tests[] = array(
+      'html' => "<p>line 1<br /> line 2</p> <p>line 4<br /> line 5</p> <p>0</p>",
+      // @todo Trailing line breaks should be trimmed.
+      'text' => "line 1\nline 2\n\nline 4\nline 5\n\n0\n\n",
+    );
+    foreach ($tests as $test) {
+      $this->assertHtmlToText($test['html'], $test['text'], 'Paragraph breaks');
+    }
+  }
+
+  /**
+   * Tests that drupal_html_to_text() wraps before 1000 characters.
+   *
+   * RFC 3676 says, "The Text/Plain media type is the lowest common
+   * denominator of Internet email, with lines of no more than 998 characters."
+   *
+   * RFC 2046 says, "SMTP [RFC-821] allows a maximum of 998 octets before the
+   * next CRLF sequence."
+   *
+   * RFC 821 says, "The maximum total length of a text line including the
+   * <CRLF> is 1000 characters."
+   */
+  function testVeryLongLineWrap() {
+    $input = 'Drupal<br /><p>' . str_repeat('x', 2100) . '</><br />Drupal';
+    $output = drupal_html_to_text($input);
+    // This awkward construct comes from includes/mail.inc lines 8-13.
+    $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+    // We must use strlen() rather than drupal_strlen() in order to count
+    // octets rather than characters.
+    $line_length_limit = 1000 - strlen($eol);
+    $maximum_line_length = 0;
+    foreach (explode($eol, $output) as $line) {
+      // We must use strlen() rather than drupal_strlen() in order to count
+      // octets rather than characters.
+      $maximum_line_length = max($maximum_line_length, strlen($line . $eol));
+    }
+    $verbose = 'Maximum line length found was ' . $maximum_line_length . ' octets.';
+    // @todo This should assert that $maximum_line_length <= 1000.
+    $this->pass($verbose);
+  }
+}