diff options
author | Dries Buytaert <dries@buytaert.net> | 2010-09-04 17:55:43 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2010-09-04 17:55:43 +0000 |
commit | 9d912261e30e597c883e20bc3a89416c28cd8e53 (patch) | |
tree | 1dacbab06e2d2ff51c1a28f350bf352ddb4fc8c2 | |
parent | 9502260ecf33a4b345794eea2d0b6e6dff5dbd74 (diff) | |
download | brdo-9d912261e30e597c883e20bc3a89416c28cd8e53.tar.gz brdo-9d912261e30e597c883e20bc3a89416c28cd8e53.tar.bz2 |
- Patch #559584 by tic2000, sun: filter_xss() and Line break filter break HTML comments. Also added tests.
-rw-r--r-- | includes/common.inc | 13 | ||||
-rw-r--r-- | modules/filter/filter.module | 9 | ||||
-rw-r--r-- | modules/filter/filter.test | 162 | ||||
-rw-r--r-- | modules/simpletest/tests/common.test | 5 |
4 files changed, 112 insertions, 77 deletions
diff --git a/includes/common.inc b/includes/common.inc index 79a3fc6fa..a70d26e93 100644 --- a/includes/common.inc +++ b/includes/common.inc @@ -1355,6 +1355,8 @@ function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', ( <(?=[^a-zA-Z!/]) # a lone < | # or + <!--.*?--> # a comment + | # or <[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string | # or > # just a > @@ -1393,7 +1395,7 @@ function _filter_xss_split($m, $store = FALSE) { return '<'; } - if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) { + if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) { // Seriously malformed return ''; } @@ -1401,12 +1403,21 @@ function _filter_xss_split($m, $store = FALSE) { $slash = trim($matches[1]); $elem = &$matches[2]; $attrlist = &$matches[3]; + $comment = &$matches[4]; + + if ($comment) { + $elem = '!--'; + } if (!isset($allowed_html[strtolower($elem)])) { // Disallowed HTML element return ''; } + if ($comment) { + return $comment; + } + if ($slash != '') { return "</$elem>"; } diff --git a/modules/filter/filter.module b/modules/filter/filter.module index 6342d6443..bc24eb50e 100644 --- a/modules/filter/filter.module +++ b/modules/filter/filter.module @@ -1535,11 +1535,11 @@ function _filter_autop($text) { // All block level tags $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)'; - // Split at <pre>, <script>, <style> and </pre>, </script>, </style> tags. + // Split at opening and closing PRE, SCRIPT, STYLE, OBJECT tags and comments. // We don't apply any processing to the contents of these tags to avoid messing // up code. We look for matched pairs and allow basic nesting. For example: // "processed <pre> ignored <script> ignored </script> ignored </pre> processed" - $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + $chunks = preg_split('@(<!--.*?-->|</?(?:pre|script|style|object|!--)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE); // Note: PHP ensures the array consists of alternating delimiters and literals // and begins and ends with a literal (inserting NULL as required). $ignore = FALSE; @@ -1548,7 +1548,8 @@ function _filter_autop($text) { foreach ($chunks as $i => $chunk) { if ($i % 2) { // Opening or closing tag? - $open = ($chunk[1] != '/'); + $open = ($chunk[1] != '/' || $chunk[1] != '!'); + $comment = (substr($chunk, 0, 4) == '<!--'); list($tag) = preg_split('/[ >]/', substr($chunk, 2 - $open), 2); if (!$ignore) { if ($open) { @@ -1557,7 +1558,7 @@ function _filter_autop($text) { } } // Only allow a matching tag to close it. - elseif (!$open && $ignoretag == $tag) { + elseif ((!$open && $ignoretag == $tag) || $comment) { $ignore = FALSE; $ignoretag = ''; } diff --git a/modules/filter/filter.test b/modules/filter/filter.test index 88486625c..254256bf8 100644 --- a/modules/filter/filter.test +++ b/modules/filter/filter.test @@ -694,8 +694,8 @@ class FilterSecurityTestCase extends DrupalWebTestCase { class FilterUnitTestCase extends DrupalUnitTestCase { public static function getInfo() { return array( - 'name' => 'Core filters', - 'description' => 'Filter each filter individually: convert line breaks, correct broken HTML.', + 'name' => 'Filter module filters', + 'description' => 'Tests Filter module filters individually.', 'group' => 'Filter', ); } @@ -704,34 +704,65 @@ class FilterUnitTestCase extends DrupalUnitTestCase { * Test the line break filter. */ function testLineBreakFilter() { - // Single line breaks should be changed to <br /> tags, while paragraphs - // separated with double line breaks should be enclosed with <p></p> tags. - $f = _filter_autop("aaa\nbbb\n\nccc"); - $this->assertEqual(str_replace("\n", '', $f), "<p>aaa<br />bbb</p><p>ccc</p>", t('Line breaking basic case.')); - - // Text within some contexts should not be processed. - $f = _filter_autop("<script>aaa\nbbb\n\nccc</script>"); - $this->assertEqual($f, "<script>aaa\nbbb\n\nccc</script>", t('Line breaking -- do not break scripts.')); - - $f = _filter_autop('<p><div> </div></p>'); - $this->assertEqual(substr_count($f, '<p>'), substr_count($f, '</p>'), t('Make sure line breaking produces matching paragraph tags.')); - - $f = _filter_autop('<div><p> </p></div>'); - $this->assertEqual(substr_count($f, '<p>'), substr_count($f, '</p>'), t('Make sure line breaking produces matching paragraph tags.')); + // Setup dummy filter object. + $filter = new stdClass; + $filter->callback = '_filter_autop'; - $f = _filter_autop('<blockquote><pre>aaa</pre></blockquote>'); - $this->assertEqual(substr_count($f, '<p>'), substr_count($f, '</p>'), t('Make sure line breaking produces matching paragraph tags.')); + // Since the line break filter naturally needs plenty of newlines in test + // strings and expectations, we're using "\n" instead of regular newlines + // here. + $tests = array( + // Single line breaks should be changed to <br /> tags, while paragraphs + // separated with double line breaks should be enclosed with <p></p> tags. + "aaa\nbbb\n\nccc" => array( + "<p>aaa<br />\nbbb</p>\n<p>ccc</p>" => TRUE, + ), + // Skip contents of certain block tags entirely. + "<script>aaa\nbbb\n\nccc</script> +<style>aaa\nbbb\n\nccc</style> +<pre>aaa\nbbb\n\nccc</pre> +<object>aaa\nbbb\n\nccc</object> +<iframe>aaa\nbbb\n\nccc</iframe> +" => array( + "<script>aaa\nbbb\n\nccc</script>" => TRUE, + "<style>aaa\nbbb\n\nccc</style>" => TRUE, + "<pre>aaa\nbbb\n\nccc</pre>" => TRUE, + "<object>aaa\nbbb\n\nccc</object>" => TRUE, + "<iframe>aaa\nbbb\n\nccc</iframe>" => TRUE, + ), + // Skip comments entirely. + "One. <!-- comment --> Two.\n<!--\nThree.\n-->\n" => array( + '<!-- comment -->' => TRUE, + "<!--\nThree.\n-->" => TRUE, + ), + // Resulting HTML should produce matching paragraph tags. + '<p><div> </div></p>' => array( + "<p>\n<div> </div>\n</p>" => TRUE, + ), + '<div><p> </p></div>' => array( + "<div>\n</div>" => TRUE, + ), + '<blockquote><pre>aaa</pre></blockquote>' => array( + "<blockquote><pre>aaa</pre></blockquote>" => TRUE, + ), + ); + $this->assertFilteredString($filter, $tests); + // Very long string hitting PCRE limits. $limit = max(ini_get('pcre.backtrack_limit'), ini_get('pcre.recursion_limit')); - $f = _filter_autop($this->randomName($limit)); - $this->assertNotEqual($f, '', t('Make sure line breaking can process long strings.')); + $source = $this->randomName($limit); + $result = _filter_autop($source); + $success = $this->assertEqual($result, '<p>' . $source . "</p>\n", t('Line break filter can process very long strings.')); + if (!$success) { + $this->verbose("\n" . $source . "\n<hr />\n" . $result); + } } /** - * Test limiting allowed tags, XSS prevention and adding 'nofollow' to links. + * Tests limiting allowed tags and XSS prevention. * - * XSS tests assume that script is disallowed on default and src is allowed - * on default, but on* and style are disallowed. + * XSS tests assume that script is disallowed by default and src is allowed + * by default, but on* and style attributes are disallowed. * * Script injection vectors mostly adopted from http://ha.ckers.org/xss.html. * @@ -739,7 +770,7 @@ class FilterUnitTestCase extends DrupalUnitTestCase { * - CVE-2002-1806, ~CVE-2005-0682, ~CVE-2005-2106, CVE-2005-3973, * CVE-2006-1226 (= rev. 1.112?), CVE-2008-0273, CVE-2008-3740. */ - function testHtmlFilter() { + function testFilterXSS() { // Tag stripping, different ways to work around removal of HTML tags. $f = filter_xss('<script>alert(0)</script>'); $this->assertNoNormalized($f, 'script', t('HTML tag stripping -- simple script without special characters.')); @@ -924,7 +955,7 @@ class FilterUnitTestCase extends DrupalUnitTestCase { * @todo Class, id, name and xmlns should be added to disallowed attributes, * or better a whitelist approach should be used for that too. */ - function testFilter() { + function testHtmlFilter() { // Setup dummy filter object. $filter = new stdClass(); $filter->settings = array( @@ -992,9 +1023,6 @@ class FilterUnitTestCase extends DrupalUnitTestCase { $f = _filter_html("<\0a\0 href=\"http://www.example.com/\">text</a>", $filter); $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- some nulls.')); - $f = _filter_html('<!--[if true]><a href="http://www.example.com/">text</a><![endif]-->', $filter); - $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- link within a comment.')); - $f = _filter_html('<a href="http://www.example.com/" rel="follow">text</a>', $filter); $this->assertNoNormalized($f, 'rel="follow"', t('Spam deterrent evasion -- with rel set - rel="follow" removed.')); $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- with rel set - rel="nofollow" added.')); @@ -1003,7 +1031,7 @@ class FilterUnitTestCase extends DrupalUnitTestCase { /** * Test the loose, admin HTML filter. */ - function testAdminHtmlFilter() { + function testFilterXSSAdmin() { // DRUPAL-SA-2008-044 $f = filter_xss_admin('<object />'); $this->assertNoNormalized($f, 'object', t('Admin HTML filter -- should not allow object tag.')); @@ -1016,17 +1044,23 @@ class FilterUnitTestCase extends DrupalUnitTestCase { } /** - * Test the HTML escaping filter. + * Tests the HTML escaping filter. + * + * check_plain() is not tested here. */ - function testNoHtmlFilter() { - $this->_testEscapedHTML('_filter_html_escape'); - } + function testHtmlEscapeFilter() { + // Setup dummy filter object. + $filter = new stdClass; + $filter->callback = '_filter_html_escape'; - /** - * Test that the check_plain() function escapes HTML correctly. - */ - function testCheckPlain() { - $this->_testEscapedHTML('check_plain'); + $tests = array( + " One. <!-- \"comment\" --> Two'.\n<p>Three.</p>\n " => array( + "One. <!-- "comment" --> Two'.\n<p>Three.</p>" => TRUE, + ' One.' => FALSE, + "</p>\n " => FALSE, + ), + ); + $this->assertFilteredString($filter, $tests); } /** @@ -1035,6 +1069,7 @@ class FilterUnitTestCase extends DrupalUnitTestCase { function testUrlFilter() { // Setup dummy filter object. $filter = new stdClass; + $filter->callback = '_filter_url'; $filter->settings = array( 'filter_url_length' => 496, ); @@ -1318,23 +1353,31 @@ www.example.com with a newline in comments --> * ); * @endcode */ - protected function assertFilteredString($filter, $tests) { - foreach ($tests as $phrase => $tasks) { - $string = _filter_url($phrase, $filter); - foreach ($tasks as $value => $expected) { + function assertFilteredString($filter, $tests) { + foreach ($tests as $source => $tasks) { + $function = $filter->callback; + $result = $function($source, $filter); + foreach ($tasks as $value => $is_expected) { // Not using assertIdentical, since combination with strpos() is hard to grok. - if ($expected) { - $this->assertTrue(strpos($string, $value) !== FALSE, t('@string: @value found.', array( - '@string' => var_export($phrase, TRUE), + if ($is_expected) { + $success = $this->assertTrue(strpos($result, $value) !== FALSE, t('@source: @value found.', array( + '@source' => var_export($source, TRUE), '@value' => var_export($value, TRUE), ))); } else { - $this->assertTrue(strpos($string, $value) === FALSE, t('@string: @value not found.', array( - '@string' => var_export($phrase, TRUE), + $success = $this->assertTrue(strpos($result, $value) === FALSE, t('@source: @value not found.', array( + '@source' => var_export($source, TRUE), '@value' => var_export($value, TRUE), ))); } + if (!$success) { + $this->verbose('Source:<pre>' . check_plain(var_export($source, TRUE)) . '</pre>' + . '<hr />' . 'Result:<pre>' . check_plain(var_export($result, TRUE)) . '</pre>' + . '<hr />' . ($is_expected ? 'Found:' : 'Not found:') + . '<pre>' . check_plain(var_export($value, TRUE)) . '</pre>' + ); + } } } } @@ -1535,31 +1578,6 @@ alert("test") function assertNoNormalized($haystack, $needle, $message = '', $group = 'Other') { return $this->assertTrue(strpos(strtolower(decode_entities($haystack)), $needle) === FALSE, $message, $group); } - - /** - * Helper method to test functions that are intended to escape HTML. - * - * @param $function - * The name of the function to test. - */ - function _testEscapedHTML($function) { - // Define string replacements for the assertion messages. - $replacements = array('@function' => $function); - - // Test that characters that have special meaning in XML are changed into - // entities. - $f = $function('<>&"'); - $this->assertEqual($f, '<>&"', t('The @function() function correctly filters basic HTML entities.', $replacements)); - - // A single quote can also be used for evil things in some contexts. - $f = $function('\''); - $this->assertEqual($f, ''', t('The @function() function correctly filters single quotes.', $replacements)); - - // Test that the filter is not fooled by different evasion techniques. - // Ignore PHP 5.3+ invalid multibyte sequence warning. - $f = @$function("\xc2\""); - $this->assertEqual($f, '', t('The @function() function correctly filters invalid UTF-8.', $replacements)); - } } /** diff --git a/modules/simpletest/tests/common.test b/modules/simpletest/tests/common.test index 315b77622..0f44bea89 100644 --- a/modules/simpletest/tests/common.test +++ b/modules/simpletest/tests/common.test @@ -365,6 +365,9 @@ class CommonXssUnitTest extends DrupalUnitTestCase { // Ignore PHP 5.3+ invalid multibyte sequence warning. $text = @check_plain("Foo\xC0barbaz"); $this->assertEqual($text, '', 'check_plain() rejects invalid sequence "Foo\xC0barbaz"'); + // Ignore PHP 5.3+ invalid multibyte sequence warning. + $text = @check_plain("\xc2\""); + $this->assertEqual($text, '', 'check_plain() rejects invalid sequence "\xc2\""'); $text = check_plain("Fooÿñ"); $this->assertEqual($text, "Fooÿñ", 'check_plain() accepts valid sequence "Fooÿñ"'); $text = filter_xss("Foo\xC0barbaz"); @@ -379,6 +382,8 @@ class CommonXssUnitTest extends DrupalUnitTestCase { function testEscaping() { $text = check_plain("<script>"); $this->assertEqual($text, '<script>', 'check_plain() escapes <script>'); + $text = check_plain('<>&"\''); + $this->assertEqual($text, '<>&"'', 'check_plain() escapes reserved HTML characters.'); } /** |