diff options
author | Dries Buytaert <dries@buytaert.net> | 2010-09-02 15:56:10 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2010-09-02 15:56:10 +0000 |
commit | 679fdfcdd25ab9829834237f10f14bed41d99d3a (patch) | |
tree | 3c9f9e200d6d4608ad18b0472505293239086f89 | |
parent | fb300d1f967d17a10678582f46e82bd133d23aae (diff) | |
download | brdo-679fdfcdd25ab9829834237f10f14bed41d99d3a.tar.gz brdo-679fdfcdd25ab9829834237f10f14bed41d99d3a.tar.bz2 |
- Patch #161217 by sun, hingo: URL filter fixes and tests.
-rw-r--r-- | modules/filter/filter.module | 196 | ||||
-rw-r--r-- | modules/filter/filter.test | 383 | ||||
-rw-r--r-- | modules/filter/tests/filter.url-input.txt | 36 | ||||
-rw-r--r-- | modules/filter/tests/filter.url-output.txt | 36 | ||||
-rw-r--r-- | modules/simpletest/drupal_web_test_case.php | 36 |
5 files changed, 582 insertions, 105 deletions
diff --git a/modules/filter/filter.module b/modules/filter/filter.module index 0654cc926..6342d6443 100644 --- a/modules/filter/filter.module +++ b/modules/filter/filter.module @@ -1298,46 +1298,202 @@ function _filter_url_settings($form, &$form_state, $filter, $format, $defaults) } /** - * URL filter. Automatically converts text web addresses (URLs, e-mail addresses, - * ftp links, etc.) into hyperlinks. + * URL filter. Automatically converts text into hyperlinks. + * + * This filter identifies and makes clickable three types of "links". + * - URLs like http://example.com. + * - E-mail addresses like name@example.com. + * - Web addresses without the "http://" protocol defined, like www.example.com. + * Each type must be processed separately, as there is no one regular + * expression that could possibly match all of the cases in one pass. */ function _filter_url($text, $filter) { - // Pass length to regexp callback + // Tags to skip and not recurse into. + $ignore_tags = 'a|script|style|code|pre'; + + // Pass length to regexp callback. _filter_url_trim(NULL, $filter->settings['filter_url_length']); - $text = ' ' . $text . ' '; + // Create an array which contains the regexps for each type of link. + // The key to the regexp is the name of a function that is used as + // callback function to process matches of the regexp. The callback function + // is to return the replacement for the match. The array is used and + // matching/replacement done below inside some loops. + $tasks = array(); + + // Prepare protocols pattern for absolute URLs. + // check_url() will replace any bad protocols with HTTP, so we need to support + // the identical list. While '//' is technically optional for MAILTO only, + // we cannot cleanly differ between protocols here without hard-coding MAILTO, + // so '//' is optional for all protocols. + // @see filter_xss_bad_protocol() + $protocols = variable_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal', 'rtsp')); + $protocols = implode(':(?://)?|', $protocols) . ':(?://)?'; + + // Prepare domain name pattern. + // The ICANN seems to be on track towards accepting more diverse top level + // domains, so this pattern has been "future-proofed" to allow for TLDs + // of length 2-64. + $domain = '(?:[A-Za-z0-9._+-]+\.)?[A-Za-z]{2,64}\b'; + $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}'; + $auth = '[a-zA-Z0-9:%_+*~#?&=.,/;-]+@'; + $trail = '[a-zA-Z0-9:%_+*~#&\[\]=/;?\.,-]*[a-zA-Z0-9:%_+*~#&\[\]=/;-]'; + + // Prepare pattern for optional trailing punctuation. + // Even these characters could have a valid meaning for the URL, such usage is + // rare compared to using a URL at the end of or within a sentence, so these + // trailing characters are optionally excluded. + $punctuation = '[\.,?!]*?'; // Match absolute URLs. - $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text); + $url_pattern = "(?:$auth)?(?:$domain|$ip)/?(?:$trail)?"; + $pattern = "`((?:$protocols)(?:$url_pattern))($punctuation)`"; + $tasks['_filter_url_parse_full_links'] = $pattern; // Match e-mail addresses. - $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text); + $url_pattern = "[A-Za-z0-9._-]+@(?:$domain)"; + $pattern = "`($url_pattern)`"; + $tasks['_filter_url_parse_email_links'] = $pattern; + + // Match www domains. + $url_pattern = "www\.(?:$domain)/?(?:$trail)?"; + $pattern = "`($url_pattern)($punctuation)`"; + $tasks['_filter_url_parse_partial_links'] = $pattern; + + // Each type of URL needs to be processed separately. The text is joined and + // re-split after each task, since all injected HTML tags must be correctly + // protected before the next task. + foreach ($tasks as $task => $pattern) { + // HTML comments need to be handled separately, as they may contain HTML + // markup, especially a '>'. Therefore, remove all comment contents and add + // them back later. + _filter_url_escape_comments('', TRUE); + $text = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text); + + // Split at all tags; ensures that no tags or attributes are processed. + $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + // PHP ensures that the array consists of alternating delimiters and + // literals, and begins and ends with a literal (inserting NULL as + // required). Therefore, the first chunk is always text: + $chunk_type = 'text'; + // If a tag of $ignore_tags is found, it is stored in $open_tag and only + // removed when the closing tag is found. Until the closing tag is found, + // no replacements are made. + $open_tag = ''; + + for ($i = 0; $i < count($chunks); $i++) { + if ($chunk_type == 'text') { + // Only process this text if there are no unclosed $ignore_tags. + if ($open_tag == '') { + // If there is a match, inject a link into this chunk via the callback + // function contained in $task. + $chunks[$i] = preg_replace_callback($pattern, $task, $chunks[$i]); + } + // Text chunk is done, so next chunk must be a tag. + $chunk_type = 'tag'; + } + else { + // Only process this tag if there are no unclosed $ignore_tags. + if ($open_tag == '') { + // Check whether this tag is contained in $ignore_tags. + if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunks[$i], $matches)) { + $open_tag = $matches[1]; + } + } + // Otherwise, check whether this is the closing tag for $open_tag. + else { + if (preg_match("`<\/$open_tag>`i", $chunks[$i], $matches)) { + $open_tag = ''; + } + } + // Tag chunk is done, so next chunk must be text. + $chunk_type = 'text'; + } + } - // Match www domains/addresses. - $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text); - $text = substr($text, 1, -1); + $text = implode($chunks); + // Revert back to the original comment contents + _filter_url_escape_comments('', FALSE); + $text = preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text); + } return $text; } /** - * Make links out of absolute URLs. + * preg_replace callback to make links out of absolute URLs. */ function _filter_url_parse_full_links($match) { - $match[2] = decode_entities($match[2]); - $caption = check_plain(_filter_url_trim($match[2])); - $match[2] = check_url($match[2]); - return $match[1] . '<a href="' . $match[2] . '">' . $caption . '</a>' . $match[5]; + // The $i:th parenthesis in the regexp contains the URL. + $i = 1; + + $match[$i] = decode_entities($match[$i]); + $caption = check_plain(_filter_url_trim($match[$i])); + $match[$i] = check_plain($match[$i]); + return '<a href="' . $match[$i] . '">' . $caption . '</a>' . $match[$i + 1]; +} + +/** + * preg_replace callback to make links out of e-mail addresses. + */ +function _filter_url_parse_email_links($match) { + // The $i:th parenthesis in the regexp contains the URL. + $i = 0; + + $match[$i] = decode_entities($match[$i]); + $caption = check_plain(_filter_url_trim($match[$i])); + $match[$i] = check_plain($match[$i]); + return '<a href="mailto:' . $match[$i] . '">' . $caption . '</a>'; } /** - * Make links out of domain names starting with "www." + * preg_replace callback to make links out of domain names starting with "www." */ function _filter_url_parse_partial_links($match) { - $match[2] = decode_entities($match[2]); - $caption = check_plain(_filter_url_trim($match[2])); - $match[2] = check_plain($match[2]); - return $match[1] . '<a href="http://' . $match[2] . '">' . $caption . '</a>' . $match[3]; + // The $i:th parenthesis in the regexp contains the URL. + $i = 1; + + $match[$i] = decode_entities($match[$i]); + $caption = check_plain(_filter_url_trim($match[$i])); + $match[$i] = check_plain($match[$i]); + return '<a href="http://' . $match[$i] . '">' . $caption . '</a>' . $match[$i + 1]; +} + +/** + * preg_replace callback to escape contents of HTML comments + * + * @param $match + * An array containing matches to replace from preg_replace_callback(), + * whereas $match[1] is expected to contain the content to be filtered. + * @param $escape + * (optional) Boolean whether to escape (TRUE) or unescape comments (FALSE). + * Defaults to neither. If TRUE, statically cached $comments are reset. + */ +function _filter_url_escape_comments($match, $escape = NULL) { + static $mode, $comments = array(); + + if (isset($escape)) { + $mode = $escape; + if ($escape){ + $comments = array(); + } + return; + } + + // Replace all HTML coments with a '<!-- [hash] -->' placeholder. + if ($mode) { + $content = $match[1]; + $hash = md5($content); + $comments[$hash] = $content; + return "<!-- $hash -->"; + } + // Or replace placeholders with actual comment contents. + else { + $hash = $match[1]; + $hash = trim($hash); + $content = $comments[$hash]; + return "<!--$content-->"; + } } /** @@ -1350,7 +1506,7 @@ function _filter_url_trim($text, $length = NULL) { } // Use +3 for '...' string length. - if (strlen($text) > $_length + 3) { + if ($_length && strlen($text) > $_length + 3) { $text = substr($text, 0, $_length) . '...'; } diff --git a/modules/filter/filter.test b/modules/filter/filter.test index 113ed53d5..6b80aa35e 100644 --- a/modules/filter/filter.test +++ b/modules/filter/filter.test @@ -1030,84 +1030,333 @@ class FilterUnitTestCase extends DrupalUnitTestCase { } /** - * Test the URL filter. + * Tests the URL filter. */ function testUrlFilter() { // Setup dummy filter object. - $filter = new stdClass(); + $filter = new stdClass; $filter->settings = array( 'filter_url_length' => 496, ); - - // Converting URLs. - $f = _filter_url('http://www.example.com/', $filter); - $this->assertEqual($f, '<a href="http://www.example.com/">http://www.example.com/</a>', t('Converting URLs.')); - - $f = _filter_url('http://www.example.com/?a=1&b=2', $filter); - $this->assertEqual($f, '<a href="http://www.example.com/?a=1&b=2">http://www.example.com/?a=1&b=2</a>', t('Converting URLs -- ampersands.')); - - $f = _filter_url('ftp://user:pass@ftp.example.com/dir1/dir2', $filter); - $this->assertEqual($f, '<a href="ftp://user:pass@ftp.example.com/dir1/dir2">ftp://user:pass@ftp.example.com/dir1/dir2</a>', t('Converting URLs -- FTP scheme.')); - - // Converting domain names. - $f = _filter_url('www.example.com', $filter); - $this->assertEqual($f, '<a href="http://www.example.com">www.example.com</a>', t('Converting domain names.')); - - $f = _filter_url('<li>www.example.com</li>', $filter); - $this->assertEqual($f, '<li><a href="http://www.example.com">www.example.com</a></li>', t('Converting domain names -- domain in a list.')); - - $f = _filter_url('(www.example.com/dir?a=1&b=2#a)', $filter); - $this->assertEqual($f, '(<a href="http://www.example.com/dir?a=1&b=2#a">www.example.com/dir?a=1&b=2#a</a>)', t('Converting domain names -- domain in parentheses.')); - - // Converting e-mail addresses. - $f = _filter_url('johndoe@example.com', $filter); - $this->assertEqual($f, '<a href="mailto:johndoe@example.com">johndoe@example.com</a>', t('Converting e-mail addresses.')); - - $f = _filter_url('aaa@sub.tv', $filter); - $this->assertEqual($f, '<a href="mailto:aaa@sub.tv">aaa@sub.tv</a>', t('Converting e-mail addresses -- a short e-mail from Tuvalu.')); + // @todo Possible categories: + // - absolute, mail, partial + // - characters/encoding, surrounding markup, security + + // Filter selection/pattern matching. + $tests = array( + // HTTP URLs. + ' +http://example.com or www.example.com +' => array( + '<a href="http://example.com">http://example.com</a>' => TRUE, + '<a href="http://www.example.com">www.example.com</a>' => TRUE, + ), + // MAILTO URLs. + ' +person@example.com or mailto:person2@example.com +' => array( + '<a href="mailto:person@example.com">person@example.com</a>' => TRUE, + '<a href="mailto:person2@example.com">mailto:person2@example.com</a>' => TRUE, + ), + // URI parts. + ' +http://trailingslash.com/ or www.trailingslash.com/ +http://host.com/some/path?query=foo&bar[baz]=beer#fragment or www.host.com/some/path?query=foo&bar[baz]=beer#fragment +ftp://user:pass@ftp.example.com/~home/dir1 +sftp://user@nonstandardport:222/dir +ssh://192.168.0.100/srv/git/drupal.git +' => array( + '<a href="http://trailingslash.com/">http://trailingslash.com/</a>' => TRUE, + '<a href="http://www.trailingslash.com/">www.trailingslash.com/</a>' => TRUE, + '<a href="http://host.com/some/path?query=foo&bar[baz]=beer#fragment">http://host.com/some/path?query=foo&bar[baz]=beer#fragment</a>' => TRUE, + '<a href="http://www.host.com/some/path?query=foo&bar[baz]=beer#fragment">www.host.com/some/path?query=foo&bar[baz]=beer#fragment</a>' => TRUE, + '<a href="ftp://user:pass@ftp.example.com/~home/dir1">ftp://user:pass@ftp.example.com/~home/dir1</a>' => TRUE, + '<a href="sftp://user@nonstandardport:222/dir">sftp://user@nonstandardport:222/dir</a>' => TRUE, + '<a href="ssh://192.168.0.100/srv/git/drupal.git">ssh://192.168.0.100/srv/git/drupal.git</a>' => TRUE, + ), + // Encoding. + ' +http://ampersand.com/?a=1&b=2 +http://encoded.com/?a=1&b=2 +' => array( + '<a href="http://ampersand.com/?a=1&b=2">http://ampersand.com/?a=1&b=2</a>' => TRUE, + '<a href="http://encoded.com/?a=1&b=2">http://encoded.com/?a=1&b=2</a>' => TRUE, + ), + // Domain name length. + ' +www.ex.ex or www.example.example or www.toolongdomainexampledomainexampledomainexampledomainexampledomain or +me@me.tv +' => array( + '<a href="http://www.ex.ex">www.ex.ex</a>' => TRUE, + '<a href="http://www.example.example">www.example.example</a>' => TRUE, + 'http://www.toolong' => FALSE, + '<a href="mailto:me@me.tv">me@me.tv</a>' => TRUE, + ), + // Absolute URL protocols. + // The list to test is found in the beginning of _filter_url() at + // $protocols = variable_get('filter_allowed_protocols'... (approx line 1325). + ' +https://example.com, +ftp://ftp.example.com, +news://example.net, +telnet://example, +irc://example.host, +ssh://odd.geek, +sftp://secure.host?, +webcal://calendar, +rtsp://127.0.0.1, +not foo://disallowed.com. +' => array( + 'href="https://example.com"' => TRUE, + 'href="ftp://ftp.example.com"' => TRUE, + 'href="news://example.net"' => TRUE, + 'href="telnet://example"' => TRUE, + 'href="irc://example.host"' => TRUE, + 'href="ssh://odd.geek"' => TRUE, + 'href="sftp://secure.host"' => TRUE, + 'href="webcal://calendar"' => TRUE, + 'href="rtsp://127.0.0.1"' => TRUE, + 'href="foo://disallowed.com"' => FALSE, + 'not foo://disallowed.com.' => TRUE, + ), + ); + $this->assertFilteredString($filter, $tests); + + // Surrounding text/punctuation. + $tests = array( + ' +Partial URL with trailing period www.partial.com. +E-mail with trailing comma person@example.com, +Absolute URL with trailing question http://www.absolute.com? +Query string with trailing exclamation www.query.com/index.php?a=! +Partial URL with 3 trailing www.partial.periods... +E-mail with 3 trailing exclamations@example.com!!! +Absolute URL and query string with 2 different punctuation characters (http://www.example.com/q=abc). +' => array( + 'period <a href="http://www.partial.com">www.partial.com</a>.' => TRUE, + 'comma <a href="mailto:person@example.com">person@example.com</a>,' => TRUE, + 'question <a href="http://www.absolute.com">http://www.absolute.com</a>?' => TRUE, + 'exclamation <a href="http://www.query.com/index.php?a=">www.query.com/index.php?a=</a>!' => TRUE, + 'trailing <a href="http://www.partial.periods">www.partial.periods</a>...' => TRUE, + 'trailing <a href="mailto:exclamations@example.com">exclamations@example.com</a>!!!' => TRUE, + 'characters (<a href="http://www.example.com/q=abc">http://www.example.com/q=abc</a>).' => TRUE, + ), + ' +(www.parenthesis.com/dir?a=1&b=2#a) +' => array( + '(<a href="http://www.parenthesis.com/dir?a=1&b=2#a">www.parenthesis.com/dir?a=1&b=2#a</a>)' => TRUE, + ), + ); + $this->assertFilteredString($filter, $tests); + + // Surrounding markup. + $tests = array( + ' +<p xmlns="www.namespace.com" /> +<p xmlns="http://namespace.com"> +An <a href="http://example.com" title="Read more at www.example.info...">anchor</a>. +</p> +' => array( + '<p xmlns="www.namespace.com" />' => TRUE, + '<p xmlns="http://namespace.com">' => TRUE, + 'href="http://www.namespace.com"' => FALSE, + 'href="http://namespace.com"' => FALSE, + 'An <a href="http://example.com" title="Read more at www.example.info...">anchor</a>.' => TRUE, + ), + ' +Not <a href="foo">www.relative.com</a> or <a href="http://absolute.com">www.absolute.com</a> +but <strong>http://www.strong.net</strong> or <em>www.emphasis.info</em> +' => array( + '<a href="foo">www.relative.com</a>' => TRUE, + 'href="http://www.relative.com"' => FALSE, + '<a href="http://absolute.com">www.absolute.com</a>' => TRUE, + '<strong><a href="http://www.strong.net">http://www.strong.net</a></strong>' => TRUE, + '<em><a href="http://www.emphasis.info">www.emphasis.info</a></em>' => TRUE, + ), + ' +Test <code>using www.example.com the code tag</code>. +' => array( + 'href' => FALSE, + 'http' => FALSE, + ), + ' +Intro. +<blockquote> +Quoted text linking to www.example.com, written by person@example.com, originating from http://origin.example.com. <code>@see www.usage.example.com or <em>www.example.info</em> bla bla</code>. +</blockquote> + +Outro. +' => array( + 'href="http://www.example.com"' => TRUE, + 'href="mailto:person@example.com"' => TRUE, + 'href="http://origin.example.com"' => TRUE, + 'http://www.usage.example.com' => FALSE, + 'http://www.example.info' => FALSE, + 'Intro.' => TRUE, + 'Outro.' => TRUE, + ), + ' +Unknown tag <x>containing x and www.example.com</x>? And a tag <pooh>beginning with p and containing www.example.pooh with p?</pooh> +' => array( + 'href="http://www.example.com"' => TRUE, + 'href="http://www.example.pooh"' => TRUE, + ), + ' +<p>Test <br/>: This is a www.example17.com example <strong>with</strong> various http://www.example18.com tags. *<br/> + It is important www.example19.com to *<br/>test different URLs and http://www.example20.com in the same paragraph. *<br> +HTML www.example21.com soup by person@example22.com can litererally http://www.example23.com contain *img*<img> anything. Just a www.example24.com with http://www.example25.com thrown in. www.example26.com from person@example27.com with extra http://www.example28.com. +' => array( + 'href="http://www.example17.com"' => TRUE, + 'href="http://www.example18.com"' => TRUE, + 'href="http://www.example19.com"' => TRUE, + 'href="http://www.example20.com"' => TRUE, + 'href="http://www.example21.com"' => TRUE, + 'href="mailto:person@example22.com"' => TRUE, + 'href="http://www.example23.com"' => TRUE, + 'href="http://www.example24.com"' => TRUE, + 'href="http://www.example25.com"' => TRUE, + 'href="http://www.example26.com"' => TRUE, + 'href="mailto:person@example27.com"' => TRUE, + 'href="http://www.example28.com"' => TRUE, + ), + ' +<script> +<!-- + // @see www.example.com + var exampleurl = "http://example.net"; +--> +<!--//--><![CDATA[//><!-- + // @see www.example.com + var exampleurl = "http://example.net"; +//--><!]]> +</script> +' => array( + 'href="http://www.example.com"' => FALSE, + 'href="http://example.net"' => FALSE, + ), + ' +<style>body { + background: url(http://example.com/pixel.gif); +}</style> +' => array( + 'href' => FALSE, + ), + ' +<!-- Skip any URLs like www.example.com in comments --> +' => array( + 'href' => FALSE, + ), + ' +<!-- Skip any URLs like +www.example.com with a newline in comments --> +' => array( + 'href' => FALSE, + ), + ' +<!-- Skip any URLs like www.comment.com in comments. <p>Also ignore http://commented.out/markup.</p> --> +' => array( + 'href' => FALSE, + ), + ' +<dl> +<dt>www.example.com</dt> +<dd>http://example.com</dd> +<dd>person@example.com</dd> +<dt>Check www.example.net</dt> +<dd>Some text around http://www.example.info by person@example.info?</dd> +</dl> +' => array( + 'href="http://www.example.com"' => TRUE, + 'href="http://example.com"' => TRUE, + 'href="mailto:person@example.com"' => TRUE, + 'href="http://www.example.net"' => TRUE, + 'href="http://www.example.info"' => TRUE, + 'href="mailto:person@example.info"' => TRUE, + ), + ); + $this->assertFilteredString($filter, $tests); // URL trimming. - $filter->settings['filter_url_length'] = 28; - - $f = _filter_url('http://www.example.com/d/ff.ext?a=1&b=2#a1', $filter); - $this->assertNormalized($f, 'http://www.example.com/d/ff....', t('URL trimming.')); - - // Not breaking existing links. - $f = _filter_url('<a href="http://www.example.com">www.example.com</a>', $filter); - $this->assertEqual($f, '<a href="http://www.example.com">www.example.com</a>', t('Converting URLs -- do not break existing links.')); - - $f = _filter_url('<a href="foo">http://www.example.com</a>', $filter); - $this->assertEqual($f, '<a href="foo">http://www.example.com</a>', t('Converting URLs -- do not break existing, relative links.')); - - // Addresses within some tags such as code or script should not be converted. - $f = _filter_url('<code>http://www.example.com</code>', $filter); - $this->assertEqual($f, '<code>http://www.example.com</code>', t('Converting URLs -- skip code contents.')); - - $f = _filter_url('<code><em>http://www.example.com</em></code>', $filter); - $this->assertEqual($f, '<code><em>http://www.example.com</em></code>', t('Converting URLs -- really skip code contents.')); - - $f = _filter_url('<script>http://www.example.com</script>', $filter); - $this->assertEqual($f, '<script>http://www.example.com</script>', t('Converting URLs -- do not process scripts.')); - - // Addresses in attributes should not be converted. - $f = _filter_url('<p xmlns="http://www.example.com" />', $filter); - $this->assertEqual($f, '<p xmlns="http://www.example.com" />', t('Converting URLs -- do not convert addresses in attributes.')); - - $f = _filter_url('<a title="Go to www.example.com" href="http://www.example.com">text</a>', $filter); - $this->assertEqual($f, '<a title="Go to www.example.com" href="http://www.example.com">text</a>', t('Converting URLs -- do not break existing links with custom title attribute.')); + $filter->settings['filter_url_length'] = 20; + $tests = array( + 'www.trimmed.com/d/ff.ext?a=1&b=2#a1' => array( + '<a href="http://www.trimmed.com/d/ff.ext?a=1&b=2#a1">www.trimmed.com/d/ff...</a>' => TRUE, + ), + ); + $this->assertFilteredString($filter, $tests); + } - // Even though a dot at the end of a URL can indicate a fully qualified - // domain name, such usage is rare compared to using a link at the end - // of a sentence, so remove the dot from the link. - // @todo It can also be used at the end of a filename or a query string. - $f = _filter_url('www.example.com.', $filter); - $this->assertEqual($f, '<a href="http://www.example.com">www.example.com</a>.', t('Converting URLs -- do not recognize a dot at the end of a domain name (FQDNs).')); + /** + * Asserts multiple filter output expectations for multiple input strings. + * + * @param $filter + * A input filter object. + * @param $tests + * An associative array, whereas each key is an arbitrary input string and + * each value is again an associative array whose keys are filter output + * strings and whose values are Booleans indicating whether the output is + * expected or not. + * + * For example: + * @code + * $tests = array( + * 'Input string' => array( + * '<p>Input string</p>' => TRUE, + * 'Input string<br' => FALSE, + * ), + * ); + * @endcode + */ + protected function assertFilteredString($filter, $tests) { + foreach ($tests as $phrase => $tasks) { + $string = _filter_url($phrase, $filter); + foreach ($tasks as $value => $expected) { + // Not using assertIdentical, since combination with strpos() is hard to grok. + if ($expected) { + $this->assertTrue(strpos($string, $value) !== FALSE, t('@string: @value found.', array( + '@string' => var_export($phrase, TRUE), + '@value' => var_export($value, TRUE), + ))); + } + else { + $this->assertTrue(strpos($string, $value) === FALSE, t('@string: @value not found.', array( + '@string' => var_export($phrase, TRUE), + '@value' => var_export($value, TRUE), + ))); + } + } + } + } - $f = _filter_url('http://www.example.com.', $filter); - $this->assertEqual($f, '<a href="http://www.example.com">http://www.example.com</a>.', t('Converting URLs -- do not recognize a dot at the end of an URL (FQDNs).')); + /** + * Tests URL filter on longer content. + * + * Filters based on regular expressions should also be tested with a more + * complex content than just isolated test lines. + * The most common errors are: + * - accidental '*' (greedy) match instead of '*?' (minimal) match. + * - only matching first occurrence instead of all. + * - newlines not matching '.*'. + * + * This test covers: + * - Document with multiple newlines and paragraphs (two newlines). + * - Mix of several HTML tags, invalid non-HTML tags, tags to ignore and HTML + * comments. + * - Empty HTML tags (BR, IMG). + * - Mix of absolute and partial URLs, and e-mail addresses in one content. + */ + function testUrlFilterContent() { + // Setup dummy filter object. + $filter = new stdClass; + $filter->settings = array( + 'filter_url_length' => 496, + ); + $path = drupal_get_path('module', 'filter') . '/tests'; - $f = _filter_url('www.example.com/index.php?a=.', $filter); - $this->assertEqual($f, '<a href="http://www.example.com/index.php?a=">www.example.com/index.php?a=</a>.', t('Converting URLs -- do forget about a dot at the end of a query string.')); + $input = file_get_contents($path . '/filter.url-input.txt'); + $expected = file_get_contents($path . '/filter.url-output.txt'); + $result = _filter_url($input, $filter); + $this->assertIdentical($result, $expected, 'Complex HTML document was correctly processed.'); } /** diff --git a/modules/filter/tests/filter.url-input.txt b/modules/filter/tests/filter.url-input.txt new file mode 100644 index 000000000..7b33af56c --- /dev/null +++ b/modules/filter/tests/filter.url-input.txt @@ -0,0 +1,36 @@ +This is just a www.test.com. paragraph with person@test.com. some http://www.test.com. urls thrown in and also <code>using www.test.com the code tag</code>. + +<blockquote> +This is just a www.test.com. paragraph with person@test.com. some http://www.test.com. urls thrown in and also <code>using www.test.com the code tag</code>. +</blockquote> + +<code>Testing code tag http://www.test.com abc</code> + +http://www.test.com +www.test.com +person@test.com +<code>www.test.com</code> + +What about tags that don't exist <x>like x say www.test.com</x>? And what about tag <pooh>beginning www.test.com with p?</pooh> + +Test <br/>: This is just a www.test.com. paragraph <strong>with</strong> some http://www.test.com urls thrown in. *<br/> This is just a www.test.com paragraph *<br/> with some http://www.test.com urls thrown in. *<br/>This is just a www.test.com paragraph person@test.com with some http://www.test.com urls *img*<img/> thrown in. This is just a www.test.com paragraph with some http://www.test.com urls thrown in. This is just a www.test.com paragraph person@test.com with some http://www.test.com urls thrown in. + +This is just a www.test.com paragraph <strong>with</strong> some http://www.test.com urls thrown in. <br /> This is just a www.test.com paragraph with some http://www.test.com urls thrown in. This is just a www.test.com paragraph person@test.com with some http://www.test.com urls thrown in. This is just a www.test.com paragraph with some http://www.test.com urls thrown in. This is just a www.test.com paragraph person@test.com with some http://www.test.com urls thrown in. + +The old URL filter has problems with <a title="kind of link www.example.com with text" href="http://www.example.com">this kind of link</a> with www address as part of text in title. www.test.com + +<!-- This url www.test.com is inside a comment --> + +<dl> +<dt>www.test.com</dt> +<dd>http://www.test.com</dd> +<dd>person@test.com</dd> +<dt>check www.test.com</dt> +<dd>this with some text around: http://www.test.com not so easy person@test.com now?</dd> +</dl> + +<!-- <p>This url http://www.test.com is + inside a comment containing newlines and +<em>html</em> tags.</p> --> + +This is the end!
\ No newline at end of file diff --git a/modules/filter/tests/filter.url-output.txt b/modules/filter/tests/filter.url-output.txt new file mode 100644 index 000000000..9cc507308 --- /dev/null +++ b/modules/filter/tests/filter.url-output.txt @@ -0,0 +1,36 @@ +This is just a <a href="http://www.test.com">www.test.com</a>. paragraph with <a href="mailto:person@test.com">person@test.com</a>. some <a href="http://www.test.com">http://www.test.com</a>. urls thrown in and also <code>using www.test.com the code tag</code>. + +<blockquote> +This is just a <a href="http://www.test.com">www.test.com</a>. paragraph with <a href="mailto:person@test.com">person@test.com</a>. some <a href="http://www.test.com">http://www.test.com</a>. urls thrown in and also <code>using www.test.com the code tag</code>. +</blockquote> + +<code>Testing code tag http://www.test.com abc</code> + +<a href="http://www.test.com">http://www.test.com</a> +<a href="http://www.test.com">www.test.com</a> +<a href="mailto:person@test.com">person@test.com</a> +<code>www.test.com</code> + +What about tags that don't exist <x>like x say <a href="http://www.test.com">www.test.com</a></x>? And what about tag <pooh>beginning <a href="http://www.test.com">www.test.com</a> with p?</pooh> + +Test <br/>: This is just a <a href="http://www.test.com">www.test.com</a>. paragraph <strong>with</strong> some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. *<br/> This is just a <a href="http://www.test.com">www.test.com</a> paragraph *<br/> with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. *<br/>This is just a <a href="http://www.test.com">www.test.com</a> paragraph <a href="mailto:person@test.com">person@test.com</a> with some <a href="http://www.test.com">http://www.test.com</a> urls *img*<img/> thrown in. This is just a <a href="http://www.test.com">www.test.com</a> paragraph with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. This is just a <a href="http://www.test.com">www.test.com</a> paragraph <a href="mailto:person@test.com">person@test.com</a> with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. + +This is just a <a href="http://www.test.com">www.test.com</a> paragraph <strong>with</strong> some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. <br /> This is just a <a href="http://www.test.com">www.test.com</a> paragraph with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. This is just a <a href="http://www.test.com">www.test.com</a> paragraph <a href="mailto:person@test.com">person@test.com</a> with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. This is just a <a href="http://www.test.com">www.test.com</a> paragraph with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. This is just a <a href="http://www.test.com">www.test.com</a> paragraph <a href="mailto:person@test.com">person@test.com</a> with some <a href="http://www.test.com">http://www.test.com</a> urls thrown in. + +The old URL filter has problems with <a title="kind of link www.example.com with text" href="http://www.example.com">this kind of link</a> with www address as part of text in title. <a href="http://www.test.com">www.test.com</a> + +<!-- This url www.test.com is inside a comment --> + +<dl> +<dt><a href="http://www.test.com">www.test.com</a></dt> +<dd><a href="http://www.test.com">http://www.test.com</a></dd> +<dd><a href="mailto:person@test.com">person@test.com</a></dd> +<dt>check <a href="http://www.test.com">www.test.com</a></dt> +<dd>this with some text around: <a href="http://www.test.com">http://www.test.com</a> not so easy <a href="mailto:person@test.com">person@test.com</a> now?</dd> +</dl> + +<!-- <p>This url http://www.test.com is + inside a comment containing newlines and +<em>html</em> tags.</p> --> + +This is the end!
\ No newline at end of file diff --git a/modules/simpletest/drupal_web_test_case.php b/modules/simpletest/drupal_web_test_case.php index 981554d97..945cbe9d0 100644 --- a/modules/simpletest/drupal_web_test_case.php +++ b/modules/simpletest/drupal_web_test_case.php @@ -412,6 +412,24 @@ abstract class DrupalTestCase { } /** + * Logs verbose message in a text file. + * + * The a link to the vebose message will be placed in the test results via + * as a passing assertion with the text '[verbose message]'. + * + * @param $message + * The verbose message to be stored. + * + * @see simpletest_verbose() + */ + protected function verbose($message) { + if ($id = simpletest_verbose($message)) { + $url = file_create_url($this->originalFileDirectory . '/simpletest/verbose/' . get_class($this) . '-' . $id . '.html'); + $this->error(l(t('Verbose message'), $url, array('attributes' => array('target' => '_blank'))), 'User notice'); + } + } + + /** * Run all tests in this class. */ public function run() { @@ -3071,24 +3089,6 @@ class DrupalWebTestCase extends DrupalTestCase { $this->verbose(t('Email:') . '<pre>' . print_r($mail, TRUE) . '</pre>'); } } - - /** - * Logs verbose message in a text file. - * - * The a link to the vebose message will be placed in the test results via - * as a passing assertion with the text '[verbose message]'. - * - * @param $message - * The verbose message to be stored. - * - * @see simpletest_verbose() - */ - protected function verbose($message) { - if ($id = simpletest_verbose($message)) { - $url = file_create_url($this->originalFileDirectory . '/simpletest/verbose/' . get_class($this) . '-' . $id . '.html'); - $this->error(l(t('Verbose message'), $url, array('attributes' => array('target' => '_blank'))), 'User notice'); - } - } } /** |