diff options
author | Steven Wittens <steven@10.no-reply.drupal.org> | 2005-04-07 22:46:55 +0000 |
---|---|---|
committer | Steven Wittens <steven@10.no-reply.drupal.org> | 2005-04-07 22:46:55 +0000 |
commit | 0f41706acc230a03e241b302d46b552e801770d0 (patch) | |
tree | 330ccc004aed92b4f73af2fafc2f83f998f5c01e | |
parent | 7d3d5532dfb93d1ed9e04b64e2576312591f1f28 (diff) | |
download | brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.gz brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.bz2 |
- #19874: Fix aggregator escaping after check_plain bug
- Fix bug in decode_entities() with double-escaped entities.
-rw-r--r-- | includes/common.inc | 25 | ||||
-rw-r--r-- | modules/aggregator.module | 8 | ||||
-rw-r--r-- | modules/aggregator/aggregator.module | 8 |
3 files changed, 24 insertions, 17 deletions
diff --git a/includes/common.inc b/includes/common.inc index 15c8041d2..d6cd3f820 100644 --- a/includes/common.inc +++ b/includes/common.inc @@ -1736,6 +1736,7 @@ function mime_header_encode($string, $charset = 'UTF-8') { /** * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes. + * Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<"). * * @param $text * The text to decode entities in. @@ -1751,20 +1752,33 @@ function decode_entities($text, $exclude = array()) { $table = array_flip(get_html_translation_table(HTML_ENTITIES)); // PHP gives us ISO-8859-1 data, we need UTF-8. $table = array_map('utf8_encode', $table); + // Add apostrophe (XML) + $table['&apos;'] = "'"; } - $text = strtr($text, array_diff($table, $exclude)); + $newtable = array_diff($table, $exclude); - // Any remaining entities are numerical. Use a regexp to replace them. - return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $exclude)', $text); + // Use a regexp to select all entities in one pass, to avoid decoding double-escaped entities twice. + return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text); } /** * Helper function for decode_entities */ -function _decode_entities($hex, $codepoint, $original, $exclude) { - if ($hex != '') { +function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) { + // Named entity + if (!$prefix) { + if (isset($table[$original])) { + return $table[$original]; + } + else { + return $original; + } + } + // Hexadecimal numerical entity + if ($prefix == '#x') { $codepoint = base_convert($codepoint, 16, 10); } + // Encode codepoint as UTF-8 bytes if ($codepoint < 0x80) { $str = chr($codepoint); } @@ -1783,6 +1797,7 @@ function _decode_entities($hex, $codepoint, $original, $exclude) { . chr(0x80 | (($codepoint >> 6) & 0x3F)) . 
chr(0x80 | ( $codepoint & 0x3F)); } + // Check for excluded characters if (in_array($str, $exclude)) { return $original; } diff --git a/modules/aggregator.module b/modules/aggregator.module index 51acc9374..0d7601db3 100644 --- a/modules/aggregator.module +++ b/modules/aggregator.module @@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) { } xml_parser_free($xml_parser); - // initialize the translation table: - $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS)); - $tt['&apos;'] = "'"; - /* ** We reverse the array such that we store the first item last, ** and the last item first. In the database, the newest item @@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) { // Prepare the item: foreach ($item as $key => $value) { // TODO: Make handling of aggregated HTML more flexible/configurable. - $value = strtr(trim($value), $tt); + $value = decode_entities(trim($value)); $value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>'); $value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value); $value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value); @@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) { $output .= " <div class=\"body\">\n"; $output .= ' <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n"; if ($item->description) { - $output .= ' <div class="description">'. check_plain($item->description) ."</div>\n"; + $output .= ' <div class="description">'. $item->description ."</div>\n"; } if ($item->ftitle && $item->fid) { $output .= ' <div class="source">'. t('Source') .': '. 
l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n"; diff --git a/modules/aggregator/aggregator.module b/modules/aggregator/aggregator.module index 51acc9374..0d7601db3 100644 --- a/modules/aggregator/aggregator.module +++ b/modules/aggregator/aggregator.module @@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) { } xml_parser_free($xml_parser); - // initialize the translation table: - $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS)); - $tt['&apos;'] = "'"; - /* ** We reverse the array such that we store the first item last, ** and the last item first. In the database, the newest item @@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) { // Prepare the item: foreach ($item as $key => $value) { // TODO: Make handling of aggregated HTML more flexible/configurable. - $value = strtr(trim($value), $tt); + $value = decode_entities(trim($value)); $value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>'); $value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value); $value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value); @@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) { $output .= " <div class=\"body\">\n"; $output .= ' <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n"; if ($item->description) { - $output .= ' <div class="description">'. check_plain($item->description) ."</div>\n"; + $output .= ' <div class="description">'. $item->description ."</div>\n"; } if ($item->ftitle && $item->fid) { $output .= ' <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n"; |