summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steven Wittens <steven@10.no-reply.drupal.org> 2005-04-07 22:46:55 +0000
committer Steven Wittens <steven@10.no-reply.drupal.org> 2005-04-07 22:46:55 +0000
commit 0f41706acc230a03e241b302d46b552e801770d0 (patch)
tree 330ccc004aed92b4f73af2fafc2f83f998f5c01e
parent 7d3d5532dfb93d1ed9e04b64e2576312591f1f28 (diff)
download brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.gz
brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.bz2
- #19874: Fix aggregator escaping after check_plain bug
- Fix bug in decode_entities() with double-escaped entities.
-rw-r--r-- includes/common.inc 25
-rw-r--r-- modules/aggregator.module 8
-rw-r--r-- modules/aggregator/aggregator.module 8
3 files changed, 24 insertions, 17 deletions
diff --git a/includes/common.inc b/includes/common.inc
index 15c8041d2..d6cd3f820 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -1736,6 +1736,7 @@ function mime_header_encode($string, $charset = 'UTF-8') {
/**
* Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
+ * Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<").
*
* @param $text
* The text to decode entities in.
@@ -1751,20 +1752,33 @@ function decode_entities($text, $exclude = array()) {
$table = array_flip(get_html_translation_table(HTML_ENTITIES));
// PHP gives us ISO-8859-1 data, we need UTF-8.
$table = array_map('utf8_encode', $table);
+ // Add apostrophe (XML)
+ $table['&apos;'] = "'";
}
- $text = strtr($text, array_diff($table, $exclude));
+ $newtable = array_diff($table, $exclude);
- // Any remaining entities are numerical. Use a regexp to replace them.
- return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $exclude)', $text);
+ // Use a regexp to select all entities in one pass, to avoid decoding double-escaped entities twice.
+ return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text);
}
/**
* Helper function for decode_entities
*/
-function _decode_entities($hex, $codepoint, $original, $exclude) {
- if ($hex != '') {
+function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
+ // Named entity
+ if (!$prefix) {
+ if (isset($table[$original])) {
+ return $table[$original];
+ }
+ else {
+ return $original;
+ }
+ }
+ // Hexadecimal numerical entity
+ if ($prefix == '#x') {
$codepoint = base_convert($codepoint, 16, 10);
}
+ // Encode codepoint as UTF-8 bytes
if ($codepoint < 0x80) {
$str = chr($codepoint);
}
@@ -1783,6 +1797,7 @@ function _decode_entities($hex, $codepoint, $original, $exclude) {
. chr(0x80 | (($codepoint >> 6) & 0x3F))
. chr(0x80 | ( $codepoint & 0x3F));
}
+ // Check for excluded characters
if (in_array($str, $exclude)) {
return $original;
}
diff --git a/modules/aggregator.module b/modules/aggregator.module
index 51acc9374..0d7601db3 100644
--- a/modules/aggregator.module
+++ b/modules/aggregator.module
@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
}
xml_parser_free($xml_parser);
- // initialize the translation table:
- $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
- $tt['&apos;'] = "'";
-
/*
** We reverse the array such that we store the first item last,
** and the last item first. In the database, the newest item
@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
// Prepare the item:
foreach ($item as $key => $value) {
// TODO: Make handling of aggregated HTML more flexible/configurable.
- $value = strtr(trim($value), $tt);
+ $value = decode_entities(trim($value));
$value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
$value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
$value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
$output .= " <div class=\"body\">\n";
$output .= ' <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n";
if ($item->description) {
- $output .= ' <div class="description">'. check_plain($item->description) ."</div>\n";
+ $output .= ' <div class="description">'. $item->description ."</div>\n";
}
if ($item->ftitle && $item->fid) {
$output .= ' <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n";
diff --git a/modules/aggregator/aggregator.module b/modules/aggregator/aggregator.module
index 51acc9374..0d7601db3 100644
--- a/modules/aggregator/aggregator.module
+++ b/modules/aggregator/aggregator.module
@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
}
xml_parser_free($xml_parser);
- // initialize the translation table:
- $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
- $tt['&apos;'] = "'";
-
/*
** We reverse the array such that we store the first item last,
** and the last item first. In the database, the newest item
@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
// Prepare the item:
foreach ($item as $key => $value) {
// TODO: Make handling of aggregated HTML more flexible/configurable.
- $value = strtr(trim($value), $tt);
+ $value = decode_entities(trim($value));
$value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
$value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
$value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
$output .= " <div class=\"body\">\n";
$output .= ' <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n";
if ($item->description) {
- $output .= ' <div class="description">'. check_plain($item->description) ."</div>\n";
+ $output .= ' <div class="description">'. $item->description ."</div>\n";
}
if ($item->ftitle && $item->fid) {
$output .= ' <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n";