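- #19874: Fix aggregator escaping after the check_plain bug: feed item values are now decoded with decode_entities() when parsed, and the item description is output as already-filtered HTML instead of being passed through check_plain() again.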

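- Fix bug in decode_entities() with double-escaped entities: named and numerical entities are now decoded in a single regexp pass, so "&amp;lt;" becomes "&lt;" rather than "<".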
author: Steven Wittens <steven@10.no-reply.drupal.org> 2005-04-07 22:46:55 +0000
committer: Steven Wittens <steven@10.no-reply.drupal.org> 2005-04-07 22:46:55 +0000
commit: 0f41706acc230a03e241b302d46b552e801770d0 (patch)
tree: 330ccc004aed92b4f73af2fafc2f83f998f5c01e
parent: 7d3d5532dfb93d1ed9e04b64e2576312591f1f28 (diff)
download: brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.gz
brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.bz2
3 files changed, 24 insertions, 17 deletions
diff --git a/includes/common.inc b/includes/common.inc
index 15c8041d2..d6cd3f820 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -1736,6 +1736,7 @@ function mime_header_encode($string, $charset = 'UTF-8') {
 
 /**
  * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
+ * Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<").
  *
  * @param $text
  *   The text to decode entities in.
@@ -1751,20 +1752,33 @@ function decode_entities($text, $exclude = array()) {
     $table = array_flip(get_html_translation_table(HTML_ENTITIES));
     // PHP gives us ISO-8859-1 data, we need UTF-8.
     $table = array_map('utf8_encode', $table);
+    // Add apostrophe (XML)
+    $table['&apos;'] = "'";
   }
-  $text = strtr($text, array_diff($table, $exclude));
+  $newtable = array_diff($table, $exclude);
 
-  // Any remaining entities are numerical. Use a regexp to replace them.
-  return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $exclude)', $text);
+  // Use a regexp to select all entities in one pass, to avoid decoding double-escaped entities twice.
+  return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text);
 }
 
 /**
  * Helper function for decode_entities
  */
-function _decode_entities($hex, $codepoint, $original, $exclude) {
-  if ($hex != '') {
+function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
+  // Named entity
+  if (!$prefix) {
+    if (isset($table[$original])) {
+      return $table[$original];
+    }
+    else {
+      return $original;
+    }
+  }
+  // Hexadecimal numerical entity
+  if ($prefix == '#x') {
     $codepoint = base_convert($codepoint, 16, 10);
   }
+  // Encode codepoint as UTF-8 bytes
   if ($codepoint < 0x80) {
     $str = chr($codepoint);
   }
@@ -1783,6 +1797,7 @@ function _decode_entities($hex, $codepoint, $original, $exclude) {
          . chr(0x80 | (($codepoint >> 6)  & 0x3F))
          . chr(0x80 | ( $codepoint        & 0x3F));
   }
+  // Check for excluded characters
   if (in_array($str, $exclude)) {
     return $original;
   }
diff --git a/modules/aggregator.module b/modules/aggregator.module
index 51acc9374..0d7601db3 100644
--- a/modules/aggregator.module
+++ b/modules/aggregator.module
@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
   }
   xml_parser_free($xml_parser);
 
-  // initialize the translation table:
-  $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
-  $tt['&apos;'] = "'";
-
   /*
   ** We reverse the array such that we store the first item last,
   ** and the last item first.  In the database, the newest item
@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
     // Prepare the item:
     foreach ($item as $key => $value) {
       // TODO: Make handling of aggregated HTML more flexible/configurable.
-      $value = strtr(trim($value), $tt);
+      $value = decode_entities(trim($value));
       $value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
       $value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
       $value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
   $output .= " <div class=\"body\">\n";
   $output .= '  <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n";
   if ($item->description) {
-    $output .= '  <div class="description">'. check_plain($item->description) ."</div>\n";
+    $output .= '  <div class="description">'. $item->description ."</div>\n";
   }
   if ($item->ftitle && $item->fid) {
     $output .= '  <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n";
diff --git a/modules/aggregator/aggregator.module b/modules/aggregator/aggregator.module
index 51acc9374..0d7601db3 100644
--- a/modules/aggregator/aggregator.module
+++ b/modules/aggregator/aggregator.module
@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
   }
   xml_parser_free($xml_parser);
 
-  // initialize the translation table:
-  $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
-  $tt['&apos;'] = "'";
-
   /*
   ** We reverse the array such that we store the first item last,
   ** and the last item first.  In the database, the newest item
@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
     // Prepare the item:
     foreach ($item as $key => $value) {
       // TODO: Make handling of aggregated HTML more flexible/configurable.
-      $value = strtr(trim($value), $tt);
+      $value = decode_entities(trim($value));
       $value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
       $value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
       $value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
   $output .= " <div class=\"body\">\n";
   $output .= '  <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n";
   if ($item->description) {
-    $output .= '  <div class="description">'. check_plain($item->description) ."</div>\n";
+    $output .= '  <div class="description">'. $item->description ."</div>\n";
   }
   if ($item->ftitle && $item->fid) {
     $output .= '  <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n";
author	Steven Wittens <steven@10.no-reply.drupal.org>	2005-04-07 22:46:55 +0000
committer	Steven Wittens <steven@10.no-reply.drupal.org>	2005-04-07 22:46:55 +0000
commit	0f41706acc230a03e241b302d46b552e801770d0 (patch)
tree	330ccc004aed92b4f73af2fafc2f83f998f5c01e
parent	7d3d5532dfb93d1ed9e04b64e2576312591f1f28 (diff)
download	brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.gz brdo-0f41706acc230a03e241b302d46b552e801770d0.tar.bz2