From 45f34467fbc5c398adccc9555a15dbec7353accf Mon Sep 17 00:00:00 2001 From: Dries Buytaert Date: Fri, 2 Jan 2009 22:09:53 +0000 Subject: - Patch #212130 by Damien Tournoud, grendzy: decode_entities() should support all (X)HTML entities. --- includes/unicode.inc | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'includes/unicode.inc') diff --git a/includes/unicode.inc b/includes/unicode.inc index d9b81f062..f25f472d2 100644 --- a/includes/unicode.inc +++ b/includes/unicode.inc @@ -323,33 +323,30 @@ function _mime_header_decode($matches) { * array('<', '&', '"'). This affects both named and numerical entities. */ function decode_entities($text, $exclude = array()) { - static $table; - // We store named entities in a table for quick processing. - if (!isset($table)) { - // Get all named HTML entities. - $table = array_flip(get_html_translation_table(HTML_ENTITIES)); - // PHP gives us ISO-8859-1 data, we need UTF-8. - $table = array_map('utf8_encode', $table); - // Add apostrophe (XML) - $table['&apos;'] = "'"; - } - $newtable = array_diff($table, $exclude); + static $html_entities; + if (!isset($html_entities)) { + include DRUPAL_ROOT . '/includes/unicode.entities.inc'; + } + + // Flip the exclude list so that we can do quick lookups later. + $exclude = array_flip($exclude); // Use a regexp to select all entities in one pass, to avoid decoding // double-escaped entities twice. The PREG_REPLACE_EVAL modifier 'e' is // being used to allow for a callback (see // http://php.net/manual/en/reference.pcre.pattern.modifiers). 
- return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text); + return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $html_entities, $exclude)', $text); } /** * Helper function for decode_entities */ -function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) { +function _decode_entities($prefix, $codepoint, $original, &$html_entities, &$exclude) { // Named entity if (!$prefix) { - if (isset($table[$original])) { - return $table[$original]; + // A named entity not in the exclude list. + if (isset($html_entities[$original]) && !isset($exclude[$html_entities[$original]])) { + return $html_entities[$original]; } else { return $original; @@ -383,7 +380,7 @@ function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) { . chr(0x80 | ( $codepoint & 0x3F)); } // Check for excluded characters - if (in_array($str, $exclude)) { + if (isset($exclude[$str])) { return $original; } else { -- cgit v1.2.3