diff options
author | Dries Buytaert <dries@buytaert.net> | 2009-01-02 22:09:53 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2009-01-02 22:09:53 +0000 |
commit | 45f34467fbc5c398adccc9555a15dbec7353accf (patch) | |
tree | 397285ff394e82ed369b6cf85574ad260e871077 /includes/unicode.inc | |
parent | 86aa636c8b5b2a3ef9437352af28cb8ed5f04523 (diff) | |
download | brdo-45f34467fbc5c398adccc9555a15dbec7353accf.tar.gz brdo-45f34467fbc5c398adccc9555a15dbec7353accf.tar.bz2 |
- Patch #212130 by Damien Tournoud, grendzy: decode_entities() should support all (X)HTML entities.
Diffstat (limited to 'includes/unicode.inc')
-rw-r--r-- | includes/unicode.inc | 29 |
1 files changed, 13 insertions, 16 deletions
diff --git a/includes/unicode.inc b/includes/unicode.inc index d9b81f062..f25f472d2 100644 --- a/includes/unicode.inc +++ b/includes/unicode.inc @@ -323,33 +323,30 @@ function _mime_header_decode($matches) { * array('<', '&', '"'). This affects both named and numerical entities. */ function decode_entities($text, $exclude = array()) { - static $table; - // We store named entities in a table for quick processing. - if (!isset($table)) { - // Get all named HTML entities. - $table = array_flip(get_html_translation_table(HTML_ENTITIES)); - // PHP gives us ISO-8859-1 data, we need UTF-8. - $table = array_map('utf8_encode', $table); - // Add apostrophe (XML) - $table['&apos;'] = "'"; - } - $newtable = array_diff($table, $exclude); + static $html_entities; + if (!isset($html_entities)) { + include DRUPAL_ROOT . '/includes/unicode.entities.inc'; + } + + // Flip the exclude list so that we can do quick lookups later. + $exclude = array_flip($exclude); // Use a regexp to select all entities in one pass, to avoid decoding // double-escaped entities twice. The PREG_REPLACE_EVAL modifier 'e' is // being used to allow for a callback (see // http://php.net/manual/en/reference.pcre.pattern.modifiers). - return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text); + return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $html_entities, $exclude)', $text); } /** * Helper function for decode_entities */ -function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) { +function _decode_entities($prefix, $codepoint, $original, &$html_entities, &$exclude) { // Named entity if (!$prefix) { - if (isset($table[$original])) { - return $table[$original]; + // A named entity not in the exclude list. 
+ if (isset($html_entities[$original]) && !isset($exclude[$html_entities[$original]])) { + return $html_entities[$original]; } else { return $original; @@ -383,7 +380,7 @@ function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) { . chr(0x80 | ( $codepoint & 0x3F)); } // Check for excluded characters - if (in_array($str, $exclude)) { + if (isset($exclude[$str])) { return $original; } else { |