summaryrefslogtreecommitdiff
path: root/includes/unicode.inc
diff options
context:
space:
mode:
author Dries Buytaert <dries@buytaert.net> 2009-01-02 22:09:53 +0000
committer Dries Buytaert <dries@buytaert.net> 2009-01-02 22:09:53 +0000
commit 45f34467fbc5c398adccc9555a15dbec7353accf (patch)
tree 397285ff394e82ed369b6cf85574ad260e871077 /includes/unicode.inc
parent 86aa636c8b5b2a3ef9437352af28cb8ed5f04523 (diff)
download brdo-45f34467fbc5c398adccc9555a15dbec7353accf.tar.gz
brdo-45f34467fbc5c398adccc9555a15dbec7353accf.tar.bz2
- Patch #212130 by Damien Tournoud, grendzy: decode_entities() should support all (X)HTML entities.
Diffstat (limited to 'includes/unicode.inc')
-rw-r--r-- includes/unicode.inc 29
1 files changed, 13 insertions, 16 deletions
diff --git a/includes/unicode.inc b/includes/unicode.inc
index d9b81f062..f25f472d2 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -323,33 +323,30 @@ function _mime_header_decode($matches) {
* array('<', '&', '"'). This affects both named and numerical entities.
*/
function decode_entities($text, $exclude = array()) {
- static $table;
- // We store named entities in a table for quick processing.
- if (!isset($table)) {
- // Get all named HTML entities.
- $table = array_flip(get_html_translation_table(HTML_ENTITIES));
- // PHP gives us ISO-8859-1 data, we need UTF-8.
- $table = array_map('utf8_encode', $table);
- // Add apostrophe (XML)
- $table['&apos;'] = "'";
- }
- $newtable = array_diff($table, $exclude);
+ static $html_entities;
+ if (!isset($html_entities)) {
+ include DRUPAL_ROOT . '/includes/unicode.entities.inc';
+ }
+
+ // Flip the exclude list so that we can do quick lookups later.
+ $exclude = array_flip($exclude);
// Use a regexp to select all entities in one pass, to avoid decoding
// double-escaped entities twice. The PREG_REPLACE_EVAL modifier 'e' is
// being used to allow for a callback (see
// http://php.net/manual/en/reference.pcre.pattern.modifiers).
- return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text);
+ return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $html_entities, $exclude)', $text);
}
/**
* Helper function for decode_entities
*/
-function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
+function _decode_entities($prefix, $codepoint, $original, &$html_entities, &$exclude) {
// Named entity
if (!$prefix) {
- if (isset($table[$original])) {
- return $table[$original];
+ // A named entity not in the exclude list.
+ if (isset($html_entities[$original]) && !isset($exclude[$html_entities[$original]])) {
+ return $html_entities[$original];
}
else {
return $original;
@@ -383,7 +380,7 @@ function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
. chr(0x80 | ( $codepoint & 0x3F));
}
// Check for excluded characters
- if (in_array($str, $exclude)) {
+ if (isset($exclude[$str])) {
return $original;
}
else {