summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- includes/common.inc 33
1 files changed, 24 insertions, 9 deletions
diff --git a/includes/common.inc b/includes/common.inc
index 097ab84c2..4a029dcc3 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -587,6 +587,9 @@ function valid_input_data($data) {
else {
// Detect dangerous input data.
+ // Decode all normal character entities.
+ $data = decode_entities($data, array('<', '&', '"'));
+
// Check strings:
$match = preg_match('/\Wjavascript\s*:/i', $data);
$match += preg_match('/\Wexpression\s*\(/i', $data);
@@ -1675,47 +1678,59 @@ function mime_header_encode($string, $charset = 'UTF-8') {
/**
* Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
+ *
+ * @param $text
+ * The text to decode entities in.
+ * @param $exclude
+ * An array of characters which should not be decoded. For example,
+ * array('<', '&', '"'). This affects both named and numerical entities.
*/
-function decode_entities($text) {
+function decode_entities($text, $exclude = array()) {
static $table;
// We store named entities in a table for quick processing.
if (!isset($table)) {
// Get all named HTML entities.
$table = array_flip(get_html_translation_table(HTML_ENTITIES, $special));
- // PHP gives us Windows-1252/ISO-8859-1 data, we need UTF-8.
+ // PHP gives us ISO-8859-1 data, we need UTF-8.
$table = array_map('utf8_encode', $table);
}
- $text = strtr($text, $table);
+ $text = strtr($text, array_diff($table, $exclude));
// Any remaining entities are numerical. Use a regexp to replace them.
- return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2")', $text);
+ return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $exclude)', $text);
}
/**
* Helper function for decode_entities
*/
-function _decode_entities($hex, $codepoint) {
+function _decode_entities($hex, $codepoint, $original, $exclude) {
if ($hex != '') {
$codepoint = base_convert($codepoint, 16, 10);
}
if ($codepoint < 0x80) {
- return chr($codepoint);
+ $str = chr($codepoint);
}
else if ($codepoint < 0x800) {
- return chr(0xC0 | ($codepoint >> 6))
+ $str = chr(0xC0 | ($codepoint >> 6))
. chr(0x80 | ($codepoint & 0x3F));
}
else if ($codepoint < 0x10000) {
- return chr(0xE0 | ( $codepoint >> 12))
+ $str = chr(0xE0 | ( $codepoint >> 12))
. chr(0x80 | (($codepoint >> 6) & 0x3F))
. chr(0x80 | ( $codepoint & 0x3F));
}
else if ($codepoint < 0x200000) {
- return chr(0xF0 | ( $codepoint >> 18))
+ $str = chr(0xF0 | ( $codepoint >> 18))
. chr(0x80 | (($codepoint >> 12) & 0x3F))
. chr(0x80 | (($codepoint >> 6) & 0x3F))
. chr(0x80 | ( $codepoint & 0x3F));
}
+ if (in_array($str, $exclude)) {
+ return $original;
+ }
+ else {
+ return $str;
+ }
}
/**