summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- includes/common.inc 14
1 files changed, 12 insertions, 2 deletions
diff --git a/includes/common.inc b/includes/common.inc
index bae9c2554..0fc380eef 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -1710,7 +1710,7 @@ function drupal_map_assoc($array, $function = NULL) {
* be used instead of xml_parser_create(), because PHP's XML parser doesn't check
* the input encoding itself.
*
- * This is also where unsupported encodings should be converted.
+ * This is also where unsupported encodings will be converted.
* Callers should take this into account: $data might have been changed after
* the call.
*
@@ -1720,8 +1720,18 @@ function drupal_map_assoc($array, $function = NULL) {
* An XML parser object.
*/
function drupal_xml_parser_create(&$data) {
+ // Default XML encoding is UTF-8
$encoding = 'utf-8';
- if (ereg('^<\?xml[^>]+encoding="([^"]+)"', $data, $match)) {
+ $bom = false;
+
+ // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
+ if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
+ $bom = true;
+ $data = substr($data, 3);
+ }
+
+ // Check for an encoding declaration in the XML prolog if no BOM was found.
+ if (!$bom && ereg('^<\?xml[^>]+encoding="([^"]+)"', $data, $match)) {
$encoding = $match[1];
}