summary | refs | log | tree | commit | diff
path: root/includes/bootstrap.inc
diff options
context:
space:
mode:
Diffstat (limited to 'includes/bootstrap.inc')
-rw-r--r-- includes/bootstrap.inc | 13
1 files changed, 5 insertions, 8 deletions
diff --git a/includes/bootstrap.inc b/includes/bootstrap.inc
index da33a88f2..3a661aca4 100644
--- a/includes/bootstrap.inc
+++ b/includes/bootstrap.inc
@@ -815,14 +815,8 @@ function check_plain($text) {
* is outside of a tag, and thus deemed safe by a filter, can be interpreted
* by the browser as if it were inside the tag.
*
- * This function exploits preg_match behaviour (since PHP 4.3.5) when used
- * with the u modifier, as a fast way to find invalid UTF-8. When the matched
- * string contains an invalid byte sequence, it will fail silently.
- *
- * preg_match may not fail on 4 and 5 octet sequences, even though they
- * are not supported by the specification.
- *
- * The specific preg_match behaviour is present since PHP 4.3.5.
+ * The function does not return FALSE for strings containing character codes
+ * above U+10FFFF, even though these are prohibited by RFC 3629.
*
* @param $text
* The text to check.
@@ -833,6 +827,9 @@ function drupal_validate_utf8($text) {
if (strlen($text) == 0) {
return TRUE;
}
+ // With the PCRE_UTF8 modifier 'u', preg_match() fails silently on strings
+ // containing invalid UTF-8 byte sequences. It does not reject character
+ // codes above U+10FFFF (represented by 4 or more octets), though.
return (preg_match('/^./us', $text) == 1);
}