summaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r--modules/filter/filter.module282
1 files changed, 0 insertions, 282 deletions
diff --git a/modules/filter/filter.module b/modules/filter/filter.module
index 2cef2ad5e..d3a452b7c 100644
--- a/modules/filter/filter.module
+++ b/modules/filter/filter.module
@@ -912,287 +912,5 @@ function _filter_autop($text) {
}
/**
- * Very permissive XSS/HTML filter for admin-only use.
- *
- * Use only for fields where it is impractical to use the
- * whole filter system, but where some (mainly inline) mark-up
- * is desired (so check_plain() is not acceptable).
- *
- * Allows all tags that can be used inside an HTML body, save
- * for scripts and styles.
- */
-function filter_xss_admin($string) {
- return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'ul', 'var'));
-}
-
-/**
- * Filters XSS. Based on kses by Ulf Harnhammar, see
- * http://sourceforge.net/projects/kses
- *
- * For examples of various XSS attacks, see:
- * http://ha.ckers.org/xss.html
- *
- * This code does four things:
- * - Removes characters and constructs that can trick browsers
- * - Makes sure all HTML entities are well-formed
- * - Makes sure all HTML tags and attributes are well-formed
- * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
- *
- * @param $string
- * The string with raw HTML in it. It will be stripped of everything that can cause
- * an XSS attack.
- * @param $allowed_tags
- * An array of allowed tags.
- */
-function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
- // Only operate on valid UTF-8 strings. This is necessary to prevent cross
- // site scripting issues on Internet Explorer 6.
- if (!drupal_validate_utf8($string)) {
- return '';
- }
- // Store the text format
- _filter_xss_split($allowed_tags, TRUE);
- // Remove NULL characters (ignored by some browsers)
- $string = str_replace(chr(0), '', $string);
- // Remove Netscape 4 JS entities
- $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
-
- // Defuse all HTML entities
- $string = str_replace('&', '&', $string);
- // Change back only well-formed entities in our whitelist
- // Named entities
- $string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
- // Decimal numeric entities
- $string = preg_replace('/&#([0-9]+;)/', '&#\1', $string);
- // Hexadecimal numeric entities
- $string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
-
- return preg_replace_callback('%
- (
- <(?=[^a-zA-Z!/]) # a lone <
- | # or
- <[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string
- | # or
- > # just a >
- )%x', '_filter_xss_split', $string);
-}
-
-/**
- * Processes an HTML tag.
- *
- * @param $m
- * An array with various meaning depending on the value of $store.
- * If $store is TRUE then the array contains the allowed tags.
- * If $store is FALSE then the array has one element, the HTML tag to process.
- * @param $store
- * Whether to store $m.
- * @return
- * If the element isn't allowed, an empty string. Otherwise, the cleaned up
- * version of the HTML element.
- */
-function _filter_xss_split($m, $store = FALSE) {
- static $allowed_html;
-
- if ($store) {
- $allowed_html = array_flip($m);
- return;
- }
-
- $string = $m[1];
-
- if (substr($string, 0, 1) != '<') {
- // We matched a lone ">" character
- return '&gt;';
- }
- elseif (strlen($string) == 1) {
- // We matched a lone "<" character
- return '&lt;';
- }
-
- if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
- // Seriously malformed
- return '';
- }
-
- $slash = trim($matches[1]);
- $elem = &$matches[2];
- $attrlist = &$matches[3];
-
- if (!isset($allowed_html[strtolower($elem)])) {
- // Disallowed HTML element
- return '';
- }
-
- if ($slash != '') {
- return "</$elem>";
- }
-
- // Is there a closing XHTML slash at the end of the attributes?
- $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
- $xhtml_slash = $count ? ' /' : '';
-
- // Clean up attributes
- $attr2 = implode(' ', _filter_xss_attributes($attrlist));
- $attr2 = preg_replace('/[<>]/', '', $attr2);
- $attr2 = strlen($attr2) ? ' ' . $attr2 : '';
-
- return "<$elem$attr2$xhtml_slash>";
-}
-
-/**
- * Processes a string of HTML attributes.
- *
- * @return
- * Cleaned up version of the HTML attributes.
- */
-function _filter_xss_attributes($attr) {
- $attrarr = array();
- $mode = 0;
- $attrname = '';
-
- while (strlen($attr) != 0) {
- // Was the last operation successful?
- $working = 0;
-
- switch ($mode) {
- case 0:
- // Attribute name, href for instance
- if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
- $attrname = strtolower($match[1]);
- $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
- $working = $mode = 1;
- $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
- }
-
- break;
-
- case 1:
- // Equals sign or valueless ("selected")
- if (preg_match('/^\s*=\s*/', $attr)) {
- $working = 1; $mode = 2;
- $attr = preg_replace('/^\s*=\s*/', '', $attr);
- break;
- }
-
- if (preg_match('/^\s+/', $attr)) {
- $working = 1; $mode = 0;
- if (!$skip) {
- $attrarr[] = $attrname;
- }
- $attr = preg_replace('/^\s+/', '', $attr);
- }
-
- break;
-
- case 2:
- // Attribute value, a URL after href= for instance
- if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
- $thisval = filter_xss_bad_protocol($match[1]);
-
- if (!$skip) {
- $attrarr[] = "$attrname=\"$thisval\"";
- }
- $working = 1;
- $mode = 0;
- $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
- break;
- }
-
- if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
- $thisval = filter_xss_bad_protocol($match[1]);
-
- if (!$skip) {
- $attrarr[] = "$attrname='$thisval'";
- }
- $working = 1; $mode = 0;
- $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
- break;
- }
-
- if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
- $thisval = filter_xss_bad_protocol($match[1]);
-
- if (!$skip) {
- $attrarr[] = "$attrname=\"$thisval\"";
- }
- $working = 1; $mode = 0;
- $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
- }
-
- break;
- }
-
- if ($working == 0) {
- // not well formed, remove and try again
- $attr = preg_replace('/
- ^
- (
- "[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string
- | # or
- \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
- | # or
- \S # - a non-whitespace character
- )* # any number of the above three
- \s* # any number of whitespaces
- /x', '', $attr);
- $mode = 0;
- }
- }
-
- // the attribute list ends with a valueless attribute like "selected"
- if ($mode == 1) {
- $attrarr[] = $attrname;
- }
- return $attrarr;
-}
-
-/**
- * Processes an HTML attribute value and ensures it does not contain an URL
- * with a disallowed protocol (e.g. javascript:)
- *
- * @param $string
- * The string with the attribute value.
- * @param $decode
- * Whether to decode entities in the $string. Set to FALSE if the $string
- * is in plain text, TRUE otherwise. Defaults to TRUE.
- * @return
- * Cleaned up and HTML-escaped version of $string.
- */
-function filter_xss_bad_protocol($string, $decode = TRUE) {
- static $allowed_protocols;
- if (!isset($allowed_protocols)) {
- $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', array('ftp', 'http', 'https', 'irc', 'mailto', 'news', 'nntp', 'rtsp', 'sftp', 'ssh', 'telnet', 'webcal')));
- }
-
- // Get the plain text representation of the attribute value (i.e. its meaning).
- if ($decode) {
- $string = decode_entities($string);
- }
-
- // Iteratively remove any invalid protocol found.
-
- do {
- $before = $string;
- $colonpos = strpos($string, ':');
- if ($colonpos > 0) {
- // We found a colon, possibly a protocol. Verify.
- $protocol = substr($string, 0, $colonpos);
- // If a colon is preceded by a slash, question mark or hash, it cannot
- // possibly be part of the URL scheme. This must be a relative URL,
- // which inherits the (safe) protocol of the base document.
- if (preg_match('![/?#]!', $protocol)) {
- break;
- }
- // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be case-insensitive
- // Check if this is a disallowed protocol.
- if (!isset($allowed_protocols[strtolower($protocol)])) {
- $string = substr($string, $colonpos + 1);
- }
- }
- } while ($before != $string);
- return check_plain($string);
-}
-
-/**
* @} End of "Standard filters".
*/