- Various fixes. Updated CHANGELOG.txt

author: Dries Buytaert <dries@buytaert.net> 2005-11-29 20:17:10 +0000
committer: Dries Buytaert <dries@buytaert.net> 2005-11-29 20:17:10 +0000
commit: 2d0df351d704ba34d191831f7d4b8cb385555be2 (patch)
tree: b18d02dfa6d075ff540eeffc4ab38393e53a3da0
parent: 236da7af059e6cf3aba6e8ec923e082b1f8fe143 (diff)
download: brdo-2d0df351d704ba34d191831f7d4b8cb385555be2.tar.gz
brdo-2d0df351d704ba34d191831f7d4b8cb385555be2.tar.bz2
16 files changed, 683 insertions, 288 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 1a68edc4f..91521595d 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -52,6 +52,10 @@ Drupal x.x.x, xxxx-xx-xx (development version)
 - PostgreSQL support:
     * removed dependency on PL/pgSQL procedural language
 
+Drupal 4.6.4, 2005-11-29
+------------------------
+- fixed bugs, including 3 security vulnerabilities.
+
 Drupal 4.6.3, 2005-08-15
 ------------------------
 - fixed bugs, including a critical "arbitrary PHP code execution" bug.
@@ -108,6 +112,10 @@ Drupal 4.6.0, 2005-04-15
 - documentation:
     * improved and extended PHPDoc/Doxygen comments.
 
+Drupal 4.5.6, 2005-11-29
+------------------------
+- fixed bugs, including 3 security vulnerabilities.
+
 Drupal 4.5.5, 2005-08-15
 ------------------------
 - fixed bugs, including a critical "arbitrary PHP code execution" bug.
diff --git a/includes/bootstrap.inc b/includes/bootstrap.inc
index 6c87605ac..9d3e44cd0 100644
--- a/includes/bootstrap.inc
+++ b/includes/bootstrap.inc
@@ -708,14 +708,13 @@ function arg($index) {
 }
 
 /**
- * Prepare a URL for use in an HTML attribute.
+ * Prepare a URL for use in an HTML attribute. Strips harmful protocols.
  *
- * We replace ( and ) with their url-encoded equivalents to prevent XSS attacks.
+ * The result is HTML-escaped by filter_xss_bad_protocol() (via check_plain()),
+ * so the URL must not be escaped a second time here.
  */
 function check_url($uri) {
-  $uri = htmlspecialchars($uri, ENT_QUOTES);
-
-  $uri = strtr($uri, array('(' => '%28', ')' => '%29'));
+  $uri = filter_xss_bad_protocol($uri, FALSE);
 
   return $uri;
 }
diff --git a/includes/common.inc b/includes/common.inc
index 84c955eed..08545ce0e 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -616,13 +616,6 @@ function t($string, $args = 0) {
 }
 
 /**
- * Encode special characters in a plain-text string for display as HTML.
- */
-function check_plain($text) {
-  return htmlspecialchars($text, ENT_QUOTES);
-}
-
-/**
  * @defgroup validation Input validation
  * @{
  * Functions to validate user input.
@@ -668,54 +661,6 @@ function valid_url($url, $absolute = FALSE) {
 }
 
 /**
- * Validate data input by a user.
- *
- * Ensures that user data cannot be used to perform attacks on the site.
- *
- * @param $data
- *   The input to check.
- * @return
- *   TRUE if the input data is acceptable.
- */
-function valid_input_data($data) {
-  if (is_array($data) || is_object($data)) {
-    // Form data can contain a number of nested arrays.
-    foreach ($data as $key => $value) {
-      if (!valid_input_data($key) || !valid_input_data($value)) {
-        return FALSE;
-      }
-    }
-  }
-  else if (isset($data)) {
-    // Detect dangerous input data.
-
-    // Decode all normal character entities.
-    $data = decode_entities($data, array('<', '&', '"'));
-
-    // Check strings:
-    $match  = preg_match('/\Wjavascript\s*:/i', $data);
-    $match += preg_match('/\Wexpression\s*\(/i', $data);
-    $match += preg_match('/\Walert\s*\(/i', $data);
-
-    // Check attributes:
-    $match += preg_match("/\W(dynsrc|datasrc|data|lowsrc|on[a-z]+)\s*=[^>]+?>/i", $data);
-
-    // Check tags:
-    $match += preg_match("/<\s*(applet|script|object|style|embed|form|blink|meta|html|frame|iframe|layer|ilayer|head|frameset|xml)/i", $data);
-
-    if ($match) {
-      watchdog('security', t('Terminated request because of suspicious input data: %data.', array('%data' => theme('placeholder', $data))));
-      return FALSE;
-    }
-  }
-
-  return TRUE;
-}
-/**
- * @} End of "defgroup validation".
- */
-
-/**
  * Register an event for the current visitor (hostname/IP) to the flood control mechanism.
  *
  * @param $name
@@ -1366,17 +1311,7 @@ function _drupal_bootstrap_full() {
   }
   // Initialize all enabled modules.
   module_init();
-  if (!user_access('bypass input data check')) {
-    // We can't use $_REQUEST because it consists of the contents of $_POST,
-    // $_GET and $_COOKIE: if any of the input arrays share a key, only one
-    // value will be verified.
-    if (!valid_input_data($_GET)
-    || !valid_input_data($_POST)
-    || !valid_input_data($_COOKIE)
-    || !valid_input_data($_FILES)) {
-      die('Terminated request because of suspicious input data.');
-    }
-  }
+  // Undo magic quotes
   fix_gpc_magic();
   // Initialize the localization system.
   $locale = locale_initialize();
diff --git a/includes/database.mysql.inc b/includes/database.mysql.inc
index ed02fa886..3fce79940 100644
--- a/includes/database.mysql.inc
+++ b/includes/database.mysql.inc
@@ -105,7 +105,7 @@ function _db_query($query, $debug = 0) {
     return $result;
   }
   else {
-    trigger_error(mysql_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+    trigger_error(check_plain(mysql_error() ."\nquery: ". $query), E_USER_ERROR);
     return FALSE;
   }
 }
diff --git a/includes/database.mysqli.inc b/includes/database.mysqli.inc
index f2583b214..3eaea5854 100644
--- a/includes/database.mysqli.inc
+++ b/includes/database.mysqli.inc
@@ -113,7 +113,7 @@ function _db_query($query, $debug = 0) {
     return $result;
   }
   else {
-    trigger_error(mysqli_error($active_db) ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+    trigger_error(check_plain(mysqli_error($active_db) ."\nquery: ". $query), E_USER_ERROR);
     return FALSE;
   }
 }
diff --git a/includes/database.pgsql.inc b/includes/database.pgsql.inc
index 415565a47..3c2f8f710 100644
--- a/includes/database.pgsql.inc
+++ b/includes/database.pgsql.inc
@@ -92,7 +92,7 @@ function _db_query($query, $debug = 0) {
     return $last_result;
   }
   else {
-    trigger_error(pg_last_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+    trigger_error(check_plain(pg_last_error() ."\nquery: ". $query), E_USER_ERROR);
     return FALSE;
   }
 }
diff --git a/includes/file.inc b/includes/file.inc
index 41f9a78f1..55f7825d5 100644
--- a/includes/file.inc
+++ b/includes/file.inc
@@ -144,8 +144,24 @@ function file_check_upload($source) {
   elseif ($_FILES["edit"]["name"][$source] && is_uploaded_file($_FILES["edit"]["tmp_name"][$source])) {
     $file = new StdClass();
     $file->filename = trim(basename($_FILES["edit"]["name"][$source]), '.');
-    $file->filemime = $_FILES["edit"]["type"][$source];
     $file->filepath = $_FILES["edit"]["tmp_name"][$source];
+
+    if (function_exists('mime_content_type')) { // Prefer the server-detected MIME type over the one sent by the browser
+      $file->filemime = mime_content_type($file->filepath);
+      if ($file->filemime != $_FILES["edit"]["type"][$source]) { // Log the mismatch; the detected type is kept
+        watchdog('file', t('For %file the system thinks its MIME type is %detected while the user has given %given for MIME type', array('%file' => theme('placeholder', $file->filepath), '%detected' => theme('placeholder', $file->filemime), '%given' => theme('placeholder', $_FILES['edit']['type'][$source]))));
+      }
+    }
+    else {
+      $file->filemime = $_FILES["edit"]["type"][$source];
+    }
+    if (((substr($file->filemime, 0, 5) == 'text/' || strpos($file->filemime, 'javascript')) && (substr($file->filepath, -4) != '.txt')) || preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
+      $file->filemime = 'text/plain';
+      rename($file->filepath, $file->filepath .'.txt');
+      $file->filepath .= '.txt';
+      $file->filename .= '.txt';
+    }
+
     $file->error = $_FILES["edit"]["error"][$source];
     $file->filesize = $_FILES["edit"]["size"][$source];
     $file->source = $source;
diff --git a/includes/unicode.inc b/includes/unicode.inc
index 3508aad64..0c562a8c1 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -303,6 +303,10 @@ function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
   if ($prefix == '#x') {
     $codepoint = base_convert($codepoint, 16, 10);
   }
+  // Decimal numerical entity (strip leading zeros to avoid PHP octal notation)
+  else {
+    $codepoint = preg_replace('/^0+/', '', $codepoint);
+  }
   // Encode codepoint as UTF-8 bytes
   if ($codepoint < 0x80) {
     $str = chr($codepoint);
diff --git a/modules/filter.module b/modules/filter.module
index 41e6ad399..2ee9f1743 100644
--- a/modules/filter.module
+++ b/modules/filter.module
@@ -14,9 +14,6 @@ define('FILTER_FORMAT_DEFAULT', 0);
 define('FILTER_HTML_STRIP', 1);
 define('FILTER_HTML_ESCAPE', 2);
 
-define('FILTER_STYLE_ALLOW', 0);
-define('FILTER_STYLE_STRIP', 1);
-
 /**
  * Implementation of hook_help().
  */
@@ -65,105 +62,100 @@ function filter_filter_tips($delta, $format, $long = false) {
   global $base_url;
   switch ($delta) {
     case 0:
-      switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) {
-
-        case FILTER_HTML_STRIP:
-          if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
-            switch ($long) {
-              case 0:
-                return t('Allowed HTML tags') .': '. check_plain($allowed_html);
-              case 1:
-                $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
-                if (!variable_get("filter_html_help_$format", 1)) {
-                  return $output;
-                }
+      if (variable_get("filter_html_$format", FILTER_HTML_STRIP) ==  FILTER_HTML_STRIP) {
+        if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
+          switch ($long) {
+            case 0:
+              return t('Allowed HTML tags') .': '. check_plain($allowed_html);
+            case 1:
+              $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
+              if (!variable_get("filter_html_help_$format", 1)) {
+                return $output;
+              }
 
-                $output .= t('
+              $output .= t('
 <p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
 <p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
-                $tips = array(
-                  'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
-                  'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
-                  'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
-                  'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
-                  'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
-                  'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
-                  'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
-                  'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
-                  'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
-                  'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
-                  'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
-                  'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
-                  'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
-                  'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
-                  'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
-                  // Assumes and describes tr, td, th.
-                  'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
-                  'tr' => NULL, 'td' => NULL, 'th' => NULL,
-                  'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
-                  'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
-                   // Assumes and describes li.
-                  'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
-                  'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
-                  'li' => NULL,
-                  // Assumes and describes dt and dd.
-                  'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
-                  'dt' => NULL, 'dd' => NULL,
-                  'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
-                  'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
-                  'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
-                  'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
-                  'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
-                  'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
-                );
-                $header = array(t('Tag Description'), t('You Type'), t('You Get'));
-                preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
-                foreach ($out[1] as $tag) {
-                  if (array_key_exists($tag, $tips)) {
-                    if ($tips[$tag]) {
-                      $rows[] = array(
-                        array('data' => $tips[$tag][0], 'class' => 'description'),
-                        array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
-                        array('data' => $tips[$tag][1], 'class' => 'get')
-                      );
-                    }
-                  }
-                  else {
+              $tips = array(
+                'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
+                'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
+                'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
+                'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
+                'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
+                'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
+                'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
+                'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
+                'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
+                'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
+                'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
+                'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
+                'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
+                'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
+                'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
+                // Assumes and describes tr, td, th.
+                'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
+                'tr' => NULL, 'td' => NULL, 'th' => NULL,
+                'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
+                'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
+                 // Assumes and describes li.
+                'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
+                'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
+                'li' => NULL,
+                // Assumes and describes dt and dd.
+                'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
+                'dt' => NULL, 'dd' => NULL,
+                'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
+                'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
+                'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
+                'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
+                'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
+                'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
+              );
+              $header = array(t('Tag Description'), t('You Type'), t('You Get'));
+              preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
+              foreach ($out[1] as $tag) {
+                if (array_key_exists($tag, $tips)) {
+                  if ($tips[$tag]) {
                     $rows[] = array(
-                      array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+                      array('data' => $tips[$tag][0], 'class' => 'description'),
+                      array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
+                      array('data' => $tips[$tag][1], 'class' => 'get')
                     );
                   }
                 }
-                $output .= theme('table', $header, $rows);
+                else {
+                  $rows[] = array(
+                    array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+                  );
+                }
+              }
+              $output .= theme('table', $header, $rows);
 
-                $output .= t('
+              $output .= t('
 <p>Most unusual characters can be directly entered without any problems.</p>
 <p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
-                $entities = array(
-                  array( t('Ampersand'), '&amp;'),
-                  array( t('Greater than'), '&gt;'),
-                  array( t('Less than'), '&lt;'),
-                  array( t('Quotation mark'), '&quot;'),
+              $entities = array(
+                array( t('Ampersand'), '&amp;'),
+                array( t('Greater than'), '&gt;'),
+                array( t('Less than'), '&lt;'),
+                array( t('Quotation mark'), '&quot;'),
+              );
+              $header = array(t('Character Description'), t('You Type'), t('You Get'));
+              unset($rows);
+              foreach ($entities as $entity) {
+                $rows[] = array(
+                  array('data' => $entity[0], 'class' => 'description'),
+                  array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
+                  array('data' => $entity[1], 'class' => 'get')
                 );
-                $header = array(t('Character Description'), t('You Type'), t('You Get'));
-                unset($rows);
-                foreach ($entities as $entity) {
-                  $rows[] = array(
-                    array('data' => $entity[0], 'class' => 'description'),
-                    array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
-                    array('data' => $entity[1], 'class' => 'get')
-                  );
-                }
-                $output .= theme('table', $header, $rows);
-                return $output;
-            }
-          }
-          else {
-            return t('No HTML tags allowed');
+              }
+              $output .= theme('table', $header, $rows);
+              return $output;
           }
-
-        case FILTER_STYLE_STRIP:
+        }
+        else {
           return t('No HTML tags allowed');
+        }
       }
       break;
 
@@ -975,10 +967,9 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
  */
 function _filter_html_settings($format) {
   $form['filter_html'] = array('#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, '#collapsed' => TRUE);
-  $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
+  $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
   $form['filter_html']["allowed_html_$format"] = array('#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), '#size' => 64, '#maxlength' => 255, '#description' => t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.'));
   $form['filter_html']["filter_html_help_$format"] = array('#type' => 'checkbox', '#title' => t('Display HTML help'), '#default_value' => variable_get("filter_html_help_$format", 1), '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'));
-  $form['filter_html']["filter_style_$format"] = array('#type' => 'radios', '#title' => t('HTML style attributes'), '#default_value' => variable_get("filter_style_$format", FILTER_STYLE_STRIP), '#options' => array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), '#description' => t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.'));
   $form['filter_html']["filter_html_nofollow_$format"] = array('#type' => 'checkbox', '#title' => t('Spam link deterrent'), '#default_value' => variable_get("filter_html_nofollow_$format", FALSE), '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'));
 
   return $form;
@@ -989,12 +980,7 @@ function _filter_html_settings($format) {
  */
 function _filter_html($text, $format) {
   if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
-    // Allow users to enter HTML, but filter it
-    $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'));
-    if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) {
-      $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text);
-    }
-    $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text);
+    $text = filter_xss($text, $format);
   }
 
   if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
@@ -1065,6 +1051,245 @@ function _filter_autop($text) {
 }
 
 /**
+ * Filters XSS. Based on kses by Ulf Harnhammar, see
+ * http://sourceforge.net/projects/kses
+ *
+ * For examples of various XSS attacks, see:
+ * http://ha.ckers.org/xss.html
+ *
+ * This code does four things:
+ * - Removes characters and constructs that can trick browsers
+ * - Makes sure all HTML entities are well-formed
+ * - Makes sure all HTML tags and attributes are well-formed
+ * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with raw HTML in it. It will be stripped of everything that can cause
+ *   an XSS attack.
+ * @param $format
+ *   The format to use.
+ */
+function filter_xss($string, $format) {
+  // Store the input format
+  _filter_xss_split($format);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  return preg_replace_callback('%
+    (
+    <[^>]*.(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x
+    ', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * @param $m
+ *   Either a format identifier (scalar), which (re)loads that format's allowed
+ *   tags, or a preg match array whose element 1 is the HTML tag to process.
+ * @return
+ *   Nothing when given a format. Otherwise '&gt;' for a lone ">", an empty string
+ *   for a malformed or disallowed element, or the cleaned up HTML element.
+ */
+function _filter_xss_split($m) {
+  static $allowed_html;
+  // A scalar is a format from filter_xss(); the regex callback always passes an array
+  if (!is_array($m)) {
+    $allowed_html = array_flip(preg_split('/\s+|<|>/', variable_get("allowed_html_$m", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY));
+    return;
+  }
+
+  $string = &$m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = &$matches[2];
+  $attrlist = &$matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes.
+ *
+ * @param $attr
+ *   The raw attribute list of a tag, i.e. the text following the element name.
+ * @return
+ *   Array of cleaned up attributes; "style" and "on..." attributes are left out.
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          // style and event handler (on*) attributes are parsed but never emitted
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+        |               # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        |               # or
+        \S              # - a non-whitespace character
+        )*              # any number of the above three
+        \s*             # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // the attribute list ends with a valueless attribute like "selected"
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Strips disallowed URL protocols (e.g. javascript:) from an HTML attribute value.
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols. "/", "?" and "#" cannot occur in a scheme, so a colon after
+  // one of them (e.g. in "/node/1?t=12:30") belongs to a relative URL and stays.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('!^([^:/?#]+):!', '_filter_xss_bad_protocol', $string);
+  } while ($before != $string);
+  return check_plain($string);
+}
+
+function _filter_xss_bad_protocol($m) {
+  // preg callback: $m[1] is the candidate scheme. Keep it only if whitelisted; schemes are case-insensitive.
+  static $allowed_protocols = array('http' => TRUE, 'https' => TRUE, 'ftp' => TRUE, 'news' => TRUE, 'nntp' => TRUE, 'telnet' => TRUE, 'gopher' => TRUE, 'mailto' => TRUE, 'ed2k' => TRUE, 'smb' => TRUE, 'irc' => TRUE, 'rsync' => TRUE, 'ssh' => TRUE, 'sftp' => TRUE);
+  $string = preg_replace('/\s+/', '', $m[1]);
+  return isset($allowed_protocols[strtolower($string)]) ? "$string:" : '';
+}
+
+/**
  * @} End of "Standard filters".
  */
 
diff --git a/modules/filter/filter.module b/modules/filter/filter.module
index 41e6ad399..2ee9f1743 100644
--- a/modules/filter/filter.module
+++ b/modules/filter/filter.module
@@ -14,9 +14,6 @@ define('FILTER_FORMAT_DEFAULT', 0);
 define('FILTER_HTML_STRIP', 1);
 define('FILTER_HTML_ESCAPE', 2);
 
-define('FILTER_STYLE_ALLOW', 0);
-define('FILTER_STYLE_STRIP', 1);
-
 /**
  * Implementation of hook_help().
  */
@@ -65,105 +62,100 @@ function filter_filter_tips($delta, $format, $long = false) {
   global $base_url;
   switch ($delta) {
     case 0:
-      switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) {
-
-        case FILTER_HTML_STRIP:
-          if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
-            switch ($long) {
-              case 0:
-                return t('Allowed HTML tags') .': '. check_plain($allowed_html);
-              case 1:
-                $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
-                if (!variable_get("filter_html_help_$format", 1)) {
-                  return $output;
-                }
+      if (variable_get("filter_html_$format", FILTER_HTML_STRIP) ==  FILTER_HTML_STRIP) {
+        if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
+          switch ($long) {
+            case 0:
+              return t('Allowed HTML tags') .': '. check_plain($allowed_html);
+            case 1:
+              $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
+              if (!variable_get("filter_html_help_$format", 1)) {
+                return $output;
+              }
 
-                $output .= t('
+              $output .= t('
 <p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
 <p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
-                $tips = array(
-                  'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
-                  'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
-                  'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
-                  'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
-                  'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
-                  'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
-                  'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
-                  'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
-                  'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
-                  'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
-                  'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
-                  'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
-                  'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
-                  'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
-                  'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
-                  // Assumes and describes tr, td, th.
-                  'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
-                  'tr' => NULL, 'td' => NULL, 'th' => NULL,
-                  'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
-                  'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
-                   // Assumes and describes li.
-                  'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
-                  'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
-                  'li' => NULL,
-                  // Assumes and describes dt and dd.
-                  'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
-                  'dt' => NULL, 'dd' => NULL,
-                  'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
-                  'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
-                  'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
-                  'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
-                  'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
-                  'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
-                );
-                $header = array(t('Tag Description'), t('You Type'), t('You Get'));
-                preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
-                foreach ($out[1] as $tag) {
-                  if (array_key_exists($tag, $tips)) {
-                    if ($tips[$tag]) {
-                      $rows[] = array(
-                        array('data' => $tips[$tag][0], 'class' => 'description'),
-                        array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
-                        array('data' => $tips[$tag][1], 'class' => 'get')
-                      );
-                    }
-                  }
-                  else {
+              $tips = array(
+                'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
+                'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
+                'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
+                'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
+                'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
+                'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
+                'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
+                'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
+                'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
+                'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
+                'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
+                'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
+                'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
+                'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
+                'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
+                // Assumes and describes tr, td, th.
+                'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
+                'tr' => NULL, 'td' => NULL, 'th' => NULL,
+                'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
+                'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
+                 // Assumes and describes li.
+                'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
+                'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
+                'li' => NULL,
+                // Assumes and describes dt and dd.
+                'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
+                'dt' => NULL, 'dd' => NULL,
+                'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
+                'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
+                'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
+                'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
+                'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
+                'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
+              );
+              $header = array(t('Tag Description'), t('You Type'), t('You Get'));
+              preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
+              foreach ($out[1] as $tag) {
+                if (array_key_exists($tag, $tips)) {
+                  if ($tips[$tag]) {
                     $rows[] = array(
-                      array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+                      array('data' => $tips[$tag][0], 'class' => 'description'),
+                      array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
+                      array('data' => $tips[$tag][1], 'class' => 'get')
                     );
                   }
                 }
-                $output .= theme('table', $header, $rows);
+                else {
+                  $rows[] = array(
+                    array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+                  );
+                }
+              }
+              $output .= theme('table', $header, $rows);
 
-                $output .= t('
+              $output .= t('
 <p>Most unusual characters can be directly entered without any problems.</p>
 <p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
-                $entities = array(
-                  array( t('Ampersand'), '&amp;'),
-                  array( t('Greater than'), '&gt;'),
-                  array( t('Less than'), '&lt;'),
-                  array( t('Quotation mark'), '&quot;'),
+              $entities = array(
+                array( t('Ampersand'), '&amp;'),
+                array( t('Greater than'), '&gt;'),
+                array( t('Less than'), '&lt;'),
+                array( t('Quotation mark'), '&quot;'),
+              );
+              $header = array(t('Character Description'), t('You Type'), t('You Get'));
+              unset($rows);
+              foreach ($entities as $entity) {
+                $rows[] = array(
+                  array('data' => $entity[0], 'class' => 'description'),
+                  array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
+                  array('data' => $entity[1], 'class' => 'get')
                 );
-                $header = array(t('Character Description'), t('You Type'), t('You Get'));
-                unset($rows);
-                foreach ($entities as $entity) {
-                  $rows[] = array(
-                    array('data' => $entity[0], 'class' => 'description'),
-                    array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
-                    array('data' => $entity[1], 'class' => 'get')
-                  );
-                }
-                $output .= theme('table', $header, $rows);
-                return $output;
-            }
-          }
-          else {
-            return t('No HTML tags allowed');
+              }
+              $output .= theme('table', $header, $rows);
+              return $output;
           }
-
-        case FILTER_STYLE_STRIP:
+        }
+        else {
           return t('No HTML tags allowed');
+        }
       }
       break;
 
@@ -975,10 +967,9 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
  */
 function _filter_html_settings($format) {
   $form['filter_html'] = array('#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, '#collapsed' => TRUE);
-  $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
+  $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
   $form['filter_html']["allowed_html_$format"] = array('#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), '#size' => 64, '#maxlength' => 255, '#description' => t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.'));
   $form['filter_html']["filter_html_help_$format"] = array('#type' => 'checkbox', '#title' => t('Display HTML help'), '#default_value' => variable_get("filter_html_help_$format", 1), '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'));
-  $form['filter_html']["filter_style_$format"] = array('#type' => 'radios', '#title' => t('HTML style attributes'), '#default_value' => variable_get("filter_style_$format", FILTER_STYLE_STRIP), '#options' => array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), '#description' => t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.'));
   $form['filter_html']["filter_html_nofollow_$format"] = array('#type' => 'checkbox', '#title' => t('Spam link deterrent'), '#default_value' => variable_get("filter_html_nofollow_$format", FALSE), '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'));
 
   return $form;
@@ -989,12 +980,7 @@ function _filter_html_settings($format) {
  */
 function _filter_html($text, $format) {
   if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
-    // Allow users to enter HTML, but filter it
-    $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'));
-    if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) {
-      $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text);
-    }
-    $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text);
+    $text = filter_xss($text, $format);
   }
 
   if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
@@ -1065,6 +1051,245 @@ function _filter_autop($text) {
 }
 
 /**
+ * Filters XSS. Based on kses by Ulf Harnhammar, see
+ * http://sourceforge.net/projects/kses
+ *
+ * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html
+ *
+ * This code does four things:
+ * - Removes characters and constructs that can trick browsers
+ * - Makes sure all HTML entities are well-formed
+ * - Makes sure all HTML tags and attributes are well-formed
+ * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with raw HTML in it.
+ * @param $format
+ *   The input format; it is handed to _filter_xss_split() before any tag is processed.
+ * @return
+ *   The string after entity clean-up, with every tag replaced by _filter_xss_split()'s result.
+ */
+function filter_xss($string, $format) {
+  // Hand the input format to _filter_xss_split() before it runs as the callback below
+  _filter_xss_split($format);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities, i.e. "&{...};" constructs
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities by escaping every ampersand
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist (must run after the defusing step)
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities (leading zeros are dropped; the digits must come in pairs)
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  return preg_replace_callback('%
+    (
+    <[^>]*.(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x
+    ', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag, or stores the input format for later calls.
+ *
+ * @param $m
+ *   A format identifier (stores that format's allowed tags and returns), or a
+ *   preg_replace_callback() match array whose element 1 is the HTML tag to process.
+ * @return
+ *   If the element isn't allowed, an empty string. Otherwise, the cleaned up version
+ *   of the HTML element.
+ */
+function _filter_xss_split($m) {
+  static $allowed_html;
+  // A non-array argument is a format identifier: (re)load its allowed tags on every such call.
+  if (!is_array($m)) {
+    $allowed_html = array_flip(preg_split('/\s+|<|>/', variable_get("allowed_html_$m", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY));
+    return;
+  }
+
+  $string = &$m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = &$matches[2];
+  $attrlist = &$matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes (the text that follows the tag name).
+ *
+ * @return
+ *   Array of cleaned up attributes (name, name="value" or name='value'); style and on* are dropped.
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+  $skip = FALSE;
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+    // $mode: 0 = expecting a name, 1 = expecting "=" or whitespace, 2 = expecting a value
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+        |               # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        |               # or
+        \S              # - a non-whitespace character
+        )*              # any number of the above three
+        \s*             # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // The list ends with a valueless attribute like "selected"; style/on* names stay filtered here too
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen (UTF-8 bytes 0xC2 0xAD)
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip leading protocols until the string is stable ("javascript:javascript:" needs two passes).
+  // A colon that follows /, ? or # belongs to a relative URL, not to a protocol, so it is left alone.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('/^([^:\/?#]+):/', '_filter_xss_bad_protocol', $string);
+  } while ($before !== $string);
+  return check_plain($string);
+}
+
+function _filter_xss_bad_protocol($m) { // preg_replace_callback() callback; $m[1] is the text in front of the first colon.
+  static $allowed_protocols = array('http' => TRUE, 'https' => TRUE, 'ftp' => TRUE, 'news' => TRUE, 'nntp' => TRUE, 'telnet' => TRUE, 'gopher' => TRUE, 'mailto' => TRUE, 'e2dk' => TRUE, 'smb' => TRUE, 'irc' => TRUE, 'rsync' => TRUE, 'ssh' => TRUE, 'sftp' => TRUE);
+  $string = strtolower(preg_replace('/\s+/', '', $m[1])); // drop embedded whitespace; URI schemes are case-insensitive, so look them up in lower case
+  return isset($allowed_protocols[$string]) ? "$string:" : ''; // keep a whitelisted protocol, remove anything else
+}
+
+/**
  * @} End of "Standard filters".
  */
 
diff --git a/modules/search.module b/modules/search.module
index 6cba60fe4..74535839e 100644
--- a/modules/search.module
+++ b/modules/search.module
@@ -332,7 +332,7 @@ function search_simplify($text) {
 
   // Simple CJK handling
   if (variable_get('overlap_cjk', true)) {
-    $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);    
+    $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
   }
 
   // To improve searching for numerical data such as dates, IP addresses
diff --git a/modules/search/search.module b/modules/search/search.module
index 6cba60fe4..74535839e 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -332,7 +332,7 @@ function search_simplify($text) {
 
   // Simple CJK handling
   if (variable_get('overlap_cjk', true)) {
-    $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);    
+    $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
   }
 
   // To improve searching for numerical data such as dates, IP addresses
diff --git a/modules/system.module b/modules/system.module
index d31d4dd62..248ae68a4 100644
--- a/modules/system.module
+++ b/modules/system.module
@@ -47,7 +47,7 @@ function system_help($section) {
  * Implementation of hook_perm().
  */
 function system_perm() {
-  return array('administer site configuration', 'access administration pages', 'bypass input data check');
+  return array('administer site configuration', 'access administration pages');
 }
 
 /**
diff --git a/modules/system/system.module b/modules/system/system.module
index d31d4dd62..248ae68a4 100644
--- a/modules/system/system.module
+++ b/modules/system/system.module
@@ -47,7 +47,7 @@ function system_help($section) {
  * Implementation of hook_perm().
  */
 function system_perm() {
-  return array('administer site configuration', 'access administration pages', 'bypass input data check');
+  return array('administer site configuration', 'access administration pages');
 }
 
 /**
diff --git a/modules/upload.module b/modules/upload.module
index 195e9d677..79254ba83 100644
--- a/modules/upload.module
+++ b/modules/upload.module
@@ -208,14 +208,6 @@ function upload_nodeapi(&$node, $op, $arg) {
           }
         }
 
-        // Rename possibly executable scripts to prevent accidental execution.
-        // Uploaded files are attachments and should be shown in their original
-        // form, rather than run.
-        if (preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
-          $file->filename .= '.txt';
-          $file->filemime = 'text/plain';
-        }
-
         if ($error['extension'] == count($user->roles) && $user->uid != 1) {
           form_set_error('upload', t('The selected file %name can not be attached to this post, because it is only possible to attach files with the following extensions: %files-allowed.', array('%name' => theme('placeholder', $file->filename), '%files-allowed' => theme('placeholder', $extensions))));
         }
diff --git a/modules/upload/upload.module b/modules/upload/upload.module
index 195e9d677..79254ba83 100644
--- a/modules/upload/upload.module
+++ b/modules/upload/upload.module
@@ -208,14 +208,6 @@ function upload_nodeapi(&$node, $op, $arg) {
           }
         }
 
-        // Rename possibly executable scripts to prevent accidental execution.
-        // Uploaded files are attachments and should be shown in their original
-        // form, rather than run.
-        if (preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
-          $file->filename .= '.txt';
-          $file->filemime = 'text/plain';
-        }
-
         if ($error['extension'] == count($user->roles) && $user->uid != 1) {
           form_set_error('upload', t('The selected file %name can not be attached to this post, because it is only possible to attach files with the following extensions: %files-allowed.', array('%name' => theme('placeholder', $file->filename), '%files-allowed' => theme('placeholder', $extensions))));
         }
author	Dries Buytaert <dries@buytaert.net>	2005-11-29 20:17:10 +0000
committer	Dries Buytaert <dries@buytaert.net>	2005-11-29 20:17:10 +0000
commit	2d0df351d704ba34d191831f7d4b8cb385555be2 (patch)
tree	b18d02dfa6d075ff540eeffc4ab38393e53a3da0
parent	236da7af059e6cf3aba6e8ec923e082b1f8fe143 (diff)
download	brdo-2d0df351d704ba34d191831f7d4b8cb385555be2.tar.gz brdo-2d0df351d704ba34d191831f7d4b8cb385555be2.tar.bz2