summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDries Buytaert <dries@buytaert.net>2005-11-29 20:17:10 +0000
committerDries Buytaert <dries@buytaert.net>2005-11-29 20:17:10 +0000
commit2d0df351d704ba34d191831f7d4b8cb385555be2 (patch)
treeb18d02dfa6d075ff540eeffc4ab38393e53a3da0
parent236da7af059e6cf3aba6e8ec923e082b1f8fe143 (diff)
downloadbrdo-2d0df351d704ba34d191831f7d4b8cb385555be2.tar.gz
brdo-2d0df351d704ba34d191831f7d4b8cb385555be2.tar.bz2
- Various fixes. Updated CHANGELOG.txt
-rw-r--r--CHANGELOG.txt8
-rw-r--r--includes/bootstrap.inc6
-rw-r--r--includes/common.inc67
-rw-r--r--includes/database.mysql.inc2
-rw-r--r--includes/database.mysqli.inc2
-rw-r--r--includes/database.pgsql.inc2
-rw-r--r--includes/file.inc18
-rw-r--r--includes/unicode.inc4
-rw-r--r--modules/filter.module419
-rw-r--r--modules/filter/filter.module419
-rw-r--r--modules/search.module2
-rw-r--r--modules/search/search.module2
-rw-r--r--modules/system.module2
-rw-r--r--modules/system/system.module2
-rw-r--r--modules/upload.module8
-rw-r--r--modules/upload/upload.module8
16 files changed, 683 insertions, 288 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 1a68edc4f..91521595d 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -52,6 +52,10 @@ Drupal x.x.x, xxxx-xx-xx (development version)
- PostgreSQL support:
* removed dependency on PL/pgSQL procedural language
+Drupal 4.6.4, 2005-11-29
+------------------------
+- fixed bugs, including 3 security vulnerabilities.
+
Drupal 4.6.3, 2005-08-15
------------------------
- fixed bugs, including a critical "arbitrary PHP code execution" bug.
@@ -108,6 +112,10 @@ Drupal 4.6.0, 2005-04-15
- documentation:
* improved and extended PHPDoc/Doxygen comments.
+Drupal 4.5.6, 2005-11-29
+------------------------
+- fixed bugs, including 3 security vulnerabilities.
+
Drupal 4.5.5, 2005-08-15
------------------------
- fixed bugs, including a critical "arbitrary PHP code execution" bug.
diff --git a/includes/bootstrap.inc b/includes/bootstrap.inc
index 6c87605ac..9d3e44cd0 100644
--- a/includes/bootstrap.inc
+++ b/includes/bootstrap.inc
@@ -708,14 +708,12 @@ function arg($index) {
}
/**
- * Prepare a URL for use in an HTML attribute.
+ * Prepare a URL for use in an HTML attribute. Strips harmful protocols.
*
- * We replace ( and ) with their url-encoded equivalents to prevent XSS attacks.
*/
function check_url($uri) {
$uri = htmlspecialchars($uri, ENT_QUOTES);
-
- $uri = strtr($uri, array('(' => '%28', ')' => '%29'));
+ $uri = filter_xss_bad_protocol($uri, FALSE);
return $uri;
}
diff --git a/includes/common.inc b/includes/common.inc
index 84c955eed..08545ce0e 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -616,13 +616,6 @@ function t($string, $args = 0) {
}
/**
- * Encode special characters in a plain-text string for display as HTML.
- */
-function check_plain($text) {
- return htmlspecialchars($text, ENT_QUOTES);
-}
-
-/**
* @defgroup validation Input validation
* @{
* Functions to validate user input.
@@ -668,54 +661,6 @@ function valid_url($url, $absolute = FALSE) {
}
/**
- * Validate data input by a user.
- *
- * Ensures that user data cannot be used to perform attacks on the site.
- *
- * @param $data
- * The input to check.
- * @return
- * TRUE if the input data is acceptable.
- */
-function valid_input_data($data) {
- if (is_array($data) || is_object($data)) {
- // Form data can contain a number of nested arrays.
- foreach ($data as $key => $value) {
- if (!valid_input_data($key) || !valid_input_data($value)) {
- return FALSE;
- }
- }
- }
- else if (isset($data)) {
- // Detect dangerous input data.
-
- // Decode all normal character entities.
- $data = decode_entities($data, array('<', '&', '"'));
-
- // Check strings:
- $match = preg_match('/\Wjavascript\s*:/i', $data);
- $match += preg_match('/\Wexpression\s*\(/i', $data);
- $match += preg_match('/\Walert\s*\(/i', $data);
-
- // Check attributes:
- $match += preg_match("/\W(dynsrc|datasrc|data|lowsrc|on[a-z]+)\s*=[^>]+?>/i", $data);
-
- // Check tags:
- $match += preg_match("/<\s*(applet|script|object|style|embed|form|blink|meta|html|frame|iframe|layer|ilayer|head|frameset|xml)/i", $data);
-
- if ($match) {
- watchdog('security', t('Terminated request because of suspicious input data: %data.', array('%data' => theme('placeholder', $data))));
- return FALSE;
- }
- }
-
- return TRUE;
-}
-/**
- * @} End of "defgroup validation".
- */
-
-/**
* Register an event for the current visitor (hostname/IP) to the flood control mechanism.
*
* @param $name
@@ -1366,17 +1311,7 @@ function _drupal_bootstrap_full() {
}
// Initialize all enabled modules.
module_init();
- if (!user_access('bypass input data check')) {
- // We can't use $_REQUEST because it consists of the contents of $_POST,
- // $_GET and $_COOKIE: if any of the input arrays share a key, only one
- // value will be verified.
- if (!valid_input_data($_GET)
- || !valid_input_data($_POST)
- || !valid_input_data($_COOKIE)
- || !valid_input_data($_FILES)) {
- die('Terminated request because of suspicious input data.');
- }
- }
+ // Undo magic quotes
fix_gpc_magic();
// Initialize the localization system.
$locale = locale_initialize();
diff --git a/includes/database.mysql.inc b/includes/database.mysql.inc
index ed02fa886..3fce79940 100644
--- a/includes/database.mysql.inc
+++ b/includes/database.mysql.inc
@@ -105,7 +105,7 @@ function _db_query($query, $debug = 0) {
return $result;
}
else {
- trigger_error(mysql_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+ trigger_error(check_plain(mysql_error() ."\nquery: ". $query), E_USER_ERROR);
return FALSE;
}
}
diff --git a/includes/database.mysqli.inc b/includes/database.mysqli.inc
index f2583b214..3eaea5854 100644
--- a/includes/database.mysqli.inc
+++ b/includes/database.mysqli.inc
@@ -113,7 +113,7 @@ function _db_query($query, $debug = 0) {
return $result;
}
else {
- trigger_error(mysqli_error($active_db) ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+ trigger_error(check_plain(mysqli_error($active_db) ."\nquery: ". $query), E_USER_ERROR);
return FALSE;
}
}
diff --git a/includes/database.pgsql.inc b/includes/database.pgsql.inc
index 415565a47..3c2f8f710 100644
--- a/includes/database.pgsql.inc
+++ b/includes/database.pgsql.inc
@@ -92,7 +92,7 @@ function _db_query($query, $debug = 0) {
return $last_result;
}
else {
- trigger_error(pg_last_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+ trigger_error(check_plain(pg_last_error() ."\nquery: ". $query), E_USER_ERROR);
return FALSE;
}
}
diff --git a/includes/file.inc b/includes/file.inc
index 41f9a78f1..55f7825d5 100644
--- a/includes/file.inc
+++ b/includes/file.inc
@@ -144,8 +144,24 @@ function file_check_upload($source) {
elseif ($_FILES["edit"]["name"][$source] && is_uploaded_file($_FILES["edit"]["tmp_name"][$source])) {
$file = new StdClass();
$file->filename = trim(basename($_FILES["edit"]["name"][$source]), '.');
- $file->filemime = $_FILES["edit"]["type"][$source];
$file->filepath = $_FILES["edit"]["tmp_name"][$source];
+
+ // Prefer the server-side MIME detection over the type reported by the client;
+ // a mismatch between the two is logged, not rejected.
+ if (function_exists('mime_content_type')) {
+ $file->filemime = mime_content_type($file->filepath);
+ if ($file->filemime != $_FILES["edit"]["type"][$source]) {
+ watchdog('file', t('For %file the system thinks its MIME type is %detected while the user has given %given for MIME type', array('%file' => theme('placeholder', $file->filepath), '%detected' => theme('placeholder', $file->filemime), '%given' => theme('placeholder', $_FILES['edit']['type'][$source]))));
+ }
+ }
+ else {
+ $file->filemime = $_FILES["edit"]["type"][$source];
+ }
+ if (((substr($file->filemime, 0, 5) == 'text/' || strpos($file->filemime, 'javascript')) && (substr($file->filepath, -4) != '.txt')) || preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
+ $file->filemime = 'text/plain';
+ rename($file->filepath, $file->filepath .'.txt');
+ $file->filepath .= '.txt';
+ $file->filename .= '.txt';
+ }
+
$file->error = $_FILES["edit"]["error"][$source];
$file->filesize = $_FILES["edit"]["size"][$source];
$file->source = $source;
diff --git a/includes/unicode.inc b/includes/unicode.inc
index 3508aad64..0c562a8c1 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -303,6 +303,10 @@ function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
if ($prefix == '#x') {
$codepoint = base_convert($codepoint, 16, 10);
}
+ // Decimal numerical entity (strip leading zeros to avoid PHP octal notation)
+ else {
+ $codepoint = preg_replace('/^0+/', '', $codepoint);
+ }
// Encode codepoint as UTF-8 bytes
if ($codepoint < 0x80) {
$str = chr($codepoint);
diff --git a/modules/filter.module b/modules/filter.module
index 41e6ad399..2ee9f1743 100644
--- a/modules/filter.module
+++ b/modules/filter.module
@@ -14,9 +14,6 @@ define('FILTER_FORMAT_DEFAULT', 0);
define('FILTER_HTML_STRIP', 1);
define('FILTER_HTML_ESCAPE', 2);
-define('FILTER_STYLE_ALLOW', 0);
-define('FILTER_STYLE_STRIP', 1);
-
/**
* Implementation of hook_help().
*/
@@ -65,105 +62,100 @@ function filter_filter_tips($delta, $format, $long = false) {
global $base_url;
switch ($delta) {
case 0:
- switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) {
-
- case FILTER_HTML_STRIP:
- if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
- switch ($long) {
- case 0:
- return t('Allowed HTML tags') .': '. check_plain($allowed_html);
- case 1:
- $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
- if (!variable_get("filter_html_help_$format", 1)) {
- return $output;
- }
+ if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
+ if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
+ switch ($long) {
+ case 0:
+ return t('Allowed HTML tags') .': '. check_plain($allowed_html);
+ case 1:
+ $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
+ if (!variable_get("filter_html_help_$format", 1)) {
+ return $output;
+ }
- $output .= t('
+ $output .= t('
<p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
<p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
- $tips = array(
- 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
- 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
- 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
- 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
- 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
- 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
- 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
- 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
- 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
- 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
- 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
- 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
- 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
- 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
- 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
- // Assumes and describes tr, td, th.
- 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
- 'tr' => NULL, 'td' => NULL, 'th' => NULL,
- 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
- 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
- // Assumes and describes li.
- 'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
- 'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
- 'li' => NULL,
- // Assumes and describes dt and dd.
- 'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
- 'dt' => NULL, 'dd' => NULL,
- 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
- 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
- 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
- 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
- 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
- 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
- );
- $header = array(t('Tag Description'), t('You Type'), t('You Get'));
- preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
- foreach ($out[1] as $tag) {
- if (array_key_exists($tag, $tips)) {
- if ($tips[$tag]) {
- $rows[] = array(
- array('data' => $tips[$tag][0], 'class' => 'description'),
- array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
- array('data' => $tips[$tag][1], 'class' => 'get')
- );
- }
- }
- else {
+ $tips = array(
+ 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
+ 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
+ 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
+ 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
+ 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
+ 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
+ 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
+ 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
+ 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
+ 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
+ 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
+ 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
+ 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
+ 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
+ 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
+ // Assumes and describes tr, td, th.
+ 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
+ 'tr' => NULL, 'td' => NULL, 'th' => NULL,
+ 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
+ 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
+ // Assumes and describes li.
+ 'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
+ 'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
+ 'li' => NULL,
+ // Assumes and describes dt and dd.
+ 'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
+ 'dt' => NULL, 'dd' => NULL,
+ 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
+ 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
+ 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
+ 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
+ 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
+ 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
+ );
+ $header = array(t('Tag Description'), t('You Type'), t('You Get'));
+ preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
+ foreach ($out[1] as $tag) {
+ if (array_key_exists($tag, $tips)) {
+ if ($tips[$tag]) {
$rows[] = array(
- array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+ array('data' => $tips[$tag][0], 'class' => 'description'),
+ array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
+ array('data' => $tips[$tag][1], 'class' => 'get')
);
}
}
- $output .= theme('table', $header, $rows);
+ else {
+ $rows[] = array(
+ array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+ );
+ }
+ }
+ $output .= theme('table', $header, $rows);
- $output .= t('
+ $output .= t('
<p>Most unusual characters can be directly entered without any problems.</p>
<p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
- $entities = array(
- array( t('Ampersand'), '&amp;'),
- array( t('Greater than'), '&gt;'),
- array( t('Less than'), '&lt;'),
- array( t('Quotation mark'), '&quot;'),
+ $entities = array(
+ array( t('Ampersand'), '&amp;'),
+ array( t('Greater than'), '&gt;'),
+ array( t('Less than'), '&lt;'),
+ array( t('Quotation mark'), '&quot;'),
+ );
+ $header = array(t('Character Description'), t('You Type'), t('You Get'));
+ unset($rows);
+ foreach ($entities as $entity) {
+ $rows[] = array(
+ array('data' => $entity[0], 'class' => 'description'),
+ array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
+ array('data' => $entity[1], 'class' => 'get')
);
- $header = array(t('Character Description'), t('You Type'), t('You Get'));
- unset($rows);
- foreach ($entities as $entity) {
- $rows[] = array(
- array('data' => $entity[0], 'class' => 'description'),
- array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
- array('data' => $entity[1], 'class' => 'get')
- );
- }
- $output .= theme('table', $header, $rows);
- return $output;
- }
- }
- else {
- return t('No HTML tags allowed');
+ }
+ $output .= theme('table', $header, $rows);
+ return $output;
}
-
- case FILTER_STYLE_STRIP:
+ }
+ else {
return t('No HTML tags allowed');
+ }
}
break;
@@ -975,10 +967,9 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
*/
function _filter_html_settings($format) {
$form['filter_html'] = array('#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, '#collapsed' => TRUE);
- $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
+ $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape all tags", all HTML is escaped and presented as it was typed.'));
$form['filter_html']["allowed_html_$format"] = array('#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), '#size' => 64, '#maxlength' => 255, '#description' => t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.'));
$form['filter_html']["filter_html_help_$format"] = array('#type' => 'checkbox', '#title' => t('Display HTML help'), '#default_value' => variable_get("filter_html_help_$format", 1), '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'));
- $form['filter_html']["filter_style_$format"] = array('#type' => 'radios', '#title' => t('HTML style attributes'), '#default_value' => variable_get("filter_style_$format", FILTER_STYLE_STRIP), '#options' => array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), '#description' => t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.'));
$form['filter_html']["filter_html_nofollow_$format"] = array('#type' => 'checkbox', '#title' => t('Spam link deterrent'), '#default_value' => variable_get("filter_html_nofollow_$format", FALSE), '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'));
return $form;
@@ -989,12 +980,7 @@ function _filter_html_settings($format) {
*/
function _filter_html($text, $format) {
if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
- // Allow users to enter HTML, but filter it
- $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'));
- if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) {
- $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text);
- }
- $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text);
+ $text = filter_xss($text, $format);
}
if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
@@ -1065,6 +1051,245 @@ function _filter_autop($text) {
}
/**
+ * Filters XSS. Based on kses by Ulf Harnhammar, see
+ * http://sourceforge.net/projects/kses
+ *
+ * For examples of various XSS attacks, see:
+ * http://ha.ckers.org/xss.html
+ *
+ * This code does four things:
+ * - Removes characters and constructs that can trick browsers
+ * - Makes sure all HTML entities are well-formed
+ * - Makes sure all HTML tags and attributes are well-formed
+ * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with raw HTML in it. It will be stripped of everything that can cause
+ *   an XSS attack.
+ * @param $format
+ *   The format to use.
+ * @return
+ *   The filtered string: every tag-like chunk and every lone ">" has been
+ *   passed through _filter_xss_split().
+ */
+function filter_xss($string, $format) {
+  // Store the input format
+  _filter_xss_split($format);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  // Tokenize into tag-like chunks. The chunk must end at the FIRST ">" (or at
+  // the end of the string): a wildcard before the terminator would swallow a
+  // second ">" ("<b>>" would be dropped whole) and would leave a bare "<"
+  // unmatched at the end of the input.
+  return preg_replace_callback('%
+    (
+    <[^>]*(>|$)   # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * This function has two calling modes. filter_xss() first calls it with a
+ * scalar format identifier to select the allowed tags; preg_replace_callback()
+ * then calls it with a match array for every tag-like chunk of the input.
+ *
+ * @param $m
+ *   Either a format identifier (any non-array value), in which case the list
+ *   of allowed tags for that format is stored and nothing is returned, or an
+ *   array of regular expression matches whose element 1 is the HTML tag to
+ *   process.
+ * @return
+ *   If the element isn't allowed, an empty string. Otherwise, the cleaned up version
+ *   of the HTML element.
+ */
+function _filter_xss_split($m) {
+  static $allowed_html;
+
+  if (!is_array($m)) {
+    // Storing call. Reload the list every time so that a request filtering
+    // text in several formats does not keep using the first format's tags.
+    $allowed_html = array_flip(preg_split('/\s+|<|>/', variable_get("allowed_html_$m", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY));
+    return;
+  }
+
+  $string = $m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = $matches[2];
+  $attrlist = $matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element (also the result when no format was stored)
+    return '';
+  }
+
+  if ($slash != '') {
+    // Closing tag: attributes are dropped
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes.
+ *
+ * The string is consumed from the left by a small state machine:
+ * mode 0 expects an attribute name, mode 1 an equals sign or whitespace
+ * (valueless attribute), mode 2 an attribute value. "style" and every
+ * attribute whose name starts with "on" are parsed but never emitted.
+ *
+ * @param $attr
+ *   The raw attribute part of an HTML tag.
+ * @return
+ *   Cleaned up version of the HTML attributes, as an array with one entry
+ *   per attribute that was kept.
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+  $skip = FALSE;
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        // Unquoted value: it is emitted wrapped in double quotes
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+        |               # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        |               # or
+        \S              # - a non-whitespace character
+        )*              # any number of the above three
+        \s*             # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // the attribute list ends with a valueless attribute like "selected";
+  // a trailing "style" or "on*" name must be dropped like anywhere else
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen (U+00AD, encoded in UTF-8 as the bytes 0xC2 0xAD)
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols. Repeat until the string is stable, because removing one
+  // disallowed prefix can expose another one behind it.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('/^([^:]+):/', '_filter_xss_bad_protocol', $string);
+  } while ($before !== $string);
+  return check_plain($string);
+}
+
+/**
+ * Callback for filter_xss_bad_protocol(): checks one leading "protocol:" prefix.
+ *
+ * @param $m
+ *   Array of regular expression matches; element 1 is the text in front of
+ *   the first colon of the attribute value.
+ * @return
+ *   The prefix with all whitespace removed and followed by a colon if it
+ *   names an allowed protocol, an empty string otherwise.
+ */
+function _filter_xss_bad_protocol($m) {
+  // 'e2dk' is kept for backward compatibility; 'ed2k' is the spelling of the
+  // actual eDonkey URI scheme.
+  static $allowed_protocols = array('http' => TRUE, 'https' => TRUE, 'ftp' => TRUE, 'news' => TRUE, 'nntp' => TRUE, 'telnet' => TRUE, 'gopher' => TRUE, 'mailto' => TRUE, 'e2dk' => TRUE, 'ed2k' => TRUE, 'smb' => TRUE, 'irc' => TRUE, 'rsync' => TRUE, 'ssh' => TRUE, 'sftp' => TRUE);
+  // Whitespace inside the scheme is removed before the lookup, so that
+  // "java script" is judged as "javascript".
+  $protocol = preg_replace('/\s+/', '', $m[1]);
+  // URI schemes are case-insensitive: look up the lowercased name, but hand
+  // back the author's own spelling.
+  return isset($allowed_protocols[strtolower($protocol)]) ? "$protocol:" : '';
+}
+
+/**
* @} End of "Standard filters".
*/
diff --git a/modules/filter/filter.module b/modules/filter/filter.module
index 41e6ad399..2ee9f1743 100644
--- a/modules/filter/filter.module
+++ b/modules/filter/filter.module
@@ -14,9 +14,6 @@ define('FILTER_FORMAT_DEFAULT', 0);
define('FILTER_HTML_STRIP', 1);
define('FILTER_HTML_ESCAPE', 2);
-define('FILTER_STYLE_ALLOW', 0);
-define('FILTER_STYLE_STRIP', 1);
-
/**
* Implementation of hook_help().
*/
@@ -65,105 +62,100 @@ function filter_filter_tips($delta, $format, $long = false) {
global $base_url;
switch ($delta) {
case 0:
- switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) {
-
- case FILTER_HTML_STRIP:
- if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
- switch ($long) {
- case 0:
- return t('Allowed HTML tags') .': '. check_plain($allowed_html);
- case 1:
- $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
- if (!variable_get("filter_html_help_$format", 1)) {
- return $output;
- }
+ if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
+ if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
+ switch ($long) {
+ case 0:
+ return t('Allowed HTML tags') .': '. check_plain($allowed_html);
+ case 1:
+ $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
+ if (!variable_get("filter_html_help_$format", 1)) {
+ return $output;
+ }
- $output .= t('
+ $output .= t('
<p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
<p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
- $tips = array(
- 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
- 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
- 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
- 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
- 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
- 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
- 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
- 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
- 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
- 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
- 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
- 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
- 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
- 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
- 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
- // Assumes and describes tr, td, th.
- 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
- 'tr' => NULL, 'td' => NULL, 'th' => NULL,
- 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
- 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
- // Assumes and describes li.
- 'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
- 'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
- 'li' => NULL,
- // Assumes and describes dt and dd.
- 'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
- 'dt' => NULL, 'dd' => NULL,
- 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
- 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
- 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
- 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
- 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
- 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
- );
- $header = array(t('Tag Description'), t('You Type'), t('You Get'));
- preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
- foreach ($out[1] as $tag) {
- if (array_key_exists($tag, $tips)) {
- if ($tips[$tag]) {
- $rows[] = array(
- array('data' => $tips[$tag][0], 'class' => 'description'),
- array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
- array('data' => $tips[$tag][1], 'class' => 'get')
- );
- }
- }
- else {
+ $tips = array(
+ 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
+ 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
+ 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
+ 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
+ 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
+ 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
+ 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
+ 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
+ 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
+ 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
+ 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
+ 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
+ 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
+ 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
+ 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
+ // Assumes and describes tr, td, th.
+ 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
+ 'tr' => NULL, 'td' => NULL, 'th' => NULL,
+ 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
+ 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
+ // Assumes and describes li.
+ 'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
+ 'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
+ 'li' => NULL,
+ // Assumes and describes dt and dd.
+ 'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
+ 'dt' => NULL, 'dd' => NULL,
+ 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
+ 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
+ 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
+ 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
+ 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
+ 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
+ );
+ $header = array(t('Tag Description'), t('You Type'), t('You Get'));
+ preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
+ foreach ($out[1] as $tag) {
+ if (array_key_exists($tag, $tips)) {
+ if ($tips[$tag]) {
$rows[] = array(
- array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+ array('data' => $tips[$tag][0], 'class' => 'description'),
+ array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
+ array('data' => $tips[$tag][1], 'class' => 'get')
);
}
}
- $output .= theme('table', $header, $rows);
+ else {
+ $rows[] = array(
+ array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+ );
+ }
+ }
+ $output .= theme('table', $header, $rows);
- $output .= t('
+ $output .= t('
<p>Most unusual characters can be directly entered without any problems.</p>
<p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
- $entities = array(
- array( t('Ampersand'), '&amp;'),
- array( t('Greater than'), '&gt;'),
- array( t('Less than'), '&lt;'),
- array( t('Quotation mark'), '&quot;'),
+ $entities = array(
+ array( t('Ampersand'), '&amp;'),
+ array( t('Greater than'), '&gt;'),
+ array( t('Less than'), '&lt;'),
+ array( t('Quotation mark'), '&quot;'),
+ );
+ $header = array(t('Character Description'), t('You Type'), t('You Get'));
+ unset($rows);
+ foreach ($entities as $entity) {
+ $rows[] = array(
+ array('data' => $entity[0], 'class' => 'description'),
+ array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
+ array('data' => $entity[1], 'class' => 'get')
);
- $header = array(t('Character Description'), t('You Type'), t('You Get'));
- unset($rows);
- foreach ($entities as $entity) {
- $rows[] = array(
- array('data' => $entity[0], 'class' => 'description'),
- array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
- array('data' => $entity[1], 'class' => 'get')
- );
- }
- $output .= theme('table', $header, $rows);
- return $output;
- }
- }
- else {
- return t('No HTML tags allowed');
+ }
+ $output .= theme('table', $header, $rows);
+ return $output;
}
-
- case FILTER_STYLE_STRIP:
+ }
+ else {
return t('No HTML tags allowed');
+ }
}
break;
@@ -975,10 +967,9 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
*/
function _filter_html_settings($format) {
$form['filter_html'] = array('#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, '#collapsed' => TRUE);
- $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
+ $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
$form['filter_html']["allowed_html_$format"] = array('#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), '#size' => 64, '#maxlength' => 255, '#description' => t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.'));
$form['filter_html']["filter_html_help_$format"] = array('#type' => 'checkbox', '#title' => t('Display HTML help'), '#default_value' => variable_get("filter_html_help_$format", 1), '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'));
- $form['filter_html']["filter_style_$format"] = array('#type' => 'radios', '#title' => t('HTML style attributes'), '#default_value' => variable_get("filter_style_$format", FILTER_STYLE_STRIP), '#options' => array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), '#description' => t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.'));
$form['filter_html']["filter_html_nofollow_$format"] = array('#type' => 'checkbox', '#title' => t('Spam link deterrent'), '#default_value' => variable_get("filter_html_nofollow_$format", FALSE), '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'));
return $form;
@@ -989,12 +980,7 @@ function _filter_html_settings($format) {
*/
function _filter_html($text, $format) {
if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
- // Allow users to enter HTML, but filter it
- $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'));
- if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) {
- $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text);
- }
- $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text);
+ $text = filter_xss($text, $format);
}
if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
@@ -1065,6 +1051,245 @@ function _filter_autop($text) {
}
/**
+ * Filters XSS. Based on kses by Ulf Harnhammar, see
+ * http://sourceforge.net/projects/kses
+ *
+ * For examples of various XSS attacks, see:
+ * http://ha.ckers.org/xss.html
+ *
+ * This code does four things:
+ * - Removes characters and constructs that can trick browsers
+ * - Makes sure all HTML entities are well-formed
+ * - Makes sure all HTML tags and attributes are well-formed
+ * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with raw HTML in it. It will be stripped of everything that can cause
+ *   an XSS attack.
+ * @param $format
+ *   The format to use.
+ * @return
+ *   The string with every tag replaced by the output of _filter_xss_split()
+ *   and every lone ">" escaped.
+ */
+function filter_xss($string, $format) {
+  // Store the input format
+  _filter_xss_split($format);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  // Hand every tag-like chunk to _filter_xss_split(). No "." is used in front
+  // of the closing ">": without the s modifier it would refuse a newline and
+  // let a tag that ends in a line break before ">" escape the callback.
+  return preg_replace_callback('%
+    (
+    <[^>]*(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |            # or
+    >            # just a >
+    )%x', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * The function also keeps the list of allowed tags between calls: filter_xss()
+ * first calls it with a format identifier to load the list, then uses it as
+ * the preg_replace_callback() callback for every tag found.
+ *
+ * @param $m
+ *   Either a format identifier (any non-array value), which (re)loads the
+ *   allowed tags of that format, or a match array whose element 1 is the HTML
+ *   tag to process.
+ * @return
+ *   Nothing when a format identifier was given. Otherwise: if the element
+ *   isn't allowed, an empty string; else the cleaned up version of the HTML
+ *   element.
+ */
+function _filter_xss_split($m) {
+  static $allowed_html;
+
+  // A non-array argument is the "store" call. Reload the list every time so
+  // that a second format filtered in the same request gets its own tags
+  // instead of silently reusing those of the first format.
+  if (!is_array($m)) {
+    $allowed_html = array_flip(preg_split('/\s+|<|>/', variable_get("allowed_html_$m", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY));
+    return;
+  }
+
+  $string = $m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = $matches[2];
+  $attrlist = $matches[3];
+
+  // If no list has been stored yet nothing is set, so every tag is dropped.
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes? This must
+  // look at the attribute list of this tag ($attrlist).
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes.
+ *
+ * The list is walked with a three-state scanner: state 0 expects an attribute
+ * name, state 1 expects an equals sign or the end of a valueless attribute,
+ * and state 2 expects the value. "style" attributes and attributes whose name
+ * starts with "on" are parsed but never added to the result. Every value is
+ * run through filter_xss_bad_protocol() before it is stored.
+ *
+ * @param $attr
+ *   The attribute part of a tag, as a string.
+ * @return
+ *   An array with the cleaned up attributes; each item is either a bare name
+ *   or a name with a quoted value.
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+  // Whether the attribute currently being parsed has to be dropped.
+  $skip = FALSE;
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          // Unquoted values are always written back with double quotes.
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      // NOTE(review): the single-quote line ends in "|" and is followed by
+      // another "|", which adds an empty alternative in front of \S. The
+      // pattern is kept as it was; confirm that this is intended.
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string
+        | # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        | # or
+        \S # - a non-whitespace character
+        )* # any number of the above three
+        \s* # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // the attribute list ends with a valueless attribute like "selected";
+  // a trailing "style" or "on*" name is dropped like anywhere else in the list
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * Leading "protocol:" prefixes are handed to _filter_xss_bad_protocol() one at
+ * a time until the value stops changing, so stacked prefixes are all checked.
+ * A colon that only appears after a "/", "?" or "#" is left alone: such a
+ * value is a relative URL and the colon cannot belong to a scheme.
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen (the two bytes 0xC2 0xAD)
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols. The character class excludes "/", "?" and "#" so that
+  // relative URLs such as "/node/1?x=a:b" are not cut at the colon.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('!^([^:/?#]+):!', '_filter_xss_bad_protocol', $string);
+  } while ($before !== $string);
+  return check_plain($string);
+}
+
+/**
+ * preg_replace_callback() helper for filter_xss_bad_protocol().
+ *
+ * @param $m
+ *   The match array; $m[1] is the text found in front of the first colon.
+ * @return
+ *   The protocol with all whitespace removed, followed by a colon, when it is
+ *   on the whitelist; an empty string otherwise (which removes the prefix).
+ */
+function _filter_xss_bad_protocol($m) {
+  static $allowed_protocols = array('http' => TRUE, 'https' => TRUE, 'ftp' => TRUE, 'news' => TRUE, 'nntp' => TRUE, 'telnet' => TRUE, 'gopher' => TRUE, 'mailto' => TRUE, 'ed2k' => TRUE, 'smb' => TRUE, 'irc' => TRUE, 'rsync' => TRUE, 'ssh' => TRUE, 'sftp' => TRUE);
+  // Whitespace inside the candidate is dropped before the lookup, so that
+  // "java script" is judged as "javascript".
+  $string = preg_replace('/\s+/', '', $m[1]);
+  // Scheme names are case-insensitive: look up the lower-cased form but hand
+  // back the spelling that was actually used.
+  return isset($allowed_protocols[strtolower($string)]) ? "$string:" : '';
+}
+
+/**
* @} End of "Standard filters".
*/
diff --git a/modules/search.module b/modules/search.module
index 6cba60fe4..74535839e 100644
--- a/modules/search.module
+++ b/modules/search.module
@@ -332,7 +332,7 @@ function search_simplify($text) {
// Simple CJK handling
if (variable_get('overlap_cjk', true)) {
- $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
+ $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
}
// To improve searching for numerical data such as dates, IP addresses
diff --git a/modules/search/search.module b/modules/search/search.module
index 6cba60fe4..74535839e 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -332,7 +332,7 @@ function search_simplify($text) {
// Simple CJK handling
if (variable_get('overlap_cjk', true)) {
- $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
+ $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
}
// To improve searching for numerical data such as dates, IP addresses
diff --git a/modules/system.module b/modules/system.module
index d31d4dd62..248ae68a4 100644
--- a/modules/system.module
+++ b/modules/system.module
@@ -47,7 +47,7 @@ function system_help($section) {
* Implementation of hook_perm().
*/
function system_perm() {
- return array('administer site configuration', 'access administration pages', 'bypass input data check');
+ return array('administer site configuration', 'access administration pages');
}
/**
diff --git a/modules/system/system.module b/modules/system/system.module
index d31d4dd62..248ae68a4 100644
--- a/modules/system/system.module
+++ b/modules/system/system.module
@@ -47,7 +47,7 @@ function system_help($section) {
* Implementation of hook_perm().
*/
function system_perm() {
- return array('administer site configuration', 'access administration pages', 'bypass input data check');
+ return array('administer site configuration', 'access administration pages');
}
/**
diff --git a/modules/upload.module b/modules/upload.module
index 195e9d677..79254ba83 100644
--- a/modules/upload.module
+++ b/modules/upload.module
@@ -208,14 +208,6 @@ function upload_nodeapi(&$node, $op, $arg) {
}
}
- // Rename possibly executable scripts to prevent accidental execution.
- // Uploaded files are attachments and should be shown in their original
- // form, rather than run.
- if (preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
- $file->filename .= '.txt';
- $file->filemime = 'text/plain';
- }
-
if ($error['extension'] == count($user->roles) && $user->uid != 1) {
form_set_error('upload', t('The selected file %name can not be attached to this post, because it is only possible to attach files with the following extensions: %files-allowed.', array('%name' => theme('placeholder', $file->filename), '%files-allowed' => theme('placeholder', $extensions))));
}
diff --git a/modules/upload/upload.module b/modules/upload/upload.module
index 195e9d677..79254ba83 100644
--- a/modules/upload/upload.module
+++ b/modules/upload/upload.module
@@ -208,14 +208,6 @@ function upload_nodeapi(&$node, $op, $arg) {
}
}
- // Rename possibly executable scripts to prevent accidental execution.
- // Uploaded files are attachments and should be shown in their original
- // form, rather than run.
- if (preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
- $file->filename .= '.txt';
- $file->filemime = 'text/plain';
- }
-
if ($error['extension'] == count($user->roles) && $user->uid != 1) {
form_set_error('upload', t('The selected file %name can not be attached to this post, because it is only possible to attach files with the following extensions: %files-allowed.', array('%name' => theme('placeholder', $file->filename), '%files-allowed' => theme('placeholder', $extensions))));
}