From 93728e5d325371f6ec92213a6207a6bfddfc261b Mon Sep 17 00:00:00 2001
From: Dries Buytaert
Date: Mon, 27 Jul 2009 20:15:35 +0000
Subject: - Patch #374441 by tic200, Damien Tournoud, scor: refactor Drupal
HTML corrector using PHP5's XML/Dom parser.
---
modules/field/modules/text/text.test | 12 +++---
modules/filter/filter.module | 79 ++++++---------------------------
modules/filter/filter.test | 84 ++++++++++++++++++++++++++++++------
modules/system/system.install | 11 +++++
4 files changed, 102 insertions(+), 84 deletions(-)
(limited to 'modules')
diff --git a/modules/field/modules/text/text.test b/modules/field/modules/text/text.test
index ae8a0a06a..db7c96c7c 100644
--- a/modules/field/modules/text/text.test
+++ b/modules/field/modules/text/text.test
@@ -286,12 +286,12 @@ class TextSummaryTestCase extends DrupalWebTestCase {
// And using a text format WITH the line-break and htmlcorrector filters.
$expected_lb = array(
"\nHi\n
\n\nfolks\n
\n!\n
",
- "<",
- "",
- "",
- "",
- "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
"\nHi
",
"\nHi
",
"\nHi
",
diff --git a/modules/filter/filter.module b/modules/filter/filter.module
index 00c4b5568..6b578a0a0 100644
--- a/modules/filter/filter.module
+++ b/modules/filter/filter.module
@@ -757,74 +757,21 @@ function _filter_url($text, $format) {
* Scan input and make sure that all HTML tags are properly closed and nested.
*/
function _filter_htmlcorrector($text) {
- // Prepare tag lists.
- static $no_nesting, $single_use;
- if (!isset($no_nesting)) {
- // Tags which cannot be nested but are typically left unclosed.
- $no_nesting = drupal_map_assoc(array('li', 'p'));
-
- // Single use tags in HTML4
- $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
+ // Ignore warnings during HTML soup loading.
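+ $htmlDom = @DOMDocument::loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $text . '</body></html>');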
+
+ // The result of DOMDocument->saveXML($bodyNode) is a partial (X)HTML document.
+ // We only need what is inside the body tag.
+ $bodyNode = $htmlDom->getElementsByTagName('body')->item(0);
+ if (preg_match("|^<body[^>]*>(.*)</body>$|s", $htmlDom->saveXML($bodyNode), $matches)) {
+ $body_content = $matches[1];
+ // The XHTML guidelines recommend to include a space before the trailing /
+ // and > of empty elements for better rendering on HTML user agents.
+ return preg_replace('|<([^>]*)/>|i', '<$1 />', $body_content);
}
-
- // Properly entify angles.
- $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);
-
- // Split tags from text.
- $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
- // Note: PHP ensures the array consists of alternating delimiters and literals
- // and begins and ends with a literal (inserting $null as required).
-
- $tag = FALSE; // Odd/even counter. Tag or no tag.
- $stack = array();
- $output = '';
- foreach ($split as $value) {
- // Process HTML tags.
- if ($tag) {
- list($tagname) = explode(' ', strtolower($value), 2);
- // Closing tag
- if ($tagname{0} == '/') {
- $tagname = substr($tagname, 1);
- // Discard XHTML closing tags for single use tags.
- if (!isset($single_use[$tagname])) {
- // See if we possibly have a matching opening tag on the stack.
- if (in_array($tagname, $stack)) {
- // Close other tags lingering first.
- do {
- $output .= '</' . $stack[0] . '>';
- } while (array_shift($stack) != $tagname);
- }
- // Otherwise, discard it.
- }
- }
- // Opening tag
- else {
- // See if we have an identical 'no nesting' tag already open and close it if found.
- if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
- $output .= '</' . array_shift($stack) . '>';
- }
- // Push non-single-use tags onto the stack
- if (!isset($single_use[$tagname])) {
- array_unshift($stack, $tagname);
- }
- // Add trailing slash to single-use tags as per X(HT)ML.
- else {
- $value = rtrim($value, ' /') . ' /';
- }
- $output .= '<' . $value . '>';
- }
- }
- else {
- // Passthrough all text.
- $output .= $value;
- }
- $tag = !$tag;
- }
- // Close remaining tags.
- while (count($stack) > 0) {
- $output .= '</' . array_shift($stack) . '>';
+ else {
+ return '';
}
- return $output;
}
/**
diff --git a/modules/filter/filter.test b/modules/filter/filter.test
index 98834d6e4..90ee5e7e1 100644
--- a/modules/filter/filter.test
+++ b/modules/filter/filter.test
@@ -182,24 +182,20 @@ class FilterAdminTestCase extends DrupalWebTestCase {
}
}
-class FilterTestCase extends DrupalWebTestCase {
+/**
+ * Unit tests for core filters.
+ */
+class FilterUnitTest extends DrupalWebTestCase {
protected $format;
public static function getInfo() {
return array(
'name' => 'Core filters',
- 'description' => 'Filter each filter individually: Convert URLs into links, Convert line breaks, Correct broken HTML, Escape all HTML, Limit allowed HTML tags.',
+ 'description' => 'Filter each filter individually: convert line breaks, correct broken HTML.',
'group' => 'Filter',
);
}
- function setUp() {
- parent::setUp();
-
- $admin_user = $this->drupalCreateUser(array('administer filters', 'create page content'));
- $this->drupalLogin($admin_user);
- }
-
/**
* Test the line break filter.
*/
@@ -594,17 +590,17 @@ class FilterTestCase extends DrupalWebTestCase {
}
/**
- * Test the HTML corrector.
+ * Test the HTML corrector filter.
*
* @todo This test could really use some validity checking function.
*/
- function testHtmlCorrector() {
+ function testHtmlCorrectorFilter() {
// Tag closing.
$f = _filter_htmlcorrector('text');
$this->assertEqual($f, '
text
', t('HTML corrector -- tag closing at the end of input.'));
$f = _filter_htmlcorrector('text
text');
- $this->assertEqual($f, '
text
text
', t('HTML corrector -- tag closing.'));
+ $this->assertEqual($f, 'text
text
', t('HTML corrector -- tag closing.'));
$f = _filter_htmlcorrector("- e1
- e2");
$this->assertEqual($f, "", t('HTML corrector -- unclosed list tags.'));
@@ -615,6 +611,70 @@ class FilterTestCase extends DrupalWebTestCase {
// XHTML slash for empty elements.
$f = _filter_htmlcorrector('
');
$this->assertEqual($f, '
', t('HTML corrector -- XHTML closing slash.'));
+
+ $f = _filter_htmlcorrector('test
');
+ $this->assertEqual($f, 'test
', t('HTML corrector -- Convert uppercased tags to proper lowercased ones.'));
+
+ $f = _filter_htmlcorrector('test
');
+ $this->assertEqual($f, 'test
', t('HTML corrector -- Convert uppercased tags to proper lowercased ones.'));
+
+ $f = _filter_htmlcorrector('test
');
+ $this->assertEqual($f, 'test
', t('HTML corrector -- Let proper XHTML pass thru.'));
+
+ $f = _filter_htmlcorrector('test
');
+ $this->assertEqual($f, 'test
', t('HTML corrector -- Let proper XHTML pass thru.'));
+
+ $f = _filter_htmlcorrector('');
+ $this->assertEqual($f, '', t('HTML corrector -- Let proper XHTML pass thru.'));
+
+ $f = _filter_htmlcorrector('test1
test2');
+ $this->assertEqual($f, 'test1
test2', t('HTML corrector -- Automatically close single tags.'));
+
+ $f = _filter_htmlcorrector('line1
line2');
+ $this->assertEqual($f, 'line1
line2', t('HTML corrector -- Automatically close single tags.'));
+
+ $f = _filter_htmlcorrector('line1
line2');
+ $this->assertEqual($f, 'line1
line2', t('HTML corrector -- Automatically close single tags.'));
+
+ $f = _filter_htmlcorrector('
test');
+ $this->assertEqual($f, '
test', t('HTML corrector -- Automatically close single tags.'));
+
+ $f = _filter_htmlcorrector('line1
line2
');
+ $this->assertEqual($f, 'line1
line2', t('HTML corrector -- Move non-inline elements outside of inline containers.'));
+
+ $f = _filter_htmlcorrector('line1
line2
');
+ $this->assertEqual($f, 'line1
line2
', t('HTML corrector -- Move non-inline elements outside of inline containers.'));
+
+ $f = _filter_htmlcorrector('test
test
\n');
+ $this->assertEqual($f, 'test
test
\n', t('HTML corrector -- Auto-close improperly nested tags.'));
+
+ $f = _filter_htmlcorrector('Line1
bold stuff');
+ $this->assertEqual($f, 'Line1
bold stuff
', t('HTML corrector -- Properly close unclosed tags, and remove useless closing tags.'));
+
+ $f = _filter_htmlcorrector('test ');
+ $this->assertEqual($f, 'test ', t('HTML corrector -- Do not touch HTML comments.'));
+
+ $f = _filter_htmlcorrector('test ');
+ $this->assertEqual($f, 'test ', t('HTML corrector -- Do not touch HTML comments.'));
+
+ $f = _filter_htmlcorrector('test ');
+ $this->assertEqual($f, 'test ', t('HTML corrector -- Do not touch HTML comments.'));
+
+ $f = _filter_htmlcorrector('test ');
+ $this->assertEqual($f, 'test ', t('HTML corrector -- Do not touch HTML comments.'));
+
+ $f = _filter_htmlcorrector('test ');
+ $this->assertEqual($f, 'test ', t('HTML corrector -- Do not touch HTML comments.'));
+
+ $f = _filter_htmlcorrector('test\n
\n');
+ $this->assertEqual($f, 'test\n
\n', t('HTML corrector -- New-lines are accepted and kept as-is.'));
+
+ $f = _filter_htmlcorrector('دروبال');
+ $this->assertEqual($f, '
دروبال
', t('HTML corrector -- Encoding is correctly kept.'));
}
function createFormat($filter) {
diff --git a/modules/system/system.install b/modules/system/system.install
index 38640dd7e..6e2465fca 100644
--- a/modules/system/system.install
+++ b/modules/system/system.install
@@ -266,6 +266,17 @@ function system_requirements($phase) {
include_once DRUPAL_ROOT . '/includes/unicode.inc';
$requirements = array_merge($requirements, unicode_requirements());
+ // Verify if the DOM PHP 5 extension is available.
+ $has_dom = class_exists('DOMDocument');
+ if (!$has_dom) {
+ $requirements['php_dom'] = array(
+ 'title' => $t('PHP DOM Extension'),
+ 'value' => $t('Not found'),
+ 'severity' => REQUIREMENT_ERROR,
+ 'description' => $t("The DOM extension is part of PHP 5 core, but doesn't seem to be enabled on your system. You need to enable the DOM extension on your PHP installation."),
+ );
+ }
+
if ($phase == 'runtime') {
// Check for update status module.
if (!module_exists('update')) {
--
cgit v1.2.3