summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDries Buytaert <dries@buytaert.net>2007-05-20 16:44:35 +0000
committerDries Buytaert <dries@buytaert.net>2007-05-20 16:44:35 +0000
commit667bf95308cfbaac5929bee70e9ba98fa92ed3db (patch)
tree23e51f2eabbc0cc69847b987f607c84bdbefeb1f
parentd78badccbc51fde2e21d7799a816b1a64d0058b6 (diff)
downloadbrdo-667bf95308cfbaac5929bee70e9ba98fa92ed3db.tar.gz
brdo-667bf95308cfbaac5929bee70e9ba98fa92ed3db.tar.bz2
- Patch #54833 by Steven: added an HTML corrector.
-rw-r--r--CHANGELOG.txt3
-rw-r--r--modules/filter/filter.module80
-rw-r--r--modules/system/system.install28
3 files changed, 110 insertions, 1 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index dd927d66e..960321085 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -40,6 +40,9 @@ Drupal 6.0, xxxx-xx-xx (development version)
* Added form to provide initial site information during installation.
* Added ability to provide extra installation steps programmatically.
* Made it possible to import interface translations at install time.
+- Added the HTML corrector filter:
+ * Fixes faulty and chopped off HTML in postings.
+ * Tags are now automatically closed at the end of the teaser.
Drupal 5.0, 2007-01-15
----------------------
diff --git a/modules/filter/filter.module b/modules/filter/filter.module
index d891ab2a6..bfc7d1c49 100644
--- a/modules/filter/filter.module
+++ b/modules/filter/filter.module
@@ -955,7 +955,7 @@ function theme_filter_tips_more_info() {
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
switch ($op) {
case 'list':
- return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'));
+ return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
case 'description':
switch ($delta) {
@@ -965,6 +965,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).');
case 2:
return t('Turns web and e-mail addresses into clickable links.');
+ case 3:
+ return t('Corrects faulty and chopped off HTML in postings.');
default:
return;
}
@@ -977,6 +979,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
return _filter_autop($text);
case 2:
return _filter_url($text, $format);
+ case 3:
+ return _filter_htmlcorrector($text);
default:
return $text;
}
@@ -1099,6 +1103,80 @@ function _filter_url($text, $format) {
}
/**
+ * Scan input and make sure that all HTML tags are properly closed and nested.
+ *
+ * A '<' that cannot start a tag is escaped as '&lt;'. Closing tags with no
+ * matching opening tag are dropped, tags left open are closed (innermost
+ * first), and single-use tags such as <br> are rewritten in XHTML form.
+ * Because '<!' does not start a tag here, comments are escaped as well.
+ *
+ * @param $text
+ *   The HTML string to correct.
+ * @return
+ *   The corrected HTML string.
+ */
+function _filter_htmlcorrector($text) {
+  // Prepare tag lists.
+  static $no_nesting, $single_use;
+  if (!isset($no_nesting)) {
+    // Tags which cannot be nested but are typically left unclosed.
+    $no_nesting = drupal_map_assoc(array('li', 'p'));
+
+    // Single use tags in HTML4
+    $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
+  }
+
+  // Properly entify angles. The lookahead leaves the following character in
+  // place so runs such as '<<' are fully escaped, and '$' also catches a '<'
+  // left dangling at the very end of chopped off text.
+  $text = preg_replace('!<(?=[^a-zA-Z/]|$)!', '&lt;', $text);
+
+  // Split tags from text.
+  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+  // Note: the resulting array alternates between literals and captured tags
+  // and begins and ends with a literal (an empty string where needed).
+
+  $tag = false; // Odd/even counter. Tag or no tag.
+  $stack = array();
+  $output = '';
+  foreach ($split as $value) {
+    // Process HTML tags.
+    if ($tag) {
+      $closing = ($value[0] == '/');
+      // The tag name ends at the first whitespace character or slash, so that
+      // '<br/>' and tags with a newline before their attributes are recognized.
+      $name_parts = preg_split('!\s|/!', strtolower($closing ? substr($value, 1) : $value), 2);
+      $tagname = $name_parts[0];
+      // Closing tag
+      if ($closing) {
+        // Discard XHTML closing tags for single use tags.
+        if (!isset($single_use[$tagname])) {
+          // See if we possibly have a matching opening tag on the stack.
+          if (in_array($tagname, $stack, TRUE)) {
+            // Close other tags lingering first.
+            do {
+              $output .= '</'. $stack[0] .'>';
+            } while (array_shift($stack) !== $tagname);
+          }
+          // Otherwise, discard it.
+        }
+      }
+      // Opening tag
+      else {
+        // See if we have an identical 'no nesting' tag already open and close it if found.
+        if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
+          $output .= '</'. array_shift($stack) .'>';
+        }
+        // Push non-single-use tags onto the stack
+        if (!isset($single_use[$tagname])) {
+          array_unshift($stack, $tagname);
+        }
+        // Add trailing slash to single-use tags as per X(HT)ML.
+        else {
+          $value = rtrim($value, ' /') . ' /';
+        }
+        $output .= '<'. $value .'>';
+      }
+    }
+    else {
+      // Passthrough all text.
+      $output .= $value;
+    }
+    $tag = !$tag;
+  }
+  // Close remaining tags.
+  while (count($stack) > 0) {
+    $output .= '</'. array_shift($stack) .'>';
+  }
+  return $output;
+}
+
+/**
* Make links out of absolute URLs.
*/
function _filter_url_parse_full_links($match) {
diff --git a/modules/system/system.install b/modules/system/system.install
index 7bc70779f..30d4a62d3 100644
--- a/modules/system/system.install
+++ b/modules/system/system.install
@@ -1209,12 +1209,16 @@ function system_install() {
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 0, 1)");
// Line break filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 1, 2)");
+ // HTML corrector filter.
+ db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 3, 10)");
// Full HTML:
// URL filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 2, 0)");
// Line break filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 1, 1)");
+ // HTML corrector filter (format 2 is the Full HTML format set up above).
+ db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 3, 10)");
db_query("INSERT INTO {variable} (name,value) VALUES ('filter_html_1','i:1;')");
@@ -4063,6 +4067,30 @@ function system_update_6017() {
}
/**
+ * Add HTML corrector to HTML formats or replace the old module if it was in use.
+ *
+ * @return
+ *   An array of update results; every entry is an array with 'success' and
+ *   'query' keys, the same shape as the entry added from update_sql().
+ */
+function system_update_6018() {
+  $ret = array();
+
+  // Disable htmlcorrector.module, if it exists and replace its filter.
+  if (module_exists('htmlcorrector')) {
+    module_disable(array('htmlcorrector'));
+    // Filter assignments (module, delta) are stored in {filters}; the
+    // {filter_formats} table only describes the formats themselves.
+    $ret[] = update_sql("UPDATE {filters} SET module = 'filter', delta = 3 WHERE module = 'htmlcorrector'");
+    $ret[] = array('success' => TRUE, 'query' => t('HTML Corrector module was disabled; this functionality has now been added to core.'));
+    return $ret;
+  }
+
+  // Otherwise, find any format with 'HTML' in its name and add the filter at the end.
+  $result = db_query("SELECT format, name FROM {filter_formats} WHERE name LIKE '%HTML%'");
+  while ($format = db_fetch_object($result)) {
+    // Place the corrector after every filter already in the format, and never
+    // at a weight lower than 10.
+    $weight = db_result(db_query("SELECT MAX(weight) FROM {filters} WHERE format = %d", $format->format));
+    $success = db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (%d, '%s', %d, %d)", $format->format, 'filter', 3, max(10, $weight + 1));
+    // Report each insert so the update results show what was changed.
+    $ret[] = array('success' => $success !== FALSE, 'query' => t("HTML corrector filter added to the '@name' input format.", array('@name' => $format->name)));
+  }
+
+  return $ret;
+}
+
+/**
* @} End of "defgroup updates-5.x-to-6.x"
* The next series of updates should start at 7000.
*/