diff options
author | Dries Buytaert <dries@buytaert.net> | 2007-05-20 16:44:35 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2007-05-20 16:44:35 +0000 |
commit | 667bf95308cfbaac5929bee70e9ba98fa92ed3db (patch) | |
tree | 23e51f2eabbc0cc69847b987f607c84bdbefeb1f /modules/filter | |
parent | d78badccbc51fde2e21d7799a816b1a64d0058b6 (diff) | |
download | brdo-667bf95308cfbaac5929bee70e9ba98fa92ed3db.tar.gz brdo-667bf95308cfbaac5929bee70e9ba98fa92ed3db.tar.bz2 |
- Patch #54833 by Steven: added an HTML corrector.
Diffstat (limited to 'modules/filter')
-rw-r--r-- | modules/filter/filter.module | 80 |
1 files changed, 79 insertions, 1 deletions
diff --git a/modules/filter/filter.module b/modules/filter/filter.module index d891ab2a6..bfc7d1c49 100644 --- a/modules/filter/filter.module +++ b/modules/filter/filter.module @@ -955,7 +955,7 @@ function theme_filter_tips_more_info() { function filter_filter($op, $delta = 0, $format = -1, $text = '') { switch ($op) { case 'list': - return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter')); + return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector')); case 'description': switch ($delta) { @@ -965,6 +965,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') { return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).'); case 2: return t('Turns web and e-mail addresses into clickable links.'); + case 3: + return t('Corrects faulty and chopped off HTML in postings.'); default: return; } @@ -977,6 +979,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') { return _filter_autop($text); case 2: return _filter_url($text, $format); + case 3: + return _filter_htmlcorrector($text); default: return $text; } @@ -1099,6 +1103,80 @@ function _filter_url($text, $format) { } /** + * Scan input and make sure that all HTML tags are properly closed and nested. + */ +function _filter_htmlcorrector($text) { + // Prepare tag lists. + static $no_nesting, $single_use; + if (!isset($no_nesting)) { + // Tags which cannot be nested but are typically left unclosed. + $no_nesting = drupal_map_assoc(array('li', 'p')); + + // Single use tags in HTML4 + $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame')); + } + + // Properly entify angles. + $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text); + + // Split tags from text. 
+ $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + // Note: PHP ensures the array consists of alternating delimiters and literals + // and begins and ends with a literal (inserting $null as required). + + $tag = false; // Odd/even counter. Tag or no tag. + $stack = array(); + $output = ''; + foreach ($split as $value) { + // Process HTML tags. + if ($tag) { + list($tagname) = explode(' ', strtolower($value), 2); + // Closing tag + if ($tagname{0} == '/') { + $tagname = substr($tagname, 1); + // Discard XHTML closing tags for single use tags. + if (!isset($single_use[$tagname])) { + // See if we possibly have a matching opening tag on the stack. + if (in_array($tagname, $stack)) { + // Close other tags lingering first. + do { + $output .= '</'. $stack[0] .'>'; + } while (array_shift($stack) != $tagname); + } + // Otherwise, discard it. + } + } + // Opening tag + else { + // See if we have an identical 'no nesting' tag already open and close it if found. + if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) { + $output .= '</'. array_shift($stack) .'>'; + } + // Push non-single-use tags onto the stack + if (!isset($single_use[$tagname])) { + array_unshift($stack, $tagname); + } + // Add trailing slash to single-use tags as per X(HT)ML. + else { + $value = rtrim($value, ' /') . ' /'; + } + $output .= '<'. $value .'>'; + } + } + else { + // Passthrough all text. + $output .= $value; + } + $tag = !$tag; + } + // Close remaining tags. + while (count($stack) > 0) { + $output .= '</'. array_shift($stack) .'>'; + } + return $output; +} + +/** * Make links out of absolute URLs. */ function _filter_url_parse_full_links($match) { |