GeSHi update to 1.0.7.22

darcs-hash:20080620160628-7ad00-1bfd8f407b5b38c29c7879d67da2ea3dbd0d1816.gz
author: Andreas Gohr <andi@splitbrain.org> 2008-06-20 18:06:28 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2008-06-20 18:06:28 +0200
commit: bb62d5fbe1aa438a9e655573ea9d8c8d3183006b (patch)
tree: 050838041b33d8e8c5a8d4d9c93472ce099b2ed0 /inc/geshi.php
parent: 715bdf1fe13fba0611cf88311bbc3d7945b4a544 (diff)
download: rpg-bb62d5fbe1aa438a9e655573ea9d8c8d3183006b.tar.gz
rpg-bb62d5fbe1aa438a9e655573ea9d8c8d3183006b.tar.bz2
1 files changed, 554 insertions, 415 deletions
diff --git a/inc/geshi.php b/inc/geshi.php
index 362acd9c6..2fbf35a77 100644
--- a/inc/geshi.php
+++ b/inc/geshi.php
@@ -41,7 +41,7 @@
 //
 
 /** The version of this GeSHi file */
-define('GESHI_VERSION', '1.0.7.21');
+define('GESHI_VERSION', '1.0.7.22');
 
 // Define the root directory for the GeSHi code tree
 if (!defined('GESHI_ROOT')) {
@@ -326,13 +326,13 @@ class GeSHi {
      */
     var $highlight_extra_lines_style = 'color: #cc0; background-color: #ffc;';
 
-	/**
-	 * The line ending
-	 * If null, nl2br() will be used on the result string.
-	 * Otherwise, all instances of \n will be replaced with $line_ending
-	 * @var string
-	 */
-	var $line_ending = null;
+    /**
+     * The line ending
+     * If null, nl2br() will be used on the result string.
+     * Otherwise, all instances of \n will be replaced with $line_ending
+     * @var string
+     */
+    var $line_ending = null;
 
     /**
      * Number at which line numbers should start at
@@ -350,7 +350,7 @@ class GeSHi {
      *  The style for the actual code
      * @var string
      */
-    var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal;';
+    var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal; font-style: normal;';
 
     /**
      * The overall class for this code block
@@ -368,7 +368,7 @@ class GeSHi {
      * Line number styles
      * @var string
      */
-    var $line_style1 = 'font-family: \'Courier New\', Courier, monospace; color: black; font-weight: normal; font-style: normal;';
+    var $line_style1 = 'font-weight: normal;';
 
     /**
      * Line number styles for fancy lines
@@ -383,6 +383,13 @@ class GeSHi {
     var $line_numbers = GESHI_NO_LINE_NUMBERS;
 
     /**
+     * Flag to decide if multi line spans are allowed. Set it to false to make sure
+     * each tag is closed before and reopened after each linefeed.
+     * @var boolean
+     */
+    var $allow_multiline_span = true;
+
+    /**
      * The "nth" value for fancy line highlighting
      * @var int
      */
@@ -394,11 +401,11 @@ class GeSHi {
      */
     var $tab_width = 8;
 
-	/**
-	 * Should we use language-defined tab stop widths?
-	 * @var int
-	 */
-	var $use_language_tab_width = false;
+    /**
+     * Should we use language-defined tab stop widths?
+     * @var boolean
+     */
+    var $use_language_tab_width = false;
 
     /**
      * Default target for keyword links
@@ -419,6 +426,13 @@ class GeSHi {
      */
     var $keyword_links = true;
 
+    /**
+     * Currently loaded language file
+     * @var string
+     * @since 1.0.7.22
+     */
+    var $loaded_language = '';
+
     /**#@-*/
 
     /**
@@ -524,7 +538,7 @@ class GeSHi {
      */
     function set_language_path($path) {
         if ($path) {
-            $this->language_path = ('/' == substr($path, strlen($path) - 1, 1)) ? $path : $path . '/';
+            $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
             $this->set_language($this->language);        // otherwise set_language_path has no effect
         }
     }
@@ -685,6 +699,29 @@ class GeSHi {
     }
 
     /**
+     * Sets whether spans and other HTML markup generated by GeSHi can
+     * span over multiple lines or not. Defaults to true to reduce overhead.
+     * Set it to false if you want to manipulate the output or manually display
+     * the code in an ordered list.
+     *
+     * @param boolean Whether multiline spans are allowed or not
+     * @since 1.0.7.22
+     */
+    function enable_multiline_span($flag) {
+        $this->allow_multiline_span = (bool) $flag;
+    }
+
+    /**
+     * Get current setting for multiline spans, see GeSHi::enable_multiline_span().
+     *
+     * @see enable_multiline_span
+     * @return bool
+     */
+    function get_multiline_span() {
+        return $this->allow_multiline_span;
+    }
+
+    /**
      * Sets the style for a keyword group. If $preserve_defaults is
      * true, then styles are merged with the default styles, with the
      * user defined styles having priority
@@ -702,6 +739,9 @@ class GeSHi {
         else {
             $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
         }
+        if(!isset($this->lexic_permissions['KEYWORDS'][$key])) {
+            $this->lexic_permissions['KEYWORDS'][$key] = true;
+        }
     }
 
     /**
@@ -1013,30 +1053,31 @@ class GeSHi {
         }
     }
 
-	/**
-	 * Sets whether or not to use tab-stop width specifed by language
-	 *
-	 * @param boolean Whether to use language-specific tab-stop widths
+    /**
+     * Sets whether or not to use tab-stop width specified by language
+     *
+     * @param boolean Whether to use language-specific tab-stop widths
      * @since 1.0.7.20
-	 */
-	function set_use_language_tab_width($use) {
-		$this->use_language_tab_width = (bool) $use;
-	}
-
-	/**
-	 * Returns the tab width to use, based on the current language and user
-	 * preference
-	 *
-	 * @return int Tab width
+     */
+    function set_use_language_tab_width($use) {
+        $this->use_language_tab_width = (bool) $use;
+    }
+
+    /**
+     * Returns the tab width to use, based on the current language and user
+     * preference
+     *
+     * @return int Tab width
      * @since 1.0.7.20
-	 */
-	function get_real_tab_width() {
-		if (!$this->use_language_tab_width || !isset($this->language_data['TAB_WIDTH'])) {
-			return $this->tab_width;
-		} else {
-			return $this->language_data['TAB_WIDTH'];
-		}
-	}
+     */
+    function get_real_tab_width() {
+        if (!$this->use_language_tab_width ||
+            !isset($this->language_data['TAB_WIDTH'])) {
+            return $this->tab_width;
+        } else {
+            return $this->language_data['TAB_WIDTH'];
+        }
+    }
 
     /**
      * Enables/disables strict highlighting. Default is off, calling this
@@ -1101,7 +1142,7 @@ class GeSHi {
      * @todo static?
      */
     function get_language_name_from_extension( $extension, $lookup = array() ) {
-        if ( !is_array($lookup) || !count($lookup)) {
+        if ( !is_array($lookup) || empty($lookup)) {
             $lookup = array(
                 'actionscript' => array('as'),
                 'ada' => array('a', 'ada', 'adb', 'ads'),
@@ -1119,6 +1160,7 @@ class GeSHi {
                 'css' => array('css'),
                 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
                 'dos' => array('bat', 'cmd'),
+                'gettext' => array('po', 'pot'),
                 'html4strict' => array('html', 'htm'),
                 'java' => array('java'),
                 'javascript' => array('js'),
@@ -1144,10 +1186,8 @@ class GeSHi {
         }
 
         foreach ($lookup as $lang => $extensions) {
-            foreach ($extensions as $ext) {
-                if ($ext == $extension) {
-                    return $lang;
-                }
+            if (in_array($extension, $extensions)) {
+                return $lang;
             }
         }
         return '';
@@ -1172,7 +1212,7 @@ class GeSHi {
      */
     function load_from_file($file_name, $lookup = array()) {
         if (is_readable($file_name)) {
-            $this->set_source(implode('', file($file_name)));
+            $this->set_source(file_get_contents($file_name));
             $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
         }
         else {
@@ -1370,15 +1410,16 @@ class GeSHi {
     function highlight_lines_extra($lines, $style = null) {
         if (is_array($lines)) {
             foreach ($lines as $line) {
-                $this->highlight_lines_extra(line, $style);
+                $this->highlight_lines_extra($line, $style);
             }
         }
         else {
-            $this->highlight_extra_lines[intval($lines)] = intval($lines);
+            $lines = intval($lines);
+            $this->highlight_extra_lines[$lines] = $lines;
             if ($style != null) {
-            	$this->highlight_extra_lines_styles[intval($lines)] = $style;
+                $this->highlight_extra_lines_styles[$lines] = $style;
             } else {
-            	unset($this->highlight_extra_lines_styles[intval($lines)]);
+                unset($this->highlight_extra_lines_styles[$lines]);
             }
         }
     }
@@ -1393,15 +1434,15 @@ class GeSHi {
         $this->highlight_extra_lines_style = $styles;
     }
 
-	/**
-	 * Sets the line-ending
-	 *
-	 * @param string The new line-ending
-	 * @since 1.0.2
-	 */
-	function set_line_ending($line_ending) {
-		$this->line_ending = (string)$line_ending;
-	}
+    /**
+     * Sets the line-ending
+     *
+     * @param string The new line-ending
+     * @since 1.0.2
+     */
+    function set_line_ending($line_ending) {
+        $this->line_ending = (string)$line_ending;
+    }
 
     /**
      * Sets what number line numbers should start at. Should
@@ -1447,7 +1488,7 @@ class GeSHi {
      * @since 1.0.2
      */
     function enable_keyword_links($enable = true) {
-        $this->keyword_links = ($enable) ? true : false;
+        $this->keyword_links = (bool) $enable;
     }
 
     /**
@@ -1480,9 +1521,6 @@ class GeSHi {
 
         // Initialise various stuff
         $length           = strlen($code);
-        $STRING_OPEN      = '';
-        $CLOSE_STRING     = false;
-        $ESCAPE_CHAR_OPEN = false;
         $COMMENT_MATCHED  = false;
         // Turn highlighting on if strict mode doesn't apply to this language
         $HIGHLIGHTING_ON  = ( !$this->strict_mode ) ? true : '';
@@ -1502,53 +1540,38 @@ class GeSHi {
         if ($this->strict_mode) {
             // Break the source into bits. Each bit will be a portion of the code
             // within script delimiters - for example, HTML between < and >
-            $parts = array(0 => array(0 => ''));
+            $parts = array(0 => array(0 => '', 1 => ''));
             $k = 0;
-            for ($i = 0; $i < $length; $i++) {
-                $char = substr($code, $i, 1);
-                if (!$HIGHLIGHTING_ON) {
-                    foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
-                        foreach ($delimiters as $open => $close) {
-                            // Get the next little bit for this opening string
-                            $check = substr($code, $i, strlen($open));
-                            // If it matches...
-                            if ($check == $open) {
-                                // We start a new block with the highlightable
-                                // code in it
-                                $HIGHLIGHTING_ON = $open;
-                                $i += strlen($open) - 1;
-                                $char = $open;
-                                $parts[++$k][0] = $char;
-
-                                // No point going around again...
-                                break(2);
+            for ($i = 0; $i < $length; ++$i) {
+                foreach ($this->language_data['SCRIPT_DELIMITERS'] as $delimiters) {
+                    foreach ($delimiters as $open => $close) {
+                        // Get the next little bit for this opening string
+                        $open_strlen = strlen($open);
+                        $check = substr($code, $i, $open_strlen);
+                        // If it matches...
+                        if ($check == $open) {
+                            // We start a new block with the highlightable
+                            // code in it
+                            ++$k;
+                            $parts[$k][0] = $open;
+                            $close_i = strpos($code, $close, $i + $open_strlen)  + strlen($close);
+                            if ($close_i === false) {
+                                $close_i = $length - 1;
                             }
+                            $parts[$k][1] = substr($code, $i, $close_i - $i);
+                            $i = $close_i - 1;
+                            ++$k;
+                            $parts[$k][0] = '';
+                            $parts[$k][1] = '';
+
+                            // No point going around again...
+                            continue 3;
                         }
                     }
                 }
-                else {
-                    foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
-                        foreach ($delimiters as $open => $close) {
-                            if ($open == $HIGHLIGHTING_ON) {
-                                // Found the closing tag
-                                break(2);
-                            }
-                        }
-                    }
-                    // We check code from our current position BACKWARDS. This is so
-                    // the ending string for highlighting can be included in the block
-                    $check = substr($code, $i - strlen($close) + 1, strlen($close));
-                    if ($check == $close) {
-                        $HIGHLIGHTING_ON = '';
-                        // Add the string to the rest of the string for this part
-                        $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
-                        $parts[++$k][0] = '';
-                        $char = '';
-                    }
-                }
-                $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
+                // only non-highlightable text reaches this point
+                $parts[$k][1] .= $code[$i];
             }
-            $HIGHLIGHTING_ON = '';
         }
         else {
             // Not strict mode - simply dump the source into
@@ -1561,6 +1584,45 @@ class GeSHi {
             );
         }
 
+        //Unset variables we won't need any longer
+        unset($code);
+
+        //Preload some repeatedly used values regarding hardquotes ...
+        $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
+        $hq_strlen = strlen($hq);
+
+        //Preload if line numbers are to be generated afterwards
+        //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
+        $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS
+                                || !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
+
+        //preload the escape char for faster checking ...
+        $escaped_escape_char = GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
+
+        if (!$this->use_classes) {
+            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
+        }
+        else {
+            $string_attributes = ' class="st0"';
+            $escape_char_attributes = ' class="es0"';
+        }
+
+        // this is used for single-line comments
+        $sc_disallowed_before = "";
+        $sc_disallowed_after = "";
+
+        if(isset($this->language_data['PARSER_CONTROL'])) {
+            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
+                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
+                    $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
+                }
+                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
+                    $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
+                }
+            }
+        }
+
         // Now we go through each part. We know that even-indexed parts are
         // code that shouldn't be highlighted, and odd-indexed parts should
         // be highlighted
@@ -1571,10 +1633,8 @@ class GeSHi {
                 if ($this->strict_mode) {
                     // Find the class key for this block of code
                     foreach ($this->language_data['SCRIPT_DELIMITERS'] as $script_key => $script_data) {
-                        foreach ($script_data as $open => $close) {
-                            if ($data[0] == $open) {
-                                break(2);
-                            }
+                        if (isset($script_data[$data[0]])) {
+                            break;
                         }
                     }
 
@@ -1598,208 +1658,286 @@ class GeSHi {
                     // Now, highlight the code in this block. This code
                     // is really the engine of GeSHi (along with the method
                     // parse_non_string_part).
+
+                    // cache comment regexps incrementally
+                    $comment_regexp_cache = array();
+                    $next_comment_regexp_pos = -1;
+                    $comment_regexp_cache_per_key = array();
+
                     $length = strlen($part);
-                    for ($i = 0; $i < $length; $i++) {
+                    for ($i = 0; $i < $length; ++$i) {
                         // Get the next char
-                        $char = substr($part, $i, 1);
-                        $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
-                        // Is this char the newline and line numbers being used?
-                        if (($this->line_numbers != GESHI_NO_LINE_NUMBERS
-                            || count($this->highlight_extra_lines) > 0)
-                            && $char == "\n") {
-                            // If so, is there a string open? If there is, we should end it before
-                            // the newline and begin it again (so when <li>s are put in the source
-                            // remains XHTML compliant)
-                            // note to self: This opens up possibility of config files specifying
-                            // that languages can/cannot have multiline strings???
-                            if ($STRING_OPEN) {
-                                if (!$this->use_classes) {
-                                    $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
-                                }
-                                else {
-                                    $attributes = ' class="st0"';
+                        $char = $part[$i];
+
+                        if (in_array($char, $this->language_data['QUOTEMARKS']) && $this->lexic_permissions['STRINGS']) {
+                            // The start of a new string
+
+                            // parse the stuff before this
+                            $result .= $this->parse_non_string_part($stuff_to_parse);
+                            $stuff_to_parse = '';
+
+                            // now handle the string
+                            $string = '';
+
+                            // look for closing quote
+                            $start = $i;
+                            while ($close_pos = strpos($part, $char, $start + 1)) {
+                                $start = $close_pos;
+                                if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
+                                    // this quote is escaped
+                                    continue;
                                 }
-                                $char = '</span>' . $char . "<span$attributes>";
+                                // found closing quote
+                                break;
                             }
-                        }
-                        else if ($char == $STRING_OPEN) {
-                            // A match of a string delimiter
-                            if (($this->lexic_permissions['ESCAPE_CHAR'] && $ESCAPE_CHAR_OPEN) ||
-                                ($this->lexic_permissions['STRINGS'] && !$ESCAPE_CHAR_OPEN)) {
-                                $char = GeSHi::hsc($char) . '</span>';
+                            if (!$close_pos) {
+                              // span till the end of this $part when no closing delimiter is found
+                              $close_pos = $length;
                             }
-                            $escape_me = false;
-                            if ($HARDQUOTE_OPEN) {
-                                if ($ESCAPE_CHAR_OPEN) {
-                                    $escape_me = true;
-                                }
-                                else {
-                                    foreach ($this->language_data['HARDESCAPE'] as $hardesc) {
-                                        if (substr($part, $i, strlen($hardesc)) == $hardesc) {
-                                            $escape_me = true;
-                                            break;
-                                        }
+
+                            $string = substr($part, $i, $close_pos - $i + 1);
+                            $i = $close_pos;
+
+                            // handle escape chars and encode html chars
+                            // (special because when we have escape chars within our string they may not be escaped)
+                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
+                                $start = 0;
+                                $new_string = '';
+                                while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
+                                    $new_string .= GeSHi::hsc(substr($string, $start, $es_pos - $start))
+                                                  . "<span$escape_char_attributes>" . $escaped_escape_char;
+                                    if ($string[$es_pos + 1] == "\n") {
+                                      // don't put a span around newlines
+                                      $new_string .= "</span>\n";
+                                    } else {
+                                      $new_string .= GeSHi::hsc($string[$es_pos + 1]) . '</span>';
                                     }
+                                    $start = $es_pos + 2;
                                 }
+                                $string = $new_string . GeSHi::hsc(substr($string, $start));
+                                $new_string = '';
+                            } else {
+                                $string = GeSHi::hsc($string);
                             }
 
-                            if (!$ESCAPE_CHAR_OPEN) {
-                                $STRING_OPEN = '';
-                                $CLOSE_STRING = true;
-                            }
-                            if (!$escape_me) {
-                                $HARDQUOTE_OPEN = false;
+                            if ($check_linenumbers) {
+                                // Are line numbers used? If so, we should end the string before
+                                // the newline and begin it again (so when <li>s are put in the source
+                                // remains XHTML compliant)
+                                // note to self: This opens up possibility of config files specifying
+                                // that languages can/cannot have multiline strings???
+                                $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
                             }
-                            $ESCAPE_CHAR_OPEN = false;
+
+                            $result .= "<span$string_attributes>" . $string . '</span>';
+                            $string = '';
+                            continue;
                         }
-                        else if (in_array($char, $this->language_data['QUOTEMARKS']) &&
-                            ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
-                            // The start of a new string
-                            $STRING_OPEN = $char;
-                            if (!$this->use_classes) {
-                                $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+                        else if ($hq && substr($part, $i, $hq_strlen) == $hq && $this->lexic_permissions['STRINGS']) {
+                            // The start of a hard quoted string
+
+                            // parse the stuff before this
+                            $result .= $this->parse_non_string_part($stuff_to_parse);
+                            $stuff_to_parse = '';
+
+                            // now handle the string
+                            $string = '';
+
+                            // look for closing quote
+                            $start = $i + $hq_strlen;
+                            while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start + 1)) {
+                                $start = $close_pos;
+                                if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
+                                    // make sure this quote is not escaped
+                                    foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
+                                        if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
+                                            // this quote is escaped
+                                            continue 2;
+                                        }
+                                    }
+                                }
+                                // found closing quote
+                                break;
                             }
-                            else {
-                                $attributes = ' class="st0"';
+                            if (!$close_pos) {
+                              // span till the end of this $part when no closing delimiter is found
+                              $close_pos = $length;
                             }
-                            $char = "<span$attributes>" . GeSHi::hsc($char);
 
-                            $result .= $this->parse_non_string_part( $stuff_to_parse );
-                            $stuff_to_parse = '';
-                        }
-                        else if ($hq && substr($part, $i, strlen($hq)) == $hq &&
-                            ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
-                            // The start of a hard quoted string
-                            $STRING_OPEN = $this->language_data['HARDQUOTE'][1];
-                            if (!$this->use_classes) {
-                                $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+                            $string = substr($part, $i, $close_pos - $i + 1);
+                            $i = $close_pos;
+
+                            // handle escape chars and encode html chars
+                            // (special because when we have escape chars within our string they may not be escaped)
+                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
+                                $start = 0;
+                                $new_string = '';
+                                while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
+                                    // html escape stuff before
+                                    $new_string .= GeSHi::hsc(substr($string, $start, $es_pos - $start));
+                                    // check if this is a hard escape
+                                    foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
+                                        if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
+                                            // indeed, this is a hardescape
+                                          $new_string .= "<span$escape_char_attributes>" . GeSHi::hsc($hardescape)
+                                                        . '</span>';
+                                          $start = $es_pos + strlen($hardescape);
+                                          continue 2;
+                                        }
+                                    }
+                                    // not a hard escape
+                                    $new_string .= $escaped_escape_char;
+                                    $start = $es_pos + 1;
+                                }
+                                $string = $new_string . GeSHi::hsc(substr($string, $start));
+                            } else {
+                                $string = GeSHi::hsc($string);
                             }
-                            else {
-                                $attributes = ' class="st0"';
+
+                            if ($check_linenumbers) {
+                                // Are line numbers used? If so, we should end the string before
+                                // the newline and begin it again (so when <li>s are put in the source
+                                // remains XHTML compliant)
+                                // note to self: This opens up possibility of config files specifying
+                                // that languages can/cannot have multiline strings???
+                                $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
                             }
-                            $char = "<span$attributes>" . $hq;
-                            $i += strlen($hq) - 1;
-                            $HARDQUOTE_OPEN = true;
-                            $result .= $this->parse_non_string_part($stuff_to_parse);
-                            $stuff_to_parse = '';
+
+                            $result .= "<span$string_attributes>" . $string . '</span>';
+                            $string = '';
+                            continue;
                         }
-                        else if ($char == $this->language_data['ESCAPE_CHAR'] && $STRING_OPEN != '') {
-                            // An escape character
-                            if (!$ESCAPE_CHAR_OPEN) {
-                                $ESCAPE_CHAR_OPEN = !$HARDQUOTE_OPEN;  // true unless $HARDQUOTE_OPEN
-                                if ($HARDQUOTE_OPEN) {
-                                    foreach ($this->language_data['HARDESCAPE'] as $hard) {
-                                        if (substr($part, $i, strlen($hard)) == $hard) {
-                                            $ESCAPE_CHAR_OPEN = true;
+                        else {
+                            // update regexp comment cache if needed
+                            if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
+                                $next_comment_regexp_pos = $length;
+                                foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
+                                    $match_i = false;
+                                    if (isset($comment_regexp_cache_per_key[$comment_key]) &&
+                                        $comment_regexp_cache_per_key[$comment_key] >= $i) {
+                                        // we have already matched something
+                                        $match_i = $comment_regexp_cache_per_key[$comment_key];
+                                    }
+                                    else if (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
+                                        $match_i = $match[0][1];
+                                        $comment_regexp_cache[$match_i] = array(
+                                            'key' => $comment_key,
+                                            'length' => strlen($match[0][0]),
+                                        );
+                                        $comment_regexp_cache_per_key[$comment_key] = $match_i;
+                                    } else {
+                                        $comment_regexp_cache_per_key[$comment_key] = false;
+                                        continue;
+                                    }
+
+                                    if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
+                                        $next_comment_regexp_pos = $match_i;
+                                        if ($match_i === $i) {
                                             break;
                                         }
                                     }
                                 }
-                                if ($ESCAPE_CHAR_OPEN && $this->lexic_permissions['ESCAPE_CHAR']) {
+                            }
+                            //Have a look for regexp comments
+                            if ($i == $next_comment_regexp_pos) {
+                                $COMMENT_MATCHED = true;
+                                $comment = $comment_regexp_cache[$next_comment_regexp_pos];
+                                $test_str = GeSHi::hsc(substr($part, $i, $comment['length']));
+
+                                //@todo If remove important do remove here
+                                if ($this->lexic_permissions['COMMENTS']['MULTI']) {
                                     if (!$this->use_classes) {
-                                        $attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
+                                        $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
                                     }
                                     else {
-                                        $attributes = ' class="es0"';
+                                        $attributes = ' class="co' . $comment['key'] . '"';
                                     }
-                                    $char = "<span$attributes>" . $char;
-                                    if (substr($code, $i + 1, 1) == "\n") {
-                                        // escaping a newline, what's the point in putting the span around
-                                        // the newline? It only causes hassles when inserting line numbers
-                                        $char .= '</span>';
-                                        $ESCAPE_CHAR_OPEN = false;
+                                    $test_str = "<span$attributes>" . $test_str . "</span>";
+
+                                    // Short-cut through all the multiline code
+                                    if ($check_linenumbers) {
+                                        // strreplace to put close span and open span around multiline newlines
+                                        $test_str = str_replace(
+                                            "\n", "</span>\n<span$attributes>",
+                                            str_replace("\n ", "\n&nbsp;", $test_str)
+                                        );
                                     }
                                 }
+
+                                $i += $comment['length'] - 1;
+
+                                // parse the rest
+                                $result .= $this->parse_non_string_part($stuff_to_parse);
+                                $stuff_to_parse = '';
                             }
-                            else {
-                                $ESCAPE_CHAR_OPEN = false;
-                                if ($this->lexic_permissions['ESCAPE_CHAR']) {
-                                    $char .= '</span>';
-                                }
-                            }
-                        }
-                        else if ($ESCAPE_CHAR_OPEN) {
-                            if ($this->lexic_permissions['ESCAPE_CHAR']) {
-                                $char .= '</span>';
-                            }
-                            $ESCAPE_CHAR_OPEN = false;
-                            $test_str = $char;
-                        }
-                        else if ($STRING_OPEN == '') {
-                            // Is this a multiline comment?
-                            foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
-                                $com_len = strlen($open);
-                                $test_str = substr( $part, $i, $com_len );
-                                $test_str_match = $test_str;
-                                if (strtolower($open) == strtolower($test_str)) {
-                                    $COMMENT_MATCHED = true;
-                                    //@todo If remove important do remove here
-                                    if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
-                                        $test_str == GESHI_START_IMPORTANT) {
-                                        if ($test_str != GESHI_START_IMPORTANT) {
-                                            if (!$this->use_classes) {
-                                                $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
+                            // If we haven't matched a regexp comment, try multi-line comments
+                            if (!$COMMENT_MATCHED) {
+                                // Is this a multiline comment?
+                                foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
+                                    $com_len = strlen($open);
+                                    $test_str = substr( $part, $i, $com_len );
+                                    $test_str_match = $test_str;
+                                    if (strtolower($open) == strtolower($test_str)) {
+                                        $COMMENT_MATCHED = true;
+                                        //@todo If remove important do remove here
+                                        if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
+                                            $test_str == GESHI_START_IMPORTANT) {
+                                            if ($test_str != GESHI_START_IMPORTANT) {
+                                                if (!$this->use_classes) {
+                                                    $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
+                                                }
+                                                else {
+                                                    $attributes = ' class="coMULTI"';
+                                                }
+                                                $test_str = "<span$attributes>" . GeSHi::hsc($test_str);
                                             }
                                             else {
-                                                $attributes = ' class="coMULTI"';
+                                                if (!$this->use_classes) {
+                                                    $attributes = ' style="' . $this->important_styles . '"';
+                                                }
+                                                else {
+                                                    $attributes = ' class="imp"';
+                                                }
+                                                // We don't include the start of the comment if it's an
+                                                // "important" part
+                                                $test_str = "<span$attributes>";
                                             }
-                                            $test_str = "<span$attributes>" . GeSHi::hsc($test_str);
                                         }
                                         else {
-                                            if (!$this->use_classes) {
-                                                $attributes = ' style="' . $this->important_styles . '"';
-                                            }
-                                            else {
-                                                $attributes = ' class="imp"';
-                                            }
-                                            // We don't include the start of the comment if it's an
-                                            // "important" part
-                                            $test_str = "<span$attributes>";
+                                            $test_str = GeSHi::hsc($test_str);
                                         }
-                                    }
-                                    else {
-                                        $test_str = GeSHi::hsc($test_str);
-                                    }
 
-                                    $close_pos = strpos( $part, $close, $i + strlen($close) );
+                                        $close_pos = strpos( $part, $close, $i + strlen($open) );
 
-                                    $oops = false;
-                                    if ($close_pos === false) {
-                                        $close_pos = strlen($part);
-                                        $oops = true;
-                                    }
-                                    else {
-                                        $close_pos -= ($com_len - strlen($close));
-                                    }
+                                        if ($close_pos === false) {
+                                            $close_pos = $length;
+                                        }
 
-                                    // Short-cut through all the multiline code
-                                    $rest_of_comment = GeSHi::hsc(substr($part, $i + $com_len, $close_pos - $i));
-                                    if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
-                                        $test_str_match == GESHI_START_IMPORTANT) &&
-                                        ($this->line_numbers != GESHI_NO_LINE_NUMBERS ||
-                                        count($this->highlight_extra_lines) > 0)) {
-                                        // strreplace to put close span and open span around multiline newlines
-                                        $test_str .= str_replace(
-                                            "\n", "</span>\n<span$attributes>",
-                                            str_replace("\n ", "\n&nbsp;", $rest_of_comment)
-                                        );
-                                    }
-                                    else {
-                                        $test_str .= $rest_of_comment;
-                                    }
+                                        // Short-cut through all the multiline code
+                                        $rest_of_comment = GeSHi::hsc(substr($part, $i + strlen($open), $close_pos - $i - strlen($open) + strlen($close)));
+                                        if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
+                                            $test_str_match == GESHI_START_IMPORTANT) &&
+                                            $check_linenumbers) {
+                                            // strreplace to put close span and open span around multiline newlines
+                                            $test_str .= str_replace(
+                                                "\n", "</span>\n<span$attributes>",
+                                                str_replace("\n ", "\n&nbsp;", $rest_of_comment)
+                                            );
+                                        }
+                                        else {
+                                            $test_str .= $rest_of_comment;
+                                        }
 
-                                    if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
-                                        $test_str_match == GESHI_START_IMPORTANT) {
-                                        $test_str .= '</span>';
-                                        if ($oops) {
-                                            $test_str .= "\n";
+                                        if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
+                                            $test_str_match == GESHI_START_IMPORTANT) {
+                                            $test_str .= '</span>';
                                         }
+                                        $i = $close_pos + strlen($close) - 1;
+                                        // parse the rest
+                                        $result .= $this->parse_non_string_part($stuff_to_parse);
+                                        $stuff_to_parse = '';
+                                        break;
                                     }
-									$i = $close_pos + $com_len - 1;
-                                    // parse the rest
-                                    $result .= $this->parse_non_string_part($stuff_to_parse);
-                                    $stuff_to_parse = '';
-                                    break;
                                 }
                             }
                             // If we haven't matched a multiline comment, try single-line comments
@@ -1815,22 +1953,8 @@ class GeSHi {
                                     }
                                     //This check will find special variables like $# in bash or compiler directives of Delphi beginning {$
                                     if($match) {
-                                        $disallowed_before = "";
-                                        $disallowed_after = "";
-
-                                        if(isset($this->language_data['PARSER_CONTROL'])) {
-                                            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
-                                                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
-                                                    $disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
-                                                }
-                                                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
-                                                    $disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
-                                                }
-                                            }
-                                        }
-
-                                        $match = $match && (!strlen($disallowed_before) || ((false === strpos($disallowed_before, substr($part, $i-1, 1))) && (0!=$i)));
-                                        $match = $match && (!strlen($disallowed_after) || ((false === strpos($disallowed_after, substr($part, $i+1, 1))) && (strlen($part)-1>$i)));
+                                        $match = $match && (empty($sc_disallowed_before) || ((false === strpos($sc_disallowed_before, $part[$i-1])) && (0 != $i)));
+                                        $match = $match && (empty($sc_disallowed_after) || ((false === strpos($sc_disallowed_after, $part[$i+1])) && ($length-1>$i)));
                                     }
                                     if ($match) {
                                         $COMMENT_MATCHED = true;
@@ -1849,7 +1973,7 @@ class GeSHi {
                                         $close_pos = strpos($part, "\n", $i);
                                         $oops = false;
                                         if ($close_pos === false) {
-                                            $close_pos = strlen($part);
+                                            $close_pos = $length;
                                             $oops = true;
                                         }
                                         $test_str .= GeSHi::hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
@@ -1869,28 +1993,9 @@ class GeSHi {
                                 }
                             }
                         }
-                        else if ($STRING_OPEN != '') {
-                            // Otherwise, convert it to HTML form
-                            if (strtolower($this->encoding) == 'utf-8') {
-                                //only escape <128 (we don't want to break multibyte chars)
-                                if (ord($char) < 128) {
-                                    $char = GeSHi::hsc($char);
-                                }
-                            }
-                            else {
-                                //encode everthing
-                                $char = GeSHi::hsc($char);
-                            }
-                        }
                         // Where are we adding this char?
                         if (!$COMMENT_MATCHED) {
-                            if (($STRING_OPEN == '') && !$CLOSE_STRING) {
-                                $stuff_to_parse .= $char;
-                            }
-                            else {
-                                $result .= $char;
-                                $CLOSE_STRING = false;
-                            }
+                            $stuff_to_parse .= $char;
                         }
                         else {
                             $result .= $test_str;
@@ -1920,17 +2025,16 @@ class GeSHi {
             }
         }
 
-        // Parse the last stuff (redundant?)
-        $result .= $this->parse_non_string_part($stuff_to_parse);
+        //This fix is related to SF#1923020, but has to be applied regardless of
+        //actually highlighting symbols.
+        $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
+
+//        // Parse the last stuff (redundant?)
+//        $result .= $this->parse_non_string_part($stuff_to_parse);
 
         // Lop off the very first and last spaces
         $result = substr($result, 1, -1);
 
-        // Are we still in a string?
-        if ($STRING_OPEN) {
-            $result .= '</span>';
-        }
-
         // We're finished: stop timing
         $this->set_time($start_time, microtime());
 
@@ -1946,24 +2050,27 @@ class GeSHi {
      * @since  1.0.0
      * @access private
      */
-    function indent($result) {
+    function indent(&$result) {
         /// Replace tabs with the correct number of spaces
         if (false !== strpos($result, "\t")) {
             $lines = explode("\n", $result);
-			$tab_width = $this->get_real_tab_width();
-            foreach ($lines as $key => $line) {
+            $result = null;//Save memory while we process the lines individually
+            $tab_width = $this->get_real_tab_width();
+            $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
+
+            for ($key = 0, $n = count($lines); $key < $n; $key++) {
+                $line = $lines[$key];
                 if (false === strpos($line, "\t")) {
-                    $lines[$key] = $line;
                     continue;
                 }
 
                 $pos = 0;
                 $length = strlen($line);
-                $result_line = '';
+                $lines[$key] = ''; // reduce memory
 
                 $IN_TAG = false;
-                for ($i = 0; $i < $length; $i++) {
-                    $char = substr($line, $i, 1);
+                for ($i = 0; $i < $length; ++$i) {
+                    $char = $line[$i];
                     // Simple engine to work out whether we're in a tag.
                     // If we are we modify $pos. This is so we ignore HTML
                     // in the line and only workout the tab replacement
@@ -1971,26 +2078,27 @@ class GeSHi {
                     // This test could be improved to include strings in the
                     // html so that < or > would be allowed in user's styles
                     // (e.g. quotes: '<' '>'; or similar)
-                    if ($IN_TAG && '>' == $char) {
-                        $IN_TAG = false;
-                        $result_line .= '>';
-                        ++$pos;
+                    if ($IN_TAG) {
+                        if('>' == $char) {
+                            $IN_TAG = false;
+                        }
+                        $lines[$key] .= $char;
                     }
-                    else if (!$IN_TAG && '<' == $char) {
+                    else if ('<' == $char) {
                         $IN_TAG = true;
-                        $result_line .= '<';
-                        ++$pos;
+                        $lines[$key] .= '<';
                     }
-                    else if (!$IN_TAG && '&' == $char) {
-                        $substr = substr($line, $i + 3, 4);
-                        //$substr_5 = substr($line, 5, 1);
+                    else if ('&' == $char) {
+                        $substr = substr($line, $i + 3, 5);
                         $posi = strpos($substr, ';');
-                        if (false !== $posi) {
-                            $pos += $posi + 3;
+                        if (false === $posi) {
+                            ++$pos;
+                        } else {
+                            $pos -= $posi+2;
                         }
-                        $result_line .= '&';
+                        $lines[$key] .= $char;
                     }
-                    else if (!$IN_TAG && "\t" == $char) {
+                    else if ("\t" == $char) {
                         $str = '';
                         // OPTIMISE - move $strs out. Make an array:
                         // $tabs = array(
@@ -1998,28 +2106,32 @@ class GeSHi {
                         //  2 => '&nbsp; ',
                         //  3 => '&nbsp; &nbsp;' etc etc
                         // to use instead of building a string every time
-                        $strs = array(0 => '&nbsp;', 1 => ' ');
-                        for ($k = 0; $k < ($tab_width - (($i - $pos) % $tab_width)); $k++) $str .= $strs[$k % 2];
-                        $result_line .= $str;
-                        $pos += ($i - $pos) % $tab_width + 1;
+                        $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the loop as it doesn't change within the loop
+                        if(($pos%2) || 1 == $tab_end_width) {
+                            $str .= substr($tab_string, 6, $tab_end_width);
+                        } else {
+                            $str .= substr($tab_string, 0, $tab_end_width+5);
+                        }
+                        $lines[$key] .= $str;
+                        $pos += $tab_end_width;
 
                         if (false === strpos($line, "\t", $i + 1)) {
-                            $result_line .= substr($line, $i + 1);
+                            $lines[$key] .= substr($line, $i + 1);
                             break;
                         }
                     }
-                    else if ($IN_TAG) {
+                    else if (0 == $pos && ' ' == $char) {
+                        $lines[$key] .= '&nbsp;';
                         ++$pos;
-                        $result_line .= $char;
                     }
                     else {
-                        $result_line .= $char;
-                        //++$pos;
+                        $lines[$key] .= $char;
+                        ++$pos;
                     }
                 }
-                $lines[$key] = $result_line;
             }
             $result = implode("\n", $lines);
+            unset($lines);//We don't need the lines separated beyond this --- free them!
         }
         // Other whitespace
         // BenBE: Fix to reduce the number of replacements to be done
@@ -2027,12 +2139,12 @@ class GeSHi {
         $result = str_replace('  ', ' &nbsp;', $result);
 
         if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
-			if ($this->line_ending === null) {
-				$result = nl2br($result);
-			} else {
-				$result = str_replace("\n", $this->line_ending, $result);
-			}
-		}
+            if ($this->line_ending === null) {
+                $result = nl2br($result);
+            } else {
+                $result = str_replace("\n", $this->line_ending, $result);
+            }
+        }
         return $result;
     }
 
@@ -2045,13 +2157,14 @@ class GeSHi {
      * @access private
      */
     function change_case($instr) {
-        if ($this->language_data['CASE_KEYWORDS'] == GESHI_CAPS_UPPER) {
-            return strtoupper($instr);
-        }
-        else if ($this->language_data['CASE_KEYWORDS'] == GESHI_CAPS_LOWER) {
-            return strtolower($instr);
+        switch ($this->language_data['CASE_KEYWORDS']) {
+            case GESHI_CAPS_UPPER:
+                return strtoupper($instr);
+            case GESHI_CAPS_LOWER:
+                return strtolower($instr);
+            default:
+                return $instr;
         }
-        return $instr;
     }
 
     /**
@@ -2081,13 +2194,14 @@ class GeSHi {
                     // Old system: strtolower
                     //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
                     // New system: get keyword from language file to get correct case
+                    $lower_keyword = strtolower($keyword);
                     foreach ($this->language_data['KEYWORDS'][$group] as $word) {
-                        if (strtolower($word) == strtolower($keyword)) {
+                        if (strtolower($word) == $lower_keyword) {
                             break;
                         }
                     }
                     $word = ( substr($word, 0, 4) == '&lt;' ) ? substr($word, 4) : $word;
-                    $word = ( substr($word, -4) == '&gt;' ) ? substr($word, 0, strlen($word) - 4) : $word;
+                    $word = ( substr($word, -4) == '&gt;' ) ? substr($word, 0, - 4) : $word;
                     if (!$word) return '';
 
                     return '<|UR1|"' .
@@ -2158,8 +2272,27 @@ class GeSHi {
         // Highlight keywords
         // if there is a couple of alpha symbols there *might* be a keyword
         if (preg_match('#[a-zA-Z]{2,}#', $stuff_to_parse)) {
+            $disallowed_before = "a-zA-Z0-9\$_\|\#;>|^";
+            $disallowed_after = "a-zA-Z0-9_\|%\\-&";
+            if(isset($this->language_data['PARSER_CONTROL'])) {
+                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
+                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
+                        $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
+                    }
+                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
+                        $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
+                    }
+                }
+            }
+
             foreach ($this->language_data['KEYWORDS'] as $k => $keywordset) {
-                if ($this->lexic_permissions['KEYWORDS'][$k]) {
+                if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
+                    $this->lexic_permissions['KEYWORDS'][$k]) {
+
+                    $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
+                    $modifiers = $case_sensitive ? 'e' : 'ie';
+                    $styles = "/$k/";
+
                     foreach ($keywordset as $keyword) {
                         $keyword = preg_quote($keyword, '/');
                         //
@@ -2168,33 +2301,24 @@ class GeSHi {
                         // in just yet - otherwise languages with the keywords "color" or "or" have
                         // a fit.
                         //
-                        if (false !== stristr($stuff_to_parse_pregquote, $keyword )) {
-                            $stuff_to_parse .= ' ';
+                        if ($case_sensitive) {
+                            $keyword_found = strpos($stuff_to_parse_pregquote, $keyword) !== false;
+                        } else {
+                            $keyword_found = stristr($stuff_to_parse_pregquote, $keyword) !== false;
+                        }
+                        if ($keyword_found) {
                             // Might make a more unique string for putting the number in soon
                             // Basically, we don't put the styles in yet because then the styles themselves will
                             // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
-                            $styles = "/$k/";
-                            $modifiers = ($this->language_data['CASE_SENSITIVE'][$k]) ? "e" : "ie";
-
-                            $disallowed_before = "a-zA-Z0-9\$_\|\#;>|^";
-                            $disallowed_after = "a-zA-Z0-9_<\|%\\-&";
-                            if(isset($this->language_data['PARSER_CONTROL'])) {
-                                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
-                                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
-                                        $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
-                                    }
-                                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
-                                        $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
-                                    }
-                                }
-                            }
+
+                            $stuff_to_parse .= ' ';
                             $stuff_to_parse = preg_replace(
-                                "/([^$disallowed_before])($keyword)(?=[^$disallowed_after])/$modifiers",
+                                "/([^$disallowed_before])($keyword)(?!\<DOT\>(?:htm|php))(?=[^$disallowed_after])/$modifiers",
                                 "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
                                 $stuff_to_parse
                             );
 
-                            $stuff_to_parse = substr($stuff_to_parse, 0, strlen($stuff_to_parse) - 1);
+                            $stuff_to_parse = substr($stuff_to_parse, 0, -1);
                         }
                     }
                 }
@@ -2206,7 +2330,9 @@ class GeSHi {
         //
         foreach ($this->language_data['KEYWORDS'] as $k => $kws) {
             if (!$this->use_classes) {
-                $attributes = ' style="' . $this->language_data['STYLES']['KEYWORDS'][$k] . '"';
+                $attributes = ' style="' .
+                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
+                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
             }
             else {
                 $attributes = ' class="kw' . $k . '"';
@@ -2248,7 +2374,7 @@ class GeSHi {
         //
         if ($this->lexic_permissions['BRACKETS']) {
             $code_entities_match = array('[', ']', '(', ')', '{', '}');
-            if (!$this->use_classes) {
+            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
                 $code_entities_replace = array(
                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
@@ -2272,7 +2398,7 @@ class GeSHi {
         }
 
         //FIX for symbol highlighting ...
-        if($this->lexic_permissions['SYMBOLS']) {
+        if($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
             //As this is a costy operation, we avoid doing it for multiple groups ...
             //Instead we perform it for all symbols at once.
             //
@@ -2439,6 +2565,11 @@ class GeSHi {
      * @todo Needs to load keys for lexic permissions for keywords, regexps etc
      */
     function load_language($file_name) {
+        if ($file_name == $this->loaded_language) {
+            // this file is already loaded!
+            return;
+        }
+        $this->loaded_language = $file_name;
         $this->enable_highlighting();
         $language_data = array();
         require $file_name;
@@ -2473,12 +2604,12 @@ class GeSHi {
      * @since  1.0.0
      * @access private
      */
-    function finalise($parsed_code) {
+    function finalise(&$parsed_code) {
         // Remove end parts of important declarations
         // This is BUGGY!! My fault for bad code: fix coming in 1.2
         // @todo Remove this crap
         if ($this->enable_important_blocks &&
-            (strstr($parsed_code, GeSHi::hsc(GESHI_START_IMPORTANT)) === false)) {
+            (strpos($parsed_code, GeSHi::hsc(GESHI_START_IMPORTANT)) === false)) {
             $parsed_code = str_replace(GeSHi::hsc(GESHI_END_IMPORTANT), '', $parsed_code);
         }
 
@@ -2497,28 +2628,31 @@ class GeSHi {
             $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
         }
 
+        // Get code into lines
+        $code = explode("\n", $parsed_code);
+        $parsed_code = '';
+
         // If we're using line numbers, we insert <li>s and appropriate
         // markup to style them (otherwise we don't need to do anything)
         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
             // If we're using the <pre> header, we shouldn't add newlines because
             // the <pre> will line-break them (and the <li>s already do this for us)
             $ls = ($this->header_type != GESHI_HEADER_PRE) ? "\n" : '';
-            // Get code into lines
-            $code = explode("\n", $parsed_code);
+
             // Set vars to defaults for following loop
-            $parsed_code = '';
             $i = 0;
 
             // Foreach line...
-            foreach ($code as $line) {
+            for ($i = 0, $n = count($code); $i < $n;) {
                 //Reset the attributes for a new line ...
                 $attrs = array();
 
                 // Make lines have at least one space in them if they're empty
                 // BenBE: Checking emptiness using trim instead of relying on blanks
-                if ('' == trim($line)) {
-                    $line = '&nbsp;';
+                if ('' == trim($code[$i])) {
+                    $code[$i] = '&nbsp;';
                 }
+
                 // If this is a "special line"...
                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
@@ -2556,13 +2690,15 @@ class GeSHi {
                 }
 
                 ++$i;
+
                 // Are we supposed to use ids? If so, add them
                 if ($this->add_ids) {
                     $attrs['id'][] = "$this->overall_id-$i";
                 }
+
                 if (in_array($i, $this->highlight_extra_lines)) {
                     if ($this->use_classes) {
-                        if(array_key_exists($i, $this->highlight_extra_lines_styles)) {
+                        if (isset($this->highlight_extra_lines_styles[$i])) {
                             $attrs['class'][] = "lx$i";
                         } else {
                             $attrs['class'][] = "ln-xtra";
@@ -2577,24 +2713,22 @@ class GeSHi {
                 foreach ($attrs as $key => $attr) {
                     $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
                 }
-                $parsed_code .= "<li$attr_string>$start$line$end</li>$ls";
+
+                $parsed_code .= "<li$attr_string>$start{$code[$i - 1]}$end</li>$ls";
             }
         }
         else {
             // No line numbers, but still need to handle highlighting lines extra.
             // Have to use divs so the full width of the code is highlighted
-            $code = explode("\n", $parsed_code);
-            $parsed_code = '';
-            $i = 0;
-            foreach ($code as $line) {
+            for ($i = 0, $n = count($code); $i < $n; ++$i) {
                 // Make lines have at least one space in them if they're empty
                 // BenBE: Checking emptiness using trim instead of relying on blanks
-                if ('' == trim($line)) {
-                    $line = '&nbsp;';
+                if ('' == trim($code[$i])) {
+                    $code[$i] = '&nbsp;';
                 }
-                if (in_array(++$i, $this->highlight_extra_lines)) {
+                if (in_array($i + 1, $this->highlight_extra_lines)) {
                     if ($this->use_classes) {
-                        if (array_key_exists($i, $this->highlight_extra_lines_styles)) {
+                        if (isset($this->highlight_extra_lines_styles[$i])) {
                             $parsed_code .= "<div class=\"lx$i\">";
                         } else {
                             $parsed_code .= "<div class=\"ln-xtra\">";
@@ -2603,17 +2737,13 @@ class GeSHi {
                         $parsed_code .= "<div style=\"" . $this->get_line_style($i) . "\">";
                     }
                     // Remove \n because it stuffs up <pre> header
-                    $parsed_code .= $line . "</div>";
+                    $parsed_code .= $code[$i] . "</div>";
                 } else {
-                    $parsed_code .= $line . "\n";
+                    $parsed_code .= $code[$i] . "\n";
                 }
             }
         }
-
-        if ($this->header_type == GESHI_HEADER_PRE) {
-            // enforce line numbers when using pre
-            $parsed_code = str_replace('<li></li>', '<li>&nbsp;</li>', $parsed_code);
-        }
+        unset($code);
 
         return $this->header() . chop($parsed_code) . $this->footer();
     }
@@ -2858,7 +2988,16 @@ class GeSHi {
             '&' => '&amp;',
             '"' => '&quot;',
             '<' => '&lt;',
-            '>' => '&gt;'
+            '>' => '&gt;',
+
+            //This fix is related to SF#1923020, but has to be applied
+            //regardless of actually highlighting symbols.
+
+            //Circumvent a bug with symbol highlighting
+            //This is required as ; would produce undesirable side-effects if it
+            //was not to be processed as an entity.
+            ';' => '<SEMI>', // Force ; to be processed as entity
+            '|' => '<PIPE>' // Force | to be processed as entity
             );                      // ENT_COMPAT set
 
         if (ENT_NOQUOTES == $quote_style)       // don't convert double quotes
@@ -3056,15 +3195,15 @@ class GeSHi {
      * @since 1.0.7.21
      */
     function get_line_style($line) {
-    	//$style = null;
-    	$style = null;
-    	if (array_key_exists($line, $this->highlight_extra_lines_styles)) {
-    		$style = $this->highlight_extra_lines_styles[$line];
-    	} else { // if no "extra" style assigned
-	    	$style = $this->highlight_extra_lines_style;
-    	}
-
-    	return $style;
+        //$style = null;
+        $style = null;
+        if (isset($this->highlight_extra_lines_styles[$line])) {
+            $style = $this->highlight_extra_lines_styles[$line];
+        } else { // if no "extra" style assigned
+            $style = $this->highlight_extra_lines_style;
+        }
+
+        return $style;
     }
 } // End Class GeSHi
author	Andreas Gohr <andi@splitbrain.org>	2008-06-20 18:06:28 +0200
committer	Andreas Gohr <andi@splitbrain.org>	2008-06-20 18:06:28 +0200
commit	bb62d5fbe1aa438a9e655573ea9d8c8d3183006b (patch)
tree	050838041b33d8e8c5a8d4d9c93472ce099b2ed0 /inc/geshi.php
parent	715bdf1fe13fba0611cf88311bbc3d7945b4a544 (diff)
download	rpg-bb62d5fbe1aa438a9e655573ea9d8c8d3183006b.tar.gz rpg-bb62d5fbe1aa438a9e655573ea9d8c8d3183006b.tar.bz2