1 files changed, 373 insertions, 138 deletions
diff --git a/inc/geshi.php b/inc/geshi.php
index a05e1685f..8cf1f9a8d 100644
--- a/inc/geshi.php
+++ b/inc/geshi.php
@@ -41,7 +41,7 @@
 //
 
 /** The version of this GeSHi file */
-define('GESHI_VERSION', '1.0.8');
+define('GESHI_VERSION', '1.0.8.3');
 
 // Define the root directory for the GeSHi code tree
 if (!defined('GESHI_ROOT')) {
@@ -52,6 +52,11 @@ if (!defined('GESHI_ROOT')) {
     @access private */
 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
 
+// Define if GeSHi should be paranoid about security
+if (!defined('GESHI_SECURITY_PARANOID')) {
+    /** Tells GeSHi to be paranoid about security settings */
+    define('GESHI_SECURITY_PARANOID', false);
+}
 
 // Line numbers - use with enable_line_numbers()
 /** Use no line numbers when building the result */
@@ -180,9 +185,14 @@ if (!function_exists('stripos')) {
 /** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
     regular expressions. Set this to false if your PCRE lib is up to date
     @see GeSHi->optimize_regexp_list()
-    TODO: are really the subpatterns the culprit or the overall length of the pattern?
     **/
 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
+/** it's also important not to generate too long regular expressions
+    be generous here... but keep in mind, that when reaching this limit we
+    still have to close open patterns. 12k should do just fine on a 16k limit.
+    @see GeSHi->optimize_regexp_list()
+    **/
+define('GESHI_MAX_PCRE_LENGTH', 12288);
 
 //Number format specification
 /** Basic number format for integers */
@@ -435,7 +445,7 @@ class GeSHi {
      *  The style for the actual code
      * @var string
      */
-    var $code_style = 'font-family: monospace; font-weight: normal; font-style: normal; margin:0; padding:0; background:inherit;';
+    var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
 
     /**
      * The overall class for this code block
@@ -453,19 +463,19 @@ class GeSHi {
      * Line number styles
      * @var string
      */
-    var $line_style1 = 'font-weight: normal;';
+    var $line_style1 = 'font-weight: normal; vertical-align:top;';
 
     /**
      * Line number styles for fancy lines
      * @var string
      */
-    var $line_style2 = 'font-weight: bold;';
+    var $line_style2 = 'font-weight: bold; vertical-align:top;';
 
     /**
      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
      * @var string
      */
-    var $table_linenumber_style = 'width:1px;font-weight: normal;text-align:right;margin:0;padding:0 2px;';
+    var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
 
     /**
      * Flag for how line numbers are displayed
@@ -691,9 +701,31 @@ class GeSHi {
      *             so this method will disappear in 1.2.0.
      */
     function set_language_path($path) {
+        if(strpos($path,':')) {
+            //Security Fix to prevent external directories using fopen wrappers.
+            if(DIRECTORY_SEPARATOR == "\\") {
+                if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
+                    return;
+                }
+            } else {
+                return;
+            }
+        }
+        if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
+            //Security Fix to prevent external directories using fopen wrappers.
+            return;
+        }
+        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
+            //Security Fix to prevent external directories using fopen wrappers.
+            return;
+        }
+        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
+            //Security Fix to prevent external directories using fopen wrappers.
+            return;
+        }
         if ($path) {
             $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
-            $this->set_language($this->language);        // otherwise set_language_path has no effect
+            $this->set_language($this->language); // otherwise set_language_path has no effect
         }
     }
 
@@ -951,11 +983,11 @@ class GeSHi {
      *                to overwrite them
      * @since 1.0.0
      */
-    function set_escape_characters_style($style, $preserve_defaults = false) {
+    function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
         if (!$preserve_defaults) {
-            $this->language_data['STYLES']['ESCAPE_CHAR'][0] = $style;
+            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
         } else {
-            $this->language_data['STYLES']['ESCAPE_CHAR'][0] .= $style;
+            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
         }
     }
 
@@ -1300,44 +1332,68 @@ class GeSHi {
                 'actionscript' => array('as'),
                 'ada' => array('a', 'ada', 'adb', 'ads'),
                 'apache' => array('conf'),
-                'asm' => array('ash', 'asm'),
+                'asm' => array('ash', 'asm', 'inc'),
                 'asp' => array('asp'),
                 'bash' => array('sh'),
+                'bf' => array('bf'),
                 'c' => array('c', 'h'),
                 'c_mac' => array('c', 'h'),
                 'caddcl' => array(),
                 'cadlisp' => array(),
                 'cdfg' => array('cdfg'),
                 'cobol' => array('cbl'),
-                'cpp' => array('cpp', 'h', 'hpp'),
-                'csharp' => array(),
+                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
+                'csharp' => array('cs'),
                 'css' => array('css'),
+                'd' => array('d'),
                 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
+                'diff' => array('diff', 'patch'),
                 'dos' => array('bat', 'cmd'),
                 'gettext' => array('po', 'pot'),
+                'gml' => array('gml'),
+                'gnuplot' => array('plt'),
+                'groovy' => array('groovy'),
+                'haskell' => array('hs'),
                 'html4strict' => array('html', 'htm'),
+                'ini' => array('ini', 'desktop'),
                 'java' => array('java'),
                 'javascript' => array('js'),
                 'klonec' => array('kl1'),
                 'klonecpp' => array('klx'),
+                'latex' => array('tex'),
                 'lisp' => array('lisp'),
                 'lua' => array('lua'),
+                'matlab' => array('m'),
                 'mpasm' => array(),
+                'mysql' => array('sql'),
                 'nsis' => array(),
                 'objc' => array(),
                 'oobas' => array(),
                 'oracle8' => array(),
-                'pascal' => array(),
+                'oracle10' => array(),
+                'pascal' => array('pas'),
                 'perl' => array('pl', 'pm'),
                 'php' => array('php', 'php5', 'phtml', 'phps'),
+                'povray' => array('pov'),
+                'providex' => array('pvc', 'pvx'),
+                'prolog' => array('pl'),
                 'python' => array('py'),
                 'qbasic' => array('bi'),
+                'reg' => array('reg'),
+                'ruby' => array('rb'),
                 'sas' => array('sas'),
+                'scala' => array('scala'),
+                'scheme' => array('scm'),
+                'scilab' => array('sci'),
+                'smalltalk' => array('st'),
                 'smarty' => array(),
+                'tcl' => array('tcl'),
                 'vb' => array('bas'),
                 'vbnet' => array(),
                 'visualfoxpro' => array(),
-                'xml' => array('xml')
+                'whitespace' => array('ws'),
+                'xml' => array('xml', 'svg'),
+                'z80' => array('z80', 'asm', 'inc')
             );
         }
 
@@ -1473,6 +1529,25 @@ class GeSHi {
     function optimize_keyword_group($key) {
         $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
             $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
+        $space_as_whitespace = false;
+        if(isset($this->language_data['PARSER_CONTROL'])) {
+            if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
+                if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
+                    $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
+                }
+                if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
+                    if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
+                        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
+                    }
+                }
+            }
+        }
+        if($space_as_whitespace) {
+            foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
+                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
+                    str_replace(" ", "\\s+", $rxv);
+            }
+        }
     }
 
     /**
@@ -1561,7 +1636,7 @@ class GeSHi {
         if (!$target) {
             $this->link_target = '';
         } else {
-            $this->link_target = ' target="' . $target . '" ';
+            $this->link_target = ' target="' . $target . '"';
         }
     }
 
@@ -1812,12 +1887,14 @@ class GeSHi {
             //
             //Now we need to rewrite our array to get a search string that
             $symbol_preg = array();
-            if (!empty($symbol_preg_single)) {
-                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
-            }
             if (!empty($symbol_preg_multi)) {
+                rsort($symbol_preg_multi);
                 $symbol_preg[] = implode('|', $symbol_preg_multi);
             }
+            if (!empty($symbol_preg_single)) {
+                rsort($symbol_preg_single);
+                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
+            }
             $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
         }
 
@@ -1868,9 +1945,9 @@ class GeSHi {
             //All this formats are matched case-insensitively!
             static $numbers_format = array(
                 GESHI_NUMBER_INT_BASIC =>
-                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])[1-9]\d*?(?![0-9a-z\.])',
+                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z\.])',
                 GESHI_NUMBER_INT_CSTYLE =>
-                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])[1-9]\d*?l(?![0-9a-z\.])',
+                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
                 GESHI_NUMBER_BIN_SUFFIX =>
                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
                 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
@@ -2233,7 +2310,7 @@ class GeSHi {
                 // parse_non_string_part).
 
                 // cache comment regexps incrementally
-                $comment_regexp_cache = array();
+                $next_comment_regexp_key = '';
                 $next_comment_regexp_pos = -1;
                 $next_comment_multi_pos = -1;
                 $next_comment_single_pos = -1;
@@ -2242,6 +2319,9 @@ class GeSHi {
                 $comment_single_cache_per_key = array();
                 $next_open_comment_multi = '';
                 $next_comment_single_key = '';
+                $escape_regexp_cache_per_key = array();
+                $next_escape_regexp_key = '';
+                $next_escape_regexp_pos = -1;
 
                 $length = strlen($part);
                 for ($i = 0; $i < $length; ++$i) {
@@ -2249,6 +2329,50 @@ class GeSHi {
                     $char = $part[$i];
                     $char_len = 1;
 
+                    // update regexp comment cache if needed
+                    if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
+                        $next_comment_regexp_pos = $length;
+                        foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
+                            $match_i = false;
+                            if (isset($comment_regexp_cache_per_key[$comment_key]) &&
+                                ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
+                                 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
+                                // we have already matched something
+                                if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
+                                    // this comment is never matched
+                                    continue;
+                                }
+                                $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
+                            } else if (
+                                //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
+                                (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
+                                (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
+                                ) {
+                                $match_i = $match[0][1];
+                                if (GESHI_PHP_PRE_433) {
+                                    $match_i += $i;
+                                }
+
+                                $comment_regexp_cache_per_key[$comment_key] = array(
+                                    'key' => $comment_key,
+                                    'length' => strlen($match[0][0]),
+                                    'pos' => $match_i
+                                );
+                            } else {
+                                $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
+                                continue;
+                            }
+
+                            if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
+                                $next_comment_regexp_pos = $match_i;
+                                $next_comment_regexp_key = $comment_key;
+                                if ($match_i === $i) {
+                                    break;
+                                }
+                            }
+                        }
+                    }
+
                     $string_started = false;
 
                     if (isset($is_string_starter[$char])) {
@@ -2278,7 +2402,7 @@ class GeSHi {
                         $char_len = strlen($char);
                     }
 
-                    if ($string_started) {
+                    if ($string_started && $i != $next_comment_regexp_pos) {
                         // Hand out the correct style information for this string
                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
@@ -2286,70 +2410,123 @@ class GeSHi {
                             $string_key = 0;
                         }
 
+                        // parse the stuff before this
+                        $result .= $this->parse_non_string_part($stuff_to_parse);
+                        $stuff_to_parse = '';
+
                         if (!$this->use_classes) {
                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
-                            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$string_key] . '"';
                         } else {
                             $string_attributes = ' class="st'.$string_key.'"';
-                            $escape_char_attributes = ' class="es'.$string_key.'"';
                         }
 
-                        // parse the stuff before this
-                        $result .= $this->parse_non_string_part($stuff_to_parse);
-                        $stuff_to_parse = '';
-
                         // now handle the string
-                        $string = '';
+                        $string = "<span$string_attributes>" . GeSHi::hsc($char);
+                        $start = $i + $char_len;
+                        $string_open = true;
 
-                        // look for closing quote
-                        $start = $i;
-                        while ($close_pos = strpos($part, $char, $start + $char_len)) {
-                            $start = $close_pos;
-                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
-                                // check wether this quote is escaped or if it is something like '\\'
-                                $escape_char_pos = $close_pos - 1;
-                                while ($escape_char_pos > 0 &&
-                                        $part[$escape_char_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
-                                    --$escape_char_pos;
+                        if(empty($this->language_data['ESCAPE_REGEXP'])) {
+                            $next_escape_regexp_pos = $length;
+                        }
+
+                        do {
+                            //Get the regular ending pos ...
+                            $close_pos = strpos($part, $char, $start);
+                            if(false === $close_pos) {
+                                $close_pos = $length;
+                            }
+
+                            if($this->lexic_permissions['ESCAPE_CHAR']) {
+                                // update escape regexp cache if needed
+                                if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
+                                    $next_escape_regexp_pos = $length;
+                                    foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
+                                        $match_i = false;
+                                        if (isset($escape_regexp_cache_per_key[$escape_key]) &&
+                                            ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
+                                             $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
+                                            // we have already matched something
+                                            if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
+                                                // this comment is never matched
+                                                continue;
+                                            }
+                                            $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
+                                        } else if (
+                                            //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
+                                            (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
+                                            (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
+                                            ) {
+                                            $match_i = $match[0][1];
+                                            if (GESHI_PHP_PRE_433) {
+                                                $match_i += $start;
+                                            }
+
+                                            $escape_regexp_cache_per_key[$escape_key] = array(
+                                                'key' => $escape_key,
+                                                'length' => strlen($match[0][0]),
+                                                'pos' => $match_i
+                                            );
+                                        } else {
+                                            $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
+                                            continue;
+                                        }
+
+                                        if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
+                                            $next_escape_regexp_pos = $match_i;
+                                            $next_escape_regexp_key = $escape_key;
+                                            if ($match_i === $start) {
+                                                break;
+                                            }
+                                        }
+                                    }
                                 }
-                                if (($close_pos - $escape_char_pos) & 1) {
-                                    // uneven number of escape chars => this quote is escaped
-                                    continue;
+
+                                //Find the next simple escape position
+                                if('' != $this->language_data['ESCAPE_CHAR']) {
+                                    $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
+                                    if(false === $simple_escape) {
+                                        $simple_escape = $length;
+                                    }
+                                } else {
+                                    $simple_escape = $length;
                                 }
+                            } else {
+                                $next_escape_regexp_pos = $length;
+                                $simple_escape = $length;
                             }
 
-                            // found closing quote
-                            break;
-                        }
+                            if($simple_escape < $next_escape_regexp_pos &&
+                                $simple_escape < $length &&
+                                $simple_escape < $close_pos) {
+                                //The nexxt escape sequence is a simple one ...
+                                $es_pos = $simple_escape;
 
-                        //Found the closing delimiter?
-                        if (!$close_pos) {
-                            // span till the end of this $part when no closing delimiter is found
-                            $close_pos = $length;
-                        }
+                                //Add the stuff not in the string yet ...
+                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
 
-                        //Get the actual string
-                        $string = substr($part, $i, $close_pos - $i + $char_len);
-                        $i = $close_pos + $char_len - 1;
+                                //Get the style for this escaped char ...
+                                if (!$this->use_classes) {
+                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
+                                } else {
+                                    $escape_char_attributes = ' class="es0"';
+                                }
 
-                        // handle escape chars and encode html chars
-                        // (special because when we have escape chars within our string they may not be escaped)
-                        if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
-                            $start = 0;
-                            $new_string = '';
-                            while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
-                                $new_string .= $this->hsc(substr($string, $start, $es_pos - $start))
-                                              . "<span$escape_char_attributes>" . $escaped_escape_char;
-                                $es_char = $string[$es_pos + 1];
+                                //Add the style for the escape char ...
+                                $string .= "<span$escape_char_attributes>" .
+                                    GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
+
+                                //Get the byte AFTER the ESCAPE_CHAR we just found
+                                $es_char = $part[$es_pos + 1];
                                 if ($es_char == "\n") {
                                     // don't put a newline around newlines
-                                    $new_string .= "</span>\n";
+                                    $string .= "</span>\n";
+                                    $start = $es_pos + 2;
                                 } else if (ord($es_char) >= 128) {
                                     //This is an non-ASCII char (UTF8 or single byte)
                                     //This code tries to work around SF#2037598 ...
                                     if(function_exists('mb_substr')) {
-                                        $es_char_m = mb_substr(substr($string, $es_pos+1, 16), 0, 1, $this->encoding);
-                                        $new_string .= $es_char_m . '</span>';
+                                        $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
+                                        $string .= $es_char_m . '</span>';
                                     } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
@@ -2358,26 +2535,50 @@ class GeSHi {
                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
-                                            $string, $es_char_m, null, $es_pos + 1)) {
+                                            $part, $es_char_m, null, $es_pos + 1)) {
                                             $es_char_m = $es_char_m[0];
                                         } else {
                                             $es_char_m = $es_char;
                                         }
-                                        $new_string .= $this->hsc($es_char_m) . '</span>';
+                                        $string .= $this->hsc($es_char_m) . '</span>';
                                     } else {
                                         $es_char_m = $this->hsc($es_char);
                                     }
-                                    $es_pos += strlen($es_char_m) - 1;
+                                    $start = $es_pos + strlen($es_char_m) + 1;
+                                } else {
+                                    $string .= $this->hsc($es_char) . '</span>';
+                                    $start = $es_pos + 2;
+                                }
+                            } else if ($next_escape_regexp_pos < $length &&
+                                $next_escape_regexp_pos < $close_pos) {
+                                $es_pos = $next_escape_regexp_pos;
+                                //Add the stuff not in the string yet ...
+                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
+
+                                //Get the key and length of this match ...
+                                $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
+                                $escape_str = substr($part, $es_pos, $escape['length']);
+                                $escape_key = $escape['key'];
+
+                                //Get the style for this escaped char ...
+                                if (!$this->use_classes) {
+                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
                                 } else {
-                                    $new_string .= $this->hsc($es_char) . '</span>';
+                                    $escape_char_attributes = ' class="es' . $escape_key . '"';
                                 }
-                                $start = $es_pos + 2;
+
+                                //Add the style for the escape char ...
+                                $string .= "<span$escape_char_attributes>" .
+                                    $this->hsc($escape_str) . '</span>';
+
+                                $start = $es_pos + $escape['length'];
+                            } else {
+                                //Copy the remainder of the string ...
+                                $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
+                                $start = $close_pos + $char_len;
+                                $string_open = false;
                             }
-                            $string = $new_string . $this->hsc(substr($string, $start));
-                            $new_string = '';
-                        } else {
-                            $string = $this->hsc($string);
-                        }
+                        } while($string_open);
 
                         if ($check_linenumbers) {
                             // Are line numbers used? If, we should end the string before
@@ -2388,15 +2589,16 @@ class GeSHi {
                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
                         }
 
-                        $result .= "<span$string_attributes>" . $string . '</span>';
+                        $result .= $string;
                         $string = '';
+                        $i = $start - 1;
                         continue;
                     } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
                         substr($part, $i, $hq_strlen) == $hq) {
                         // The start of a hard quoted string
                         if (!$this->use_classes) {
-                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARDQUOTE'] . '"';
-                            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARDESCAPE'] . '"';
+                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
+                            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
                         } else {
                             $string_attributes = ' class="st_h"';
                             $escape_char_attributes = ' class="es_h"';
@@ -2412,14 +2614,14 @@ class GeSHi {
                         $start = $i + $hq_strlen;
                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
                             $start = $close_pos + 1;
-                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
+                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR']) {
                                 // make sure this quote is not escaped
                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
                                         // check wether this quote is escaped or if it is something like '\\'
                                         $escape_char_pos = $close_pos - 1;
                                         while ($escape_char_pos > 0
-                                                && $part[$escape_char_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
+                                                && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
                                             --$escape_char_pos;
                                         }
                                         if (($close_pos - $escape_char_pos) & 1) {
@@ -2499,48 +2701,10 @@ class GeSHi {
                         $string = '';
                         continue;
                     } else {
-                        // update regexp comment cache if needed
-                        if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
-                            $next_comment_regexp_pos = $length;
-                            foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
-                                $match_i = false;
-                                if (isset($comment_regexp_cache_per_key[$comment_key]) &&
-                                    $comment_regexp_cache_per_key[$comment_key] >= $i) {
-                                    // we have already matched something
-                                    $match_i = $comment_regexp_cache_per_key[$comment_key];
-                                } else if (
-                                    //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
-                                    (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
-                                    (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
-                                    ) {
-                                    $match_i = $match[0][1];
-                                    if (GESHI_PHP_PRE_433) {
-                                        $match_i += $i;
-                                    }
-
-                                    $comment_regexp_cache[$match_i] = array(
-                                        'key' => $comment_key,
-                                        'length' => strlen($match[0][0]),
-                                    );
-
-                                    $comment_regexp_cache_per_key[$comment_key] = $match_i;
-                                } else {
-                                    $comment_regexp_cache_per_key[$comment_key] = false;
-                                    continue;
-                                }
-
-                                if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
-                                    $next_comment_regexp_pos = $match_i;
-                                    if ($match_i === $i) {
-                                        break;
-                                    }
-                                }
-                            }
-                        }
                         //Have a look for regexp comments
                         if ($i == $next_comment_regexp_pos) {
                             $COMMENT_MATCHED = true;
-                            $comment = $comment_regexp_cache[$next_comment_regexp_pos];
+                            $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
 
                             //@todo If remove important do remove here
@@ -2578,8 +2742,13 @@ class GeSHi {
                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
                                     $match_i = false;
                                     if (isset($comment_multi_cache_per_key[$open]) &&
-                                        $comment_multi_cache_per_key[$open] >= $i) {
+                                        ($comment_multi_cache_per_key[$open] >= $i ||
+                                         $comment_multi_cache_per_key[$open] === false)) {
                                         // we have already matched something
+                                        if ($comment_multi_cache_per_key[$open] === false) {
+                                            // this comment is never matched
+                                            continue;
+                                        }
                                         $match_i = $comment_multi_cache_per_key[$open];
                                     } else if (($match_i = stripos($part, $open, $i)) !== false) {
                                         $comment_multi_cache_per_key[$open] = $match_i;
@@ -2670,8 +2839,13 @@ class GeSHi {
                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
                                     $match_i = false;
                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
-                                        $comment_single_cache_per_key[$comment_key] >= $i) {
+                                        ($comment_single_cache_per_key[$comment_key] >= $i ||
+                                         $comment_single_cache_per_key[$comment_key] === false)) {
                                         // we have already matched something
+                                        if ($comment_single_cache_per_key[$comment_key] === false) {
+                                            // this comment is never matched
+                                            continue;
+                                        }
                                         $match_i = $comment_single_cache_per_key[$comment_key];
                                     } else if (
                                         // case sensitive comments
@@ -2948,9 +3122,16 @@ class GeSHi {
 
                 $before = '<|UR1|"' .
                     str_replace(
-                        array('{FNAME}', '{FNAMEL}', '{FNAMEU}', '.'),
-                        array($this->hsc($word), $this->hsc(strtolower($word)),
-                            $this->hsc(strtoupper($word)), '<DOT>'),
+                        array(
+                            '{FNAME}',
+                            '{FNAMEL}',
+                            '{FNAMEU}',
+                            '.'),
+                        array(
+                            str_replace('+', '%20', urlencode($this->hsc($word))),
+                            str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
+                            str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
+                            '<DOT>'),
                         $this->language_data['URLS'][$k]
                     ) . '">';
                 $after = '</a>';
@@ -3231,8 +3412,13 @@ class GeSHi {
                 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                     // already highlighted blocks _must_ include either < or >
                     // so if this conditional applies, we have to skip this match
-                    continue;
+                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
+                    if(strpos($symbol_match, '<SEMI>') === false &&
+                        strpos($symbol_match, '<PIPE>') === false) {
+                        continue;
+                    }
                 }
+
                 // if we reach this point, we have a valid match which needs to be highlighted
 
                 $symbol_length = strlen($symbol_match);
@@ -3278,7 +3464,6 @@ class GeSHi {
                     $symbol_hl .= $symbol_match . '|>';
                 }
 
-
                 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
 
                 // since we replace old text with something of different size,
@@ -3472,6 +3657,12 @@ class GeSHi {
             unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
         }
 
+        //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
+        //You need to set one for HARDESCAPES only in this case.
+        if(!isset($this->language_data['HARDCHAR'])) {
+            $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
+        }
+
         //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
         $style_filename = substr($file_name, 0, -4) . '.style.php';
         if (is_readable($style_filename)) {
@@ -3489,9 +3680,6 @@ class GeSHi {
                     $this->merge_arrays($this->language_data['STYLES'], $style_data);
             }
         }
-
-        // Set default class for CSS
-        $this->overall_class = $this->language;
     }
 
     /**
@@ -3635,17 +3823,50 @@ class GeSHi {
                     } else {
                         $attrs = ' style="'. $this->table_linenumber_style .'"';
                     }
-                    $parsed_code .= '<td'.$attrs.'><pre>';
+                    $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                     // get linenumbers
                     // we don't merge it with the for below, since it should be better for
                     // memory consumption this way
-                    for ($i = 1; $i <= $n; ++$i) {
-                        $parsed_code .= $i;
-                        if ($i != $n) {
+                    // @todo: but... actually it would still be somewhat nice to merge the two loops
+                    //        the mem peaks are at different positions
+                    for ($i = 0; $i < $n; ++$i) {
+                        $close = 0;
+                        // fancy lines
+                        if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
+                            $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
+                            // Set the attributes to style the line
+                            if ($this->use_classes) {
+                                $parsed_code .= '<span class="xtra li2"><span class="de2">';
+                            } else {
+                                // This style "covers up" the special styles set for special lines
+                                // so that styles applied to special lines don't apply to the actual
+                                // code on that line
+                                $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
+                                                  .'<span style="' . $this->code_style .'">';
+                            }
+                            $close += 2;
+                        }
+                        //Is this some line with extra styles???
+                        if (in_array($i + 1, $this->highlight_extra_lines)) {
+                            if ($this->use_classes) {
+                                if (isset($this->highlight_extra_lines_styles[$i])) {
+                                    $parsed_code .= "<span class=\"xtra lx$i\">";
+                                } else {
+                                    $parsed_code .= "<span class=\"xtra ln-xtra\">";
+                                }
+                            } else {
+                                $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
+                            }
+                            ++$close;
+                        }
+                        $parsed_code .= $this->line_numbers_start + $i;
+                        if ($close) {
+                            $parsed_code .= str_repeat('</span>', $close);
+                        } else if ($i != $n) {
                             $parsed_code .= "\n";
                         }
                     }
-                    $parsed_code .= '</pre></td><td>';
+                    $parsed_code .= '</pre></td><td'.$attributes.'>';
                 }
                 $parsed_code .= '<pre'. $attributes .'>';
             }
@@ -3810,7 +4031,7 @@ class GeSHi {
             } else {
                 $attr = " style=\"{$this->footer_content_style}\"";
             }
-            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->linenumbers != GESHI_NO_LINE_NUMBERS) {
+            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
             } else {
                 $footer = "<div$attr>$footer</div>";
@@ -4108,7 +4329,7 @@ class GeSHi {
         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
                 // NEW: since 1.0.8 we have to handle hardescapes
-                if ($group == 'HARD') {
+                if ($group === 'HARD') {
                     $group = '_h';
                 }
                 $stylesheet .= "$selector.es$group {{$styles}}\n";
@@ -4221,7 +4442,15 @@ class GeSHi {
         $tokens = array();
         $prev_keys = array();
         // go through all entries of the list and generate the token list
+        $cur_len = 0;
         for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
+            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
+                // seems like the length of this pcre is growing exorbitantly
+                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
+                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
+                $tokens = array();
+                $cur_len = 0;
+            }
             $level = 0;
             $entry = preg_quote((string) $list[$i], $regexp_delimiter);
             $pointer = &$tokens;
@@ -4248,11 +4477,13 @@ class GeSHi {
                             // only part of the keys match
                             $new_key_part1 = substr($prev_keys[$level], 0, $char);
                             $new_key_part2 = substr($prev_keys[$level], $char);
+
                             if (in_array($new_key_part1[0], $regex_chars)
                                 || in_array($new_key_part2[0], $regex_chars)) {
                                 // this is bad, a regex char as first character
                                 $pointer[$entry] = array('' => true);
                                 array_splice($prev_keys, $level, count($prev_keys), $entry);
+                                $cur_len += strlen($entry);
                                 continue;
                             } else {
                                 // relocate previous tokens
@@ -4261,6 +4492,7 @@ class GeSHi {
                                 $pointer = &$pointer[$new_key_part1];
                                 // recreate key index
                                 array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
+                                $cur_len += strlen($new_key_part2);
                             }
                         }
                         ++$level;
@@ -4284,10 +4516,13 @@ class GeSHi {
                         $num_subpatterns += $new_subpatterns;
                     }
                     $tokens = array();
+                    $cur_len = 0;
                 }
                 // no further common denominator found
                 $pointer[$entry] = array('' => true);
                 array_splice($prev_keys, $level, count($prev_keys), $entry);
+
+                $cur_len += strlen($entry);
                 break;
             }
             unset($list[$i]);
@@ -4381,4 +4616,4 @@ if (!function_exists('geshi_highlight')) {
     }
 }
 
-?>
+?>
+\ No newline at end of file