summary refs log tree commit diff
path: root/inc/geshi.php
diff options
context:
space:
mode:
author Andreas Gohr <andi@splitbrain.org> 2008-06-20 18:06:28 +0200
committer Andreas Gohr <andi@splitbrain.org> 2008-06-20 18:06:28 +0200
commit bb62d5fbe1aa438a9e655573ea9d8c8d3183006b (patch)
tree 050838041b33d8e8c5a8d4d9c93472ce099b2ed0 /inc/geshi.php
parent 715bdf1fe13fba0611cf88311bbc3d7945b4a544 (diff)
download rpg-bb62d5fbe1aa438a9e655573ea9d8c8d3183006b.tar.gz
rpg-bb62d5fbe1aa438a9e655573ea9d8c8d3183006b.tar.bz2
GeSHi update to 1.0.7.22
darcs-hash:20080620160628-7ad00-1bfd8f407b5b38c29c7879d67da2ea3dbd0d1816.gz
Diffstat (limited to 'inc/geshi.php')
-rw-r--r-- inc/geshi.php 969
1 files changed, 554 insertions, 415 deletions
diff --git a/inc/geshi.php b/inc/geshi.php
index 362acd9c6..2fbf35a77 100644
--- a/inc/geshi.php
+++ b/inc/geshi.php
@@ -41,7 +41,7 @@
//
/** The version of this GeSHi file */
-define('GESHI_VERSION', '1.0.7.21');
+define('GESHI_VERSION', '1.0.7.22');
// Define the root directory for the GeSHi code tree
if (!defined('GESHI_ROOT')) {
@@ -326,13 +326,13 @@ class GeSHi {
*/
var $highlight_extra_lines_style = 'color: #cc0; background-color: #ffc;';
- /**
- * The line ending
- * If null, nl2br() will be used on the result string.
- * Otherwise, all instances of \n will be replaced with $line_ending
- * @var string
- */
- var $line_ending = null;
+ /**
+ * The line ending
+ * If null, nl2br() will be used on the result string.
+ * Otherwise, all instances of \n will be replaced with $line_ending
+ * @var string
+ */
+ var $line_ending = null;
/**
* Number at which line numbers should start at
@@ -350,7 +350,7 @@ class GeSHi {
* The style for the actual code
* @var string
*/
- var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal;';
+ var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal; font-style: normal;';
/**
* The overall class for this code block
@@ -368,7 +368,7 @@ class GeSHi {
* Line number styles
* @var string
*/
- var $line_style1 = 'font-family: \'Courier New\', Courier, monospace; color: black; font-weight: normal; font-style: normal;';
+ var $line_style1 = 'font-weight: normal;';
/**
* Line number styles for fancy lines
@@ -383,6 +383,13 @@ class GeSHi {
var $line_numbers = GESHI_NO_LINE_NUMBERS;
/**
+ * Flag to decide if multi line spans are allowed. Set it to false to make sure
+ * each tag is closed before and reopened after each linefeed.
+ * @var boolean
+ */
+ var $allow_multiline_span = true;
+
+ /**
* The "nth" value for fancy line highlighting
* @var int
*/
@@ -394,11 +401,11 @@ class GeSHi {
*/
var $tab_width = 8;
- /**
- * Should we use language-defined tab stop widths?
- * @var int
- */
- var $use_language_tab_width = false;
+ /**
+ * Should we use language-defined tab stop widths?
+ * @var boolean
+ */
+ var $use_language_tab_width = false;
/**
* Default target for keyword links
@@ -419,6 +426,13 @@ class GeSHi {
*/
var $keyword_links = true;
+ /**
+ * Currently loaded language file
+ * @var string
+ * @since 1.0.7.22
+ */
+ var $loaded_language = '';
+
/**#@-*/
/**
@@ -524,7 +538,7 @@ class GeSHi {
*/
function set_language_path($path) {
if ($path) {
- $this->language_path = ('/' == substr($path, strlen($path) - 1, 1)) ? $path : $path . '/';
+ $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
$this->set_language($this->language); // otherwise set_language_path has no effect
}
}
@@ -685,6 +699,29 @@ class GeSHi {
}
/**
+ * Sets whether spans and other HTML markup generated by GeSHi can
+ * span over multiple lines or not. Defaults to true to reduce overhead.
+ * Set it to false if you want to manipulate the output or manually display
+ * the code in an ordered list.
+ *
+ * @param boolean Whether multiline spans are allowed or not
+ * @since 1.0.7.22
+ */
+ function enable_multiline_span($flag) {
+ $this->allow_multiline_span = (bool) $flag;
+ }
+
+ /**
+ * Get current setting for multiline spans, see GeSHi::enable_multiline_span()
+ *
+ * @see enable_multiline_span
+ * @return bool
+ */
+ function get_multiline_span() {
+ return $this->allow_multiline_span;
+ }
+
+ /**
* Sets the style for a keyword group. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
@@ -702,6 +739,9 @@ class GeSHi {
else {
$this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
}
+ if(!isset($this->lexic_permissions['KEYWORDS'][$key])) {
+ $this->lexic_permissions['KEYWORDS'][$key] = true;
+ }
}
/**
@@ -1013,30 +1053,31 @@ class GeSHi {
}
}
- /**
- * Sets whether or not to use tab-stop width specifed by language
- *
- * @param boolean Whether to use language-specific tab-stop widths
+ /**
+ * Sets whether or not to use tab-stop width specified by language
+ *
+ * @param boolean Whether to use language-specific tab-stop widths
* @since 1.0.7.20
- */
- function set_use_language_tab_width($use) {
- $this->use_language_tab_width = (bool) $use;
- }
-
- /**
- * Returns the tab width to use, based on the current language and user
- * preference
- *
- * @return int Tab width
+ */
+ function set_use_language_tab_width($use) {
+ $this->use_language_tab_width = (bool) $use;
+ }
+
+ /**
+ * Returns the tab width to use, based on the current language and user
+ * preference
+ *
+ * @return int Tab width
* @since 1.0.7.20
- */
- function get_real_tab_width() {
- if (!$this->use_language_tab_width || !isset($this->language_data['TAB_WIDTH'])) {
- return $this->tab_width;
- } else {
- return $this->language_data['TAB_WIDTH'];
- }
- }
+ */
+ function get_real_tab_width() {
+ if (!$this->use_language_tab_width ||
+ !isset($this->language_data['TAB_WIDTH'])) {
+ return $this->tab_width;
+ } else {
+ return $this->language_data['TAB_WIDTH'];
+ }
+ }
/**
* Enables/disables strict highlighting. Default is off, calling this
@@ -1101,7 +1142,7 @@ class GeSHi {
* @todo static?
*/
function get_language_name_from_extension( $extension, $lookup = array() ) {
- if ( !is_array($lookup) || !count($lookup)) {
+ if ( !is_array($lookup) || empty($lookup)) {
$lookup = array(
'actionscript' => array('as'),
'ada' => array('a', 'ada', 'adb', 'ads'),
@@ -1119,6 +1160,7 @@ class GeSHi {
'css' => array('css'),
'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
'dos' => array('bat', 'cmd'),
+ 'gettext' => array('po', 'pot'),
'html4strict' => array('html', 'htm'),
'java' => array('java'),
'javascript' => array('js'),
@@ -1144,10 +1186,8 @@ class GeSHi {
}
foreach ($lookup as $lang => $extensions) {
- foreach ($extensions as $ext) {
- if ($ext == $extension) {
- return $lang;
- }
+ if (in_array($extension, $extensions)) {
+ return $lang;
}
}
return '';
@@ -1172,7 +1212,7 @@ class GeSHi {
*/
function load_from_file($file_name, $lookup = array()) {
if (is_readable($file_name)) {
- $this->set_source(implode('', file($file_name)));
+ $this->set_source(file_get_contents($file_name));
$this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
}
else {
@@ -1370,15 +1410,16 @@ class GeSHi {
function highlight_lines_extra($lines, $style = null) {
if (is_array($lines)) {
foreach ($lines as $line) {
- $this->highlight_lines_extra(line, $style);
+ $this->highlight_lines_extra($line, $style);
}
}
else {
- $this->highlight_extra_lines[intval($lines)] = intval($lines);
+ $lines = intval($lines);
+ $this->highlight_extra_lines[$lines] = $lines;
if ($style != null) {
- $this->highlight_extra_lines_styles[intval($lines)] = $style;
+ $this->highlight_extra_lines_styles[$lines] = $style;
} else {
- unset($this->highlight_extra_lines_styles[intval($lines)]);
+ unset($this->highlight_extra_lines_styles[$lines]);
}
}
}
@@ -1393,15 +1434,15 @@ class GeSHi {
$this->highlight_extra_lines_style = $styles;
}
- /**
- * Sets the line-ending
- *
- * @param string The new line-ending
- * @since 1.0.2
- */
- function set_line_ending($line_ending) {
- $this->line_ending = (string)$line_ending;
- }
+ /**
+ * Sets the line-ending
+ *
+ * @param string The new line-ending
+ * @since 1.0.2
+ */
+ function set_line_ending($line_ending) {
+ $this->line_ending = (string)$line_ending;
+ }
/**
* Sets what number line numbers should start at. Should
@@ -1447,7 +1488,7 @@ class GeSHi {
* @since 1.0.2
*/
function enable_keyword_links($enable = true) {
- $this->keyword_links = ($enable) ? true : false;
+ $this->keyword_links = (bool) $enable;
}
/**
@@ -1480,9 +1521,6 @@ class GeSHi {
// Initialise various stuff
$length = strlen($code);
- $STRING_OPEN = '';
- $CLOSE_STRING = false;
- $ESCAPE_CHAR_OPEN = false;
$COMMENT_MATCHED = false;
// Turn highlighting on if strict mode doesn't apply to this language
$HIGHLIGHTING_ON = ( !$this->strict_mode ) ? true : '';
@@ -1502,53 +1540,38 @@ class GeSHi {
if ($this->strict_mode) {
// Break the source into bits. Each bit will be a portion of the code
// within script delimiters - for example, HTML between < and >
- $parts = array(0 => array(0 => ''));
+ $parts = array(0 => array(0 => '', 1 => ''));
$k = 0;
- for ($i = 0; $i < $length; $i++) {
- $char = substr($code, $i, 1);
- if (!$HIGHLIGHTING_ON) {
- foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
- foreach ($delimiters as $open => $close) {
- // Get the next little bit for this opening string
- $check = substr($code, $i, strlen($open));
- // If it matches...
- if ($check == $open) {
- // We start a new block with the highlightable
- // code in it
- $HIGHLIGHTING_ON = $open;
- $i += strlen($open) - 1;
- $char = $open;
- $parts[++$k][0] = $char;
-
- // No point going around again...
- break(2);
+ for ($i = 0; $i < $length; ++$i) {
+ foreach ($this->language_data['SCRIPT_DELIMITERS'] as $delimiters) {
+ foreach ($delimiters as $open => $close) {
+ // Get the next little bit for this opening string
+ $open_strlen = strlen($open);
+ $check = substr($code, $i, $open_strlen);
+ // If it matches...
+ if ($check == $open) {
+ // We start a new block with the highlightable
+ // code in it
+ ++$k;
+ $parts[$k][0] = $open;
+ $close_i = strpos($code, $close, $i + $open_strlen) + strlen($close);
+ if ($close_i === false) {
+ $close_i = $length - 1;
}
+ $parts[$k][1] = substr($code, $i, $close_i - $i);
+ $i = $close_i - 1;
+ ++$k;
+ $parts[$k][0] = '';
+ $parts[$k][1] = '';
+
+ // No point going around again...
+ continue 3;
}
}
}
- else {
- foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
- foreach ($delimiters as $open => $close) {
- if ($open == $HIGHLIGHTING_ON) {
- // Found the closing tag
- break(2);
- }
- }
- }
- // We check code from our current position BACKWARDS. This is so
- // the ending string for highlighting can be included in the block
- $check = substr($code, $i - strlen($close) + 1, strlen($close));
- if ($check == $close) {
- $HIGHLIGHTING_ON = '';
- // Add the string to the rest of the string for this part
- $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
- $parts[++$k][0] = '';
- $char = '';
- }
- }
- $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
+ // only non-highlightable text reaches this point
+ $parts[$k][1] .= $code[$i];
}
- $HIGHLIGHTING_ON = '';
}
else {
// Not strict mode - simply dump the source into
@@ -1561,6 +1584,45 @@ class GeSHi {
);
}
+ //Unset variables we won't need any longer
+ unset($code);
+
+ //Preload some repeatedly used values regarding hardquotes ...
+ $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
+ $hq_strlen = strlen($hq);
+
+ //Preload if line numbers are to be generated afterwards
+ //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
+ $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS
+ || !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
+
+ //preload the escape char for faster checking ...
+ $escaped_escape_char = GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
+
+ if (!$this->use_classes) {
+ $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+ $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
+ }
+ else {
+ $string_attributes = ' class="st0"';
+ $escape_char_attributes = ' class="es0"';
+ }
+
+ // this is used for single-line comments
+ $sc_disallowed_before = "";
+ $sc_disallowed_after = "";
+
+ if(isset($this->language_data['PARSER_CONTROL'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
+ $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
+ }
+ if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
+ $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
+ }
+ }
+ }
+
// Now we go through each part. We know that even-indexed parts are
// code that shouldn't be highlighted, and odd-indexed parts should
// be highlighted
@@ -1571,10 +1633,8 @@ class GeSHi {
if ($this->strict_mode) {
// Find the class key for this block of code
foreach ($this->language_data['SCRIPT_DELIMITERS'] as $script_key => $script_data) {
- foreach ($script_data as $open => $close) {
- if ($data[0] == $open) {
- break(2);
- }
+ if (isset($script_data[$data[0]])) {
+ break;
}
}
@@ -1598,208 +1658,286 @@ class GeSHi {
// Now, highlight the code in this block. This code
// is really the engine of GeSHi (along with the method
// parse_non_string_part).
+
+ // cache comment regexps incrementally
+ $comment_regexp_cache = array();
+ $next_comment_regexp_pos = -1;
+ $comment_regexp_cache_per_key = array();
+
$length = strlen($part);
- for ($i = 0; $i < $length; $i++) {
+ for ($i = 0; $i < $length; ++$i) {
// Get the next char
- $char = substr($part, $i, 1);
- $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
- // Is this char the newline and line numbers being used?
- if (($this->line_numbers != GESHI_NO_LINE_NUMBERS
- || count($this->highlight_extra_lines) > 0)
- && $char == "\n") {
- // If so, is there a string open? If there is, we should end it before
- // the newline and begin it again (so when <li>s are put in the source
- // remains XHTML compliant)
- // note to self: This opens up possibility of config files specifying
- // that languages can/cannot have multiline strings???
- if ($STRING_OPEN) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
- }
- else {
- $attributes = ' class="st0"';
+ $char = $part[$i];
+
+ if (in_array($char, $this->language_data['QUOTEMARKS']) && $this->lexic_permissions['STRINGS']) {
+ // The start of a new string
+
+ // parse the stuff before this
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
+
+ // now handle the string
+ $string = '';
+
+ // look for closing quote
+ $start = $i;
+ while ($close_pos = strpos($part, $char, $start + 1)) {
+ $start = $close_pos;
+ if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
+ // this quote is escaped
+ continue;
}
- $char = '</span>' . $char . "<span$attributes>";
+ // found closing quote
+ break;
}
- }
- else if ($char == $STRING_OPEN) {
- // A match of a string delimiter
- if (($this->lexic_permissions['ESCAPE_CHAR'] && $ESCAPE_CHAR_OPEN) ||
- ($this->lexic_permissions['STRINGS'] && !$ESCAPE_CHAR_OPEN)) {
- $char = GeSHi::hsc($char) . '</span>';
+ if (!$close_pos) {
+ // span till the end of this $part when no closing delimiter is found
+ $close_pos = $length;
}
- $escape_me = false;
- if ($HARDQUOTE_OPEN) {
- if ($ESCAPE_CHAR_OPEN) {
- $escape_me = true;
- }
- else {
- foreach ($this->language_data['HARDESCAPE'] as $hardesc) {
- if (substr($part, $i, strlen($hardesc)) == $hardesc) {
- $escape_me = true;
- break;
- }
+
+ $string = substr($part, $i, $close_pos - $i + 1);
+ $i = $close_pos;
+
+ // handle escape chars and encode html chars
+ // (special because when we have escape chars within our string they may not be escaped)
+ if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
+ $start = 0;
+ $new_string = '';
+ while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
+ $new_string .= GeSHi::hsc(substr($string, $start, $es_pos - $start))
+ . "<span$escape_char_attributes>" . $escaped_escape_char;
+ if ($string[$es_pos + 1] == "\n") {
+ // don't put a span around newlines
+ $new_string .= "</span>\n";
+ } else {
+ $new_string .= GeSHi::hsc($string[$es_pos + 1]) . '</span>';
}
+ $start = $es_pos + 2;
}
+ $string = $new_string . GeSHi::hsc(substr($string, $start));
+ $new_string = '';
+ } else {
+ $string = GeSHi::hsc($string);
}
- if (!$ESCAPE_CHAR_OPEN) {
- $STRING_OPEN = '';
- $CLOSE_STRING = true;
- }
- if (!$escape_me) {
- $HARDQUOTE_OPEN = false;
+ if ($check_linenumbers) {
+ // Are line numbers used? If so, we should end the string before
+ // the newline and begin it again (so when <li>s are put in the source
+ // remains XHTML compliant)
+ // note to self: This opens up possibility of config files specifying
+ // that languages can/cannot have multiline strings???
+ $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
}
- $ESCAPE_CHAR_OPEN = false;
+
+ $result .= "<span$string_attributes>" . $string . '</span>';
+ $string = '';
+ continue;
}
- else if (in_array($char, $this->language_data['QUOTEMARKS']) &&
- ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
- // The start of a new string
- $STRING_OPEN = $char;
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+ else if ($hq && substr($part, $i, $hq_strlen) == $hq && $this->lexic_permissions['STRINGS']) {
+ // The start of a hard quoted string
+
+ // parse the stuff before this
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
+
+ // now handle the string
+ $string = '';
+
+ // look for closing quote
+ $start = $i + $hq_strlen;
+ while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start + 1)) {
+ $start = $close_pos;
+ if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['ESCAPE_CHAR']) {
+ // make sure this quote is not escaped
+ foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
+ if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
+ // this quote is escaped
+ continue 2;
+ }
+ }
+ }
+ // found closing quote
+ break;
}
- else {
- $attributes = ' class="st0"';
+ if (!$close_pos) {
+ // span till the end of this $part when no closing delimiter is found
+ $close_pos = $length;
}
- $char = "<span$attributes>" . GeSHi::hsc($char);
- $result .= $this->parse_non_string_part( $stuff_to_parse );
- $stuff_to_parse = '';
- }
- else if ($hq && substr($part, $i, strlen($hq)) == $hq &&
- ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
- // The start of a hard quoted string
- $STRING_OPEN = $this->language_data['HARDQUOTE'][1];
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+ $string = substr($part, $i, $close_pos - $i + 1);
+ $i = $close_pos;
+
+ // handle escape chars and encode html chars
+ // (special because when we have escape chars within our string they may not be escaped)
+ if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
+ $start = 0;
+ $new_string = '';
+ while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
+ // HTML-escape the stuff before
+ $new_string .= GeSHi::hsc(substr($string, $start, $es_pos - $start));
+ // check if this is a hard escape
+ foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
+ if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
+ // indeed, this is a hardescape
+ $new_string .= "<span$escape_char_attributes>" . GeSHi::hsc($hardescape)
+ . '</span>';
+ $start = $es_pos + strlen($hardescape);
+ continue 2;
+ }
+ }
+ // not a hard escape
+ $new_string .= $escaped_escape_char;
+ $start = $es_pos + 1;
+ }
+ $string = $new_string . GeSHi::hsc(substr($string, $start));
+ } else {
+ $string = GeSHi::hsc($string);
}
- else {
- $attributes = ' class="st0"';
+
+ if ($check_linenumbers) {
+ // Are line numbers used? If so, we should end the string before
+ // the newline and begin it again (so when <li>s are put in the source
+ // remains XHTML compliant)
+ // note to self: This opens up possibility of config files specifying
+ // that languages can/cannot have multiline strings???
+ $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
}
- $char = "<span$attributes>" . $hq;
- $i += strlen($hq) - 1;
- $HARDQUOTE_OPEN = true;
- $result .= $this->parse_non_string_part($stuff_to_parse);
- $stuff_to_parse = '';
+
+ $result .= "<span$string_attributes>" . $string . '</span>';
+ $string = '';
+ continue;
}
- else if ($char == $this->language_data['ESCAPE_CHAR'] && $STRING_OPEN != '') {
- // An escape character
- if (!$ESCAPE_CHAR_OPEN) {
- $ESCAPE_CHAR_OPEN = !$HARDQUOTE_OPEN; // true unless $HARDQUOTE_OPEN
- if ($HARDQUOTE_OPEN) {
- foreach ($this->language_data['HARDESCAPE'] as $hard) {
- if (substr($part, $i, strlen($hard)) == $hard) {
- $ESCAPE_CHAR_OPEN = true;
+ else {
+ // update regexp comment cache if needed
+ if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
+ $next_comment_regexp_pos = $length;
+ foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
+ $match_i = false;
+ if (isset($comment_regexp_cache_per_key[$comment_key]) &&
+ $comment_regexp_cache_per_key[$comment_key] >= $i) {
+ // we have already matched something
+ $match_i = $comment_regexp_cache_per_key[$comment_key];
+ }
+ else if (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
+ $match_i = $match[0][1];
+ $comment_regexp_cache[$match_i] = array(
+ 'key' => $comment_key,
+ 'length' => strlen($match[0][0]),
+ );
+ $comment_regexp_cache_per_key[$comment_key] = $match_i;
+ } else {
+ $comment_regexp_cache_per_key[$comment_key] = false;
+ continue;
+ }
+
+ if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
+ $next_comment_regexp_pos = $match_i;
+ if ($match_i === $i) {
break;
}
}
}
- if ($ESCAPE_CHAR_OPEN && $this->lexic_permissions['ESCAPE_CHAR']) {
+ }
+ //Have a look for regexp comments
+ if ($i == $next_comment_regexp_pos) {
+ $COMMENT_MATCHED = true;
+ $comment = $comment_regexp_cache[$next_comment_regexp_pos];
+ $test_str = GeSHi::hsc(substr($part, $i, $comment['length']));
+
+ //@todo If remove important do remove here
+ if ($this->lexic_permissions['COMMENTS']['MULTI']) {
if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
+ $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
}
else {
- $attributes = ' class="es0"';
+ $attributes = ' class="co' . $comment['key'] . '"';
}
- $char = "<span$attributes>" . $char;
- if (substr($code, $i + 1, 1) == "\n") {
- // escaping a newline, what's the point in putting the span around
- // the newline? It only causes hassles when inserting line numbers
- $char .= '</span>';
- $ESCAPE_CHAR_OPEN = false;
+ $test_str = "<span$attributes>" . $test_str . "</span>";
+
+ // Short-cut through all the multiline code
+ if ($check_linenumbers) {
+ // strreplace to put close span and open span around multiline newlines
+ $test_str = str_replace(
+ "\n", "</span>\n<span$attributes>",
+ str_replace("\n ", "\n&nbsp;", $test_str)
+ );
}
}
+
+ $i += $comment['length'] - 1;
+
+ // parse the rest
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
}
- else {
- $ESCAPE_CHAR_OPEN = false;
- if ($this->lexic_permissions['ESCAPE_CHAR']) {
- $char .= '</span>';
- }
- }
- }
- else if ($ESCAPE_CHAR_OPEN) {
- if ($this->lexic_permissions['ESCAPE_CHAR']) {
- $char .= '</span>';
- }
- $ESCAPE_CHAR_OPEN = false;
- $test_str = $char;
- }
- else if ($STRING_OPEN == '') {
- // Is this a multiline comment?
- foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
- $com_len = strlen($open);
- $test_str = substr( $part, $i, $com_len );
- $test_str_match = $test_str;
- if (strtolower($open) == strtolower($test_str)) {
- $COMMENT_MATCHED = true;
- //@todo If remove important do remove here
- if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
- $test_str == GESHI_START_IMPORTANT) {
- if ($test_str != GESHI_START_IMPORTANT) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
+ // If we haven't matched a regexp comment, try multi-line comments
+ if (!$COMMENT_MATCHED) {
+ // Is this a multiline comment?
+ foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
+ $com_len = strlen($open);
+ $test_str = substr( $part, $i, $com_len );
+ $test_str_match = $test_str;
+ if (strtolower($open) == strtolower($test_str)) {
+ $COMMENT_MATCHED = true;
+ //@todo If remove important do remove here
+ if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
+ $test_str == GESHI_START_IMPORTANT) {
+ if ($test_str != GESHI_START_IMPORTANT) {
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
+ }
+ else {
+ $attributes = ' class="coMULTI"';
+ }
+ $test_str = "<span$attributes>" . GeSHi::hsc($test_str);
}
else {
- $attributes = ' class="coMULTI"';
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->important_styles . '"';
+ }
+ else {
+ $attributes = ' class="imp"';
+ }
+ // We don't include the start of the comment if it's an
+ // "important" part
+ $test_str = "<span$attributes>";
}
- $test_str = "<span$attributes>" . GeSHi::hsc($test_str);
}
else {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->important_styles . '"';
- }
- else {
- $attributes = ' class="imp"';
- }
- // We don't include the start of the comment if it's an
- // "important" part
- $test_str = "<span$attributes>";
+ $test_str = GeSHi::hsc($test_str);
}
- }
- else {
- $test_str = GeSHi::hsc($test_str);
- }
- $close_pos = strpos( $part, $close, $i + strlen($close) );
+ $close_pos = strpos( $part, $close, $i + strlen($open) );
- $oops = false;
- if ($close_pos === false) {
- $close_pos = strlen($part);
- $oops = true;
- }
- else {
- $close_pos -= ($com_len - strlen($close));
- }
+ if ($close_pos === false) {
+ $close_pos = $length;
+ }
- // Short-cut through all the multiline code
- $rest_of_comment = GeSHi::hsc(substr($part, $i + $com_len, $close_pos - $i));
- if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
- $test_str_match == GESHI_START_IMPORTANT) &&
- ($this->line_numbers != GESHI_NO_LINE_NUMBERS ||
- count($this->highlight_extra_lines) > 0)) {
- // strreplace to put close span and open span around multiline newlines
- $test_str .= str_replace(
- "\n", "</span>\n<span$attributes>",
- str_replace("\n ", "\n&nbsp;", $rest_of_comment)
- );
- }
- else {
- $test_str .= $rest_of_comment;
- }
+ // Short-cut through all the multiline code
+ $rest_of_comment = GeSHi::hsc(substr($part, $i + strlen($open), $close_pos - $i - strlen($open) + strlen($close)));
+ if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
+ $test_str_match == GESHI_START_IMPORTANT) &&
+ $check_linenumbers) {
+ // strreplace to put close span and open span around multiline newlines
+ $test_str .= str_replace(
+ "\n", "</span>\n<span$attributes>",
+ str_replace("\n ", "\n&nbsp;", $rest_of_comment)
+ );
+ }
+ else {
+ $test_str .= $rest_of_comment;
+ }
- if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
- $test_str_match == GESHI_START_IMPORTANT) {
- $test_str .= '</span>';
- if ($oops) {
- $test_str .= "\n";
+ if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
+ $test_str_match == GESHI_START_IMPORTANT) {
+ $test_str .= '</span>';
}
+ $i = $close_pos + strlen($close) - 1;
+ // parse the rest
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
+ break;
}
- $i = $close_pos + $com_len - 1;
- // parse the rest
- $result .= $this->parse_non_string_part($stuff_to_parse);
- $stuff_to_parse = '';
- break;
}
}
// If we haven't matched a multiline comment, try single-line comments
@@ -1815,22 +1953,8 @@ class GeSHi {
}
//This check will find special variables like $# in bash or compiler directives of Delphi beginning {$
if($match) {
- $disallowed_before = "";
- $disallowed_after = "";
-
- if(isset($this->language_data['PARSER_CONTROL'])) {
- if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
- if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
- $disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
- }
- if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
- $disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
- }
- }
- }
-
- $match = $match && (!strlen($disallowed_before) || ((false === strpos($disallowed_before, substr($part, $i-1, 1))) && (0!=$i)));
- $match = $match && (!strlen($disallowed_after) || ((false === strpos($disallowed_after, substr($part, $i+1, 1))) && (strlen($part)-1>$i)));
+ $match = $match && (empty($sc_disallowed_before) || ((false === strpos($sc_disallowed_before, $part[$i-1])) && (0 != $i)));
+ $match = $match && (empty($sc_disallowed_after) || ((false === strpos($sc_disallowed_after, $part[$i+1])) && ($length-1>$i)));
}
if ($match) {
$COMMENT_MATCHED = true;
@@ -1849,7 +1973,7 @@ class GeSHi {
$close_pos = strpos($part, "\n", $i);
$oops = false;
if ($close_pos === false) {
- $close_pos = strlen($part);
+ $close_pos = $length;
$oops = true;
}
$test_str .= GeSHi::hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
@@ -1869,28 +1993,9 @@ class GeSHi {
}
}
}
- else if ($STRING_OPEN != '') {
- // Otherwise, convert it to HTML form
- if (strtolower($this->encoding) == 'utf-8') {
- //only escape <128 (we don't want to break multibyte chars)
- if (ord($char) < 128) {
- $char = GeSHi::hsc($char);
- }
- }
- else {
- //encode everthing
- $char = GeSHi::hsc($char);
- }
- }
// Where are we adding this char?
if (!$COMMENT_MATCHED) {
- if (($STRING_OPEN == '') && !$CLOSE_STRING) {
- $stuff_to_parse .= $char;
- }
- else {
- $result .= $char;
- $CLOSE_STRING = false;
- }
+ $stuff_to_parse .= $char;
}
else {
$result .= $test_str;
@@ -1920,17 +2025,16 @@ class GeSHi {
}
}
- // Parse the last stuff (redundant?)
- $result .= $this->parse_non_string_part($stuff_to_parse);
+ //This fix is related to SF#1923020, but has to be applied regardless of
+ //actually highlighting symbols.
+ $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
+
+// // Parse the last stuff (redundant?)
+// $result .= $this->parse_non_string_part($stuff_to_parse);
// Lop off the very first and last spaces
$result = substr($result, 1, -1);
- // Are we still in a string?
- if ($STRING_OPEN) {
- $result .= '</span>';
- }
-
// We're finished: stop timing
$this->set_time($start_time, microtime());
@@ -1946,24 +2050,27 @@ class GeSHi {
* @since 1.0.0
* @access private
*/
- function indent($result) {
+ function indent(&$result) {
/// Replace tabs with the correct number of spaces
if (false !== strpos($result, "\t")) {
$lines = explode("\n", $result);
- $tab_width = $this->get_real_tab_width();
- foreach ($lines as $key => $line) {
+ $result = null;//Save memory while we process the lines individually
+ $tab_width = $this->get_real_tab_width();
+ $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
+
+ for ($key = 0, $n = count($lines); $key < $n; $key++) {
+ $line = $lines[$key];
if (false === strpos($line, "\t")) {
- $lines[$key] = $line;
continue;
}
$pos = 0;
$length = strlen($line);
- $result_line = '';
+ $lines[$key] = ''; // reduce memory
$IN_TAG = false;
- for ($i = 0; $i < $length; $i++) {
- $char = substr($line, $i, 1);
+ for ($i = 0; $i < $length; ++$i) {
+ $char = $line[$i];
// Simple engine to work out whether we're in a tag.
// If we are we modify $pos. This is so we ignore HTML
// in the line and only work out the tab replacement
@@ -1971,26 +2078,27 @@ class GeSHi {
// This test could be improved to include strings in the
// html so that < or > would be allowed in user's styles
// (e.g. quotes: '<' '>'; or similar)
- if ($IN_TAG && '>' == $char) {
- $IN_TAG = false;
- $result_line .= '>';
- ++$pos;
+ if ($IN_TAG) {
+ if('>' == $char) {
+ $IN_TAG = false;
+ }
+ $lines[$key] .= $char;
}
- else if (!$IN_TAG && '<' == $char) {
+ else if ('<' == $char) {
$IN_TAG = true;
- $result_line .= '<';
- ++$pos;
+ $lines[$key] .= '<';
}
- else if (!$IN_TAG && '&' == $char) {
- $substr = substr($line, $i + 3, 4);
- //$substr_5 = substr($line, 5, 1);
+ else if ('&' == $char) {
+ $substr = substr($line, $i + 3, 5);
$posi = strpos($substr, ';');
- if (false !== $posi) {
- $pos += $posi + 3;
+ if (false === $posi) {
+ ++$pos;
+ } else {
+ $pos -= $posi+2;
}
- $result_line .= '&';
+ $lines[$key] .= $char;
}
- else if (!$IN_TAG && "\t" == $char) {
+ else if ("\t" == $char) {
$str = '';
// OPTIMISE - move $strs out. Make an array:
// $tabs = array(
@@ -1998,28 +2106,32 @@ class GeSHi {
// 2 => '&nbsp; ',
// 3 => '&nbsp; &nbsp;' etc etc
// to use instead of building a string every time
- $strs = array(0 => '&nbsp;', 1 => ' ');
- for ($k = 0; $k < ($tab_width - (($i - $pos) % $tab_width)); $k++) $str .= $strs[$k % 2];
- $result_line .= $str;
- $pos += ($i - $pos) % $tab_width + 1;
+ $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the loop as it doesn't change within the loop
+ if(($pos%2) || 1 == $tab_end_width) {
+ $str .= substr($tab_string, 6, $tab_end_width);
+ } else {
+ $str .= substr($tab_string, 0, $tab_end_width+5);
+ }
+ $lines[$key] .= $str;
+ $pos += $tab_end_width;
if (false === strpos($line, "\t", $i + 1)) {
- $result_line .= substr($line, $i + 1);
+ $lines[$key] .= substr($line, $i + 1);
break;
}
}
- else if ($IN_TAG) {
+ else if (0 == $pos && ' ' == $char) {
+ $lines[$key] .= '&nbsp;';
++$pos;
- $result_line .= $char;
}
else {
- $result_line .= $char;
- //++$pos;
+ $lines[$key] .= $char;
+ ++$pos;
}
}
- $lines[$key] = $result_line;
}
$result = implode("\n", $lines);
+ unset($lines);//We don't need the lines separated beyond this --- free them!
}
// Other whitespace
// BenBE: Fix to reduce the number of replacements to be done
@@ -2027,12 +2139,12 @@ class GeSHi {
$result = str_replace(' ', ' &nbsp;', $result);
if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
- if ($this->line_ending === null) {
- $result = nl2br($result);
- } else {
- $result = str_replace("\n", $this->line_ending, $result);
- }
- }
+ if ($this->line_ending === null) {
+ $result = nl2br($result);
+ } else {
+ $result = str_replace("\n", $this->line_ending, $result);
+ }
+ }
return $result;
}
@@ -2045,13 +2157,14 @@ class GeSHi {
* @access private
*/
function change_case($instr) {
- if ($this->language_data['CASE_KEYWORDS'] == GESHI_CAPS_UPPER) {
- return strtoupper($instr);
- }
- else if ($this->language_data['CASE_KEYWORDS'] == GESHI_CAPS_LOWER) {
- return strtolower($instr);
+ switch ($this->language_data['CASE_KEYWORDS']) {
+ case GESHI_CAPS_UPPER:
+ return strtoupper($instr);
+ case GESHI_CAPS_LOWER:
+ return strtolower($instr);
+ default:
+ return $instr;
}
- return $instr;
}
/**
@@ -2081,13 +2194,14 @@ class GeSHi {
// Old system: strtolower
//$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
// New system: get keyword from language file to get correct case
+ $lower_keyword = strtolower($keyword);
foreach ($this->language_data['KEYWORDS'][$group] as $word) {
- if (strtolower($word) == strtolower($keyword)) {
+ if (strtolower($word) == $lower_keyword) {
break;
}
}
$word = ( substr($word, 0, 4) == '&lt;' ) ? substr($word, 4) : $word;
- $word = ( substr($word, -4) == '&gt;' ) ? substr($word, 0, strlen($word) - 4) : $word;
+ $word = ( substr($word, -4) == '&gt;' ) ? substr($word, 0, - 4) : $word;
if (!$word) return '';
return '<|UR1|"' .
@@ -2158,8 +2272,27 @@ class GeSHi {
// Highlight keywords
// if there is a couple of alpha symbols there *might* be a keyword
if (preg_match('#[a-zA-Z]{2,}#', $stuff_to_parse)) {
+ $disallowed_before = "a-zA-Z0-9\$_\|\#;>|^";
+ $disallowed_after = "a-zA-Z0-9_\|%\\-&";
+ if(isset($this->language_data['PARSER_CONTROL'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
+ $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
+ }
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
+ $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
+ }
+ }
+ }
+
foreach ($this->language_data['KEYWORDS'] as $k => $keywordset) {
- if ($this->lexic_permissions['KEYWORDS'][$k]) {
+ if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
+ $this->lexic_permissions['KEYWORDS'][$k]) {
+
+ $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
+ $modifiers = $case_sensitive ? 'e' : 'ie';
+ $styles = "/$k/";
+
foreach ($keywordset as $keyword) {
$keyword = preg_quote($keyword, '/');
//
@@ -2168,33 +2301,24 @@ class GeSHi {
// in just yet - otherwise languages with the keywords "color" or "or" have
// a fit.
//
- if (false !== stristr($stuff_to_parse_pregquote, $keyword )) {
- $stuff_to_parse .= ' ';
+ if ($case_sensitive) {
+ $keyword_found = strpos($stuff_to_parse_pregquote, $keyword) !== false;
+ } else {
+ $keyword_found = stristr($stuff_to_parse_pregquote, $keyword) !== false;
+ }
+ if ($keyword_found) {
// Might make a more unique string for putting the number in soon
// Basically, we don't put the styles in yet because then the styles themselves will
// get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
- $styles = "/$k/";
- $modifiers = ($this->language_data['CASE_SENSITIVE'][$k]) ? "e" : "ie";
-
- $disallowed_before = "a-zA-Z0-9\$_\|\#;>|^";
- $disallowed_after = "a-zA-Z0-9_<\|%\\-&";
- if(isset($this->language_data['PARSER_CONTROL'])) {
- if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
- if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
- $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
- }
- if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
- $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
- }
- }
- }
+
+ $stuff_to_parse .= ' ';
$stuff_to_parse = preg_replace(
- "/([^$disallowed_before])($keyword)(?=[^$disallowed_after])/$modifiers",
+ "/([^$disallowed_before])($keyword)(?!\<DOT\>(?:htm|php))(?=[^$disallowed_after])/$modifiers",
"'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
$stuff_to_parse
);
- $stuff_to_parse = substr($stuff_to_parse, 0, strlen($stuff_to_parse) - 1);
+ $stuff_to_parse = substr($stuff_to_parse, 0, -1);
}
}
}
@@ -2206,7 +2330,9 @@ class GeSHi {
//
foreach ($this->language_data['KEYWORDS'] as $k => $kws) {
if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['KEYWORDS'][$k] . '"';
+ $attributes = ' style="' .
+ (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
+ $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
}
else {
$attributes = ' class="kw' . $k . '"';
@@ -2248,7 +2374,7 @@ class GeSHi {
//
if ($this->lexic_permissions['BRACKETS']) {
$code_entities_match = array('[', ']', '(', ')', '{', '}');
- if (!$this->use_classes) {
+ if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
$code_entities_replace = array(
'<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
'<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
@@ -2272,7 +2398,7 @@ class GeSHi {
}
//FIX for symbol highlighting ...
- if($this->lexic_permissions['SYMBOLS']) {
+ if($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
//As this is a costly operation, we avoid doing it for multiple groups ...
//Instead we perform it for all symbols at once.
//
@@ -2439,6 +2565,11 @@ class GeSHi {
* @todo Needs to load keys for lexic permissions for keywords, regexps etc
*/
function load_language($file_name) {
+ if ($file_name == $this->loaded_language) {
+ // this file is already loaded!
+ return;
+ }
+ $this->loaded_language = $file_name;
$this->enable_highlighting();
$language_data = array();
require $file_name;
@@ -2473,12 +2604,12 @@ class GeSHi {
* @since 1.0.0
* @access private
*/
- function finalise($parsed_code) {
+ function finalise(&$parsed_code) {
// Remove end parts of important declarations
// This is BUGGY!! My fault for bad code: fix coming in 1.2
// @todo Remove this crap
if ($this->enable_important_blocks &&
- (strstr($parsed_code, GeSHi::hsc(GESHI_START_IMPORTANT)) === false)) {
+ (strpos($parsed_code, GeSHi::hsc(GESHI_START_IMPORTANT)) === false)) {
$parsed_code = str_replace(GeSHi::hsc(GESHI_END_IMPORTANT), '', $parsed_code);
}
@@ -2497,28 +2628,31 @@ class GeSHi {
$this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
}
+ // Get code into lines
+ $code = explode("\n", $parsed_code);
+ $parsed_code = '';
+
// If we're using line numbers, we insert <li>s and appropriate
// markup to style them (otherwise we don't need to do anything)
if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
// If we're using the <pre> header, we shouldn't add newlines because
// the <pre> will line-break them (and the <li>s already do this for us)
$ls = ($this->header_type != GESHI_HEADER_PRE) ? "\n" : '';
- // Get code into lines
- $code = explode("\n", $parsed_code);
+
// Set vars to defaults for following loop
- $parsed_code = '';
$i = 0;
// Foreach line...
- foreach ($code as $line) {
+ for ($i = 0, $n = count($code); $i < $n;) {
//Reset the attributes for a new line ...
$attrs = array();
// Make lines have at least one space in them if they're empty
// BenBE: Checking emptiness using trim instead of relying on blanks
- if ('' == trim($line)) {
- $line = '&nbsp;';
+ if ('' == trim($code[$i])) {
+ $code[$i] = '&nbsp;';
}
+
// If this is a "special line"...
if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
$i % $this->line_nth_row == ($this->line_nth_row - 1)) {
@@ -2556,13 +2690,15 @@ class GeSHi {
}
++$i;
+
// Are we supposed to use ids? If so, add them
if ($this->add_ids) {
$attrs['id'][] = "$this->overall_id-$i";
}
+
if (in_array($i, $this->highlight_extra_lines)) {
if ($this->use_classes) {
- if(array_key_exists($i, $this->highlight_extra_lines_styles)) {
+ if (isset($this->highlight_extra_lines_styles[$i])) {
$attrs['class'][] = "lx$i";
} else {
$attrs['class'][] = "ln-xtra";
@@ -2577,24 +2713,22 @@ class GeSHi {
foreach ($attrs as $key => $attr) {
$attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
}
- $parsed_code .= "<li$attr_string>$start$line$end</li>$ls";
+
+ $parsed_code .= "<li$attr_string>$start{$code[$i - 1]}$end</li>$ls";
}
}
else {
// No line numbers, but still need to handle highlighting lines extra.
// Have to use divs so the full width of the code is highlighted
- $code = explode("\n", $parsed_code);
- $parsed_code = '';
- $i = 0;
- foreach ($code as $line) {
+ for ($i = 0, $n = count($code); $i < $n; ++$i) {
// Make lines have at least one space in them if they're empty
// BenBE: Checking emptiness using trim instead of relying on blanks
- if ('' == trim($line)) {
- $line = '&nbsp;';
+ if ('' == trim($code[$i])) {
+ $code[$i] = '&nbsp;';
}
- if (in_array(++$i, $this->highlight_extra_lines)) {
+ if (in_array($i + 1, $this->highlight_extra_lines)) {
if ($this->use_classes) {
- if (array_key_exists($i, $this->highlight_extra_lines_styles)) {
+ if (isset($this->highlight_extra_lines_styles[$i])) {
$parsed_code .= "<div class=\"lx$i\">";
} else {
$parsed_code .= "<div class=\"ln-xtra\">";
@@ -2603,17 +2737,13 @@ class GeSHi {
$parsed_code .= "<div style=\"" . $this->get_line_style($i) . "\">";
}
// Remove \n because it stuffs up <pre> header
- $parsed_code .= $line . "</div>";
+ $parsed_code .= $code[$i] . "</div>";
} else {
- $parsed_code .= $line . "\n";
+ $parsed_code .= $code[$i] . "\n";
}
}
}
-
- if ($this->header_type == GESHI_HEADER_PRE) {
- // enforce line numbers when using pre
- $parsed_code = str_replace('<li></li>', '<li>&nbsp;</li>', $parsed_code);
- }
+ unset($code);
return $this->header() . chop($parsed_code) . $this->footer();
}
@@ -2858,7 +2988,16 @@ class GeSHi {
'&' => '&amp;',
'"' => '&quot;',
'<' => '&lt;',
- '>' => '&gt;'
+ '>' => '&gt;',
+
+ //This fix is related to SF#1923020, but has to be applied
+ //regardless of actually highlighting symbols.
+
+ //Circumvent a bug with symbol highlighting
+ //This is required as ; would produce undesirable side-effects if it
+ //was not to be processed as an entity.
+ ';' => '<SEMI>', // Force ; to be processed as entity
+ '|' => '<PIPE>' // Force | to be processed as entity
); // ENT_COMPAT set
if (ENT_NOQUOTES == $quote_style) // don't convert double quotes
@@ -3056,15 +3195,15 @@ class GeSHi {
* @since 1.0.7.21
*/
function get_line_style($line) {
- //$style = null;
- $style = null;
- if (array_key_exists($line, $this->highlight_extra_lines_styles)) {
- $style = $this->highlight_extra_lines_styles[$line];
- } else { // if no "extra" style assigned
- $style = $this->highlight_extra_lines_style;
- }
-
- return $style;
+ //$style = null;
+ $style = null;
+ if (isset($this->highlight_extra_lines_styles[$line])) {
+ $style = $this->highlight_extra_lines_styles[$line];
+ } else { // if no "extra" style assigned
+ $style = $this->highlight_extra_lines_style;
+ }
+
+ return $style;
}
} // End Class GeSHi