diff options
author | Andreas Gohr <andi@splitbrain.org> | 2013-10-28 20:22:05 +0100 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2013-10-28 20:22:05 +0100 |
commit | 23a363f01514464eb2238ac09ec7723d03d57ecb (patch) | |
tree | 42c7dcff8c5b0e506a25d08ee5583ed2a24e8602 /inc/parser/lexer.php | |
parent | 25e48e54df60b3df6efa365daceb3a8966c8f427 (diff) | |
parent | 75cf672f10a71f7dee6d50eb70b742689838bc36 (diff) | |
download | rpg-23a363f01514464eb2238ac09ec7723d03d57ecb.tar.gz rpg-23a363f01514464eb2238ac09ec7723d03d57ecb.tar.bz2 |
Merge branch 'master' into stable
* master: (413 commits)
release preparation
fixed strict violation in ACL plugin
Fix issues from teams:i18n:translation-check in localizations
ensure locale is set back to the original value
skip FS#2867 test if \s doesn't match \xA0 after attempting to change the locale
unittests for auth_loadACL
translation update
allow charset for SSO to be configured FS#2148
Mailer: avoid overlong headers in content ids FS#2868
translation update
translation update
replace \s, \S with [ \t], [^ \t] in regexs used with acls
translation update
translation update
translation update
translation update
Fix handling of the legacy subscription action name
remove obsolete opera handling and session closing
remove no longer used ajax.php
fix proxy CONNECT where HTTP 1.1 answer is given
...
Diffstat (limited to 'inc/parser/lexer.php')
-rw-r--r-- | inc/parser/lexer.php | 414 |
1 files changed, 209 insertions, 205 deletions
diff --git a/inc/parser/lexer.php b/inc/parser/lexer.php index b5bcb9612..2e84eca7c 100644 --- a/inc/parser/lexer.php +++ b/inc/parser/lexer.php @@ -1,18 +1,18 @@ <?php /** -* Author Markus Baker: http://www.lastcraft.com -* Version adapted from Simple Test: http://sourceforge.net/projects/simpletest/ -* For an intro to the Lexer see: -* http://www.phppatterns.com/index.php/article/articleview/106/1/2/ -* @author Marcus Baker -* @package Doku -* @subpackage Lexer -* @version $Id: lexer.php,v 1.1 2005/03/23 23:14:09 harryf Exp $ -*/ + * Author Markus Baker: http://www.lastcraft.com + * Version adapted from Simple Test: http://sourceforge.net/projects/simpletest/ + * For an intro to the Lexer see: + * http://www.phppatterns.com/index.php/article/articleview/106/1/2/ + * @author Marcus Baker + * @package Doku + * @subpackage Lexer + * @version $Id: lexer.php,v 1.1 2005/03/23 23:14:09 harryf Exp $ + */ /** -* Init path constant -*/ + * Init path constant + */ if(!defined('DOKU_INC')) die('meh.'); /**#@+ @@ -26,11 +26,12 @@ define("DOKU_LEXER_SPECIAL", 5); /**#@-*/ /** - * Compounded regular expression. Any of - * the contained patterns could match and - * when one does it's label is returned. - * @package Doku - * @subpackage Lexer + * Compounded regular expression. Any of + * the contained patterns could match and + * when one does it's label is returned. + * + * @package Doku + * @subpackage Lexer */ class Doku_LexerParallelRegex { var $_patterns; @@ -39,10 +40,11 @@ class Doku_LexerParallelRegex { var $_case; /** - * Constructor. Starts with no patterns. - * @param boolean $case True for case sensitive, false - * for insensitive. - * @access public + * Constructor. Starts with no patterns. + * + * @param boolean $case True for case sensitive, false + * for insensitive. + * @access public */ function Doku_LexerParallelRegex($case) { $this->_case = $case; @@ -52,15 +54,16 @@ class Doku_LexerParallelRegex { } /** - * Adds a pattern with an optional label. - * @param mixed $pattern Perl style regex. Must be UTF-8 - * encoded. If its a string, the (, ) - * lose their meaning unless they - * form part of a lookahead or - * lookbehind assertation. - * @param string $label Label of regex to be returned - * on a match. Label must be ASCII - * @access public + * Adds a pattern with an optional label. + * + * @param mixed $pattern Perl style regex. Must be UTF-8 + * encoded. If its a string, the (, ) + * lose their meaning unless they + * form part of a lookahead or + * lookbehind assertation. + * @param string $label Label of regex to be returned + * on a match. Label must be ASCII + * @access public */ function addPattern($pattern, $label = true) { $count = count($this->_patterns); @@ -70,13 +73,13 @@ class Doku_LexerParallelRegex { } /** - * Attempts to match all patterns at once against - * a string. - * @param string $subject String to match against. - * @param string $match First matched portion of - * subject. - * @return boolean True on success. - * @access public + * Attempts to match all patterns at once against a string. + * + * @param string $subject String to match against. + * @param string $match First matched portion of + * subject. + * @return boolean True on success. + * @access public */ function match($subject, &$match) { if (count($this->_patterns) == 0) { @@ -98,14 +101,14 @@ class Doku_LexerParallelRegex { } /** - * Attempts to split the string against all patterns at once + * Attempts to split the string against all patterns at once * - * @param string $subject String to match against. - * @param array $split The split result: array containing, pre-match, match & post-match strings - * @return boolean True on success. - * @access public + * @param string $subject String to match against. + * @param array $split The split result: array containing, pre-match, match & post-match strings + * @return boolean True on success. + * @access public * - * @author Christopher Smith <chris@jalakai.co.uk> + * @author Christopher Smith <chris@jalakai.co.uk> */ function split($subject, &$split) { if (count($this->_patterns) == 0) { @@ -143,12 +146,13 @@ class Doku_LexerParallelRegex { } /** - * Compounds the patterns into a single - * regular expression separated with the - * "or" operator. Caches the regex. - * Will automatically escape (, ) and / tokens. - * @param array $patterns List of patterns in order. - * @access private + * Compounds the patterns into a single + * regular expression separated with the + * "or" operator. Caches the regex. + * Will automatically escape (, ) and / tokens. + * + * @param array $patterns List of patterns in order. + * @access private */ function _getCompoundedRegex() { if ($this->_regex == null) { @@ -176,25 +180,25 @@ class Doku_LexerParallelRegex { */ switch($elt) { - case '(': - $pattern .= '\('; - break; - case ')': - if ($level > 0) - $level--; /* closing (? */ - else - $pattern .= '\\'; - $pattern .= ')'; - break; - case '(?': - $level++; - $pattern .= '(?'; - break; - default: - if (substr($elt, 0, 1) == '\\') - $pattern .= $elt; - else - $pattern .= str_replace('/', '\/', $elt); + case '(': + $pattern .= '\('; + break; + case ')': + if ($level > 0) + $level--; /* closing (? */ + else + $pattern .= '\\'; + $pattern .= ')'; + break; + case '(?': + $level++; + $pattern .= '(?'; + break; + default: + if (substr($elt, 0, 1) == '\\') + $pattern .= $elt; + else + $pattern .= str_replace('/', '\/', $elt); } } $this->_patterns[$i] = "($pattern)"; @@ -205,9 +209,9 @@ class Doku_LexerParallelRegex { } /** - * Accessor for perl regex mode flags to use. - * @return string Perl regex flags. - * @access private + * Accessor for perl regex mode flags to use. + * @return string Perl regex flags. + * @access private */ function _getPerlMatchingFlags() { return ($this->_case ? "msS" : "msSi"); @@ -215,47 +219,47 @@ class Doku_LexerParallelRegex { } /** - * States for a stack machine. - * @package Lexer - * @subpackage Lexer + * States for a stack machine. + * @package Lexer + * @subpackage Lexer */ class Doku_LexerStateStack { var $_stack; /** - * Constructor. Starts in named state. - * @param string $start Starting state name. - * @access public + * Constructor. Starts in named state. + * @param string $start Starting state name. + * @access public */ function Doku_LexerStateStack($start) { $this->_stack = array($start); } /** - * Accessor for current state. - * @return string State. - * @access public + * Accessor for current state. + * @return string State. + * @access public */ function getCurrent() { return $this->_stack[count($this->_stack) - 1]; } /** - * Adds a state to the stack and sets it - * to be the current state. - * @param string $state New state. - * @access public + * Adds a state to the stack and sets it + * to be the current state. + * @param string $state New state. + * @access public */ function enter($state) { array_push($this->_stack, $state); } /** - * Leaves the current state and reverts - * to the previous one. - * @return boolean False if we drop off - * the bottom of the list. - * @access public + * Leaves the current state and reverts + * to the previous one. + * @return boolean False if we drop off + * the bottom of the list. + * @access public */ function leave() { if (count($this->_stack) == 1) { @@ -267,13 +271,13 @@ class Doku_LexerStateStack { } /** - * Accepts text and breaks it into tokens. - * Some optimisation to make the sure the - * content is only scanned by the PHP regex - * parser once. Lexer modes must not start - * with leading underscores. - * @package Doku - * @subpackage Lexer + * Accepts text and breaks it into tokens. + * Some optimisation to make the sure the + * content is only scanned by the PHP regex + * parser once. Lexer modes must not start + * with leading underscores. + * @package Doku + * @subpackage Lexer */ class Doku_Lexer { var $_regexes; @@ -283,13 +287,13 @@ class Doku_Lexer { var $_case; /** - * Sets up the lexer in case insensitive matching - * by default. - * @param Doku_Parser $parser Handling strategy by - * reference. - * @param string $start Starting handler. - * @param boolean $case True for case sensitive. - * @access public + * Sets up the lexer in case insensitive matching + * by default. + * @param Doku_Parser $parser Handling strategy by + * reference. + * @param string $start Starting handler. + * @param boolean $case True for case sensitive. + * @access public */ function Doku_Lexer(&$parser, $start = "accept", $case = false) { $this->_case = $case; @@ -300,15 +304,15 @@ class Doku_Lexer { } /** - * Adds a token search pattern for a particular - * parsing mode. The pattern does not change the - * current mode. - * @param string $pattern Perl style regex, but ( and ) - * lose the usual meaning. - * @param string $mode Should only apply this - * pattern when dealing with - * this type of input. - * @access public + * Adds a token search pattern for a particular + * parsing mode. The pattern does not change the + * current mode. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @access public */ function addPattern($pattern, $mode = "accept") { if (! isset($this->_regexes[$mode])) { @@ -318,17 +322,17 @@ class Doku_Lexer { } /** - * Adds a pattern that will enter a new parsing - * mode. Useful for entering parenthesis, strings, - * tags, etc. - * @param string $pattern Perl style regex, but ( and ) - * lose the usual meaning. - * @param string $mode Should only apply this - * pattern when dealing with - * this type of input. - * @param string $new_mode Change parsing to this new - * nested mode. - * @access public + * Adds a pattern that will enter a new parsing + * mode. Useful for entering parenthesis, strings, + * tags, etc. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @param string $new_mode Change parsing to this new + * nested mode. + * @access public */ function addEntryPattern($pattern, $mode, $new_mode) { if (! isset($this->_regexes[$mode])) { @@ -338,12 +342,12 @@ class Doku_Lexer { } /** - * Adds a pattern that will exit the current mode - * and re-enter the previous one. - * @param string $pattern Perl style regex, but ( and ) - * lose the usual meaning. - * @param string $mode Mode to leave. - * @access public + * Adds a pattern that will exit the current mode + * and re-enter the previous one. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Mode to leave. + * @access public */ function addExitPattern($pattern, $mode) { if (! isset($this->_regexes[$mode])) { @@ -353,16 +357,16 @@ class Doku_Lexer { } /** - * Adds a pattern that has a special mode. Acts as an entry - * and exit pattern in one go, effectively calling a special - * parser handler for this token only. - * @param string $pattern Perl style regex, but ( and ) - * lose the usual meaning. - * @param string $mode Should only apply this - * pattern when dealing with - * this type of input. - * @param string $special Use this mode for this one token. - * @access public + * Adds a pattern that has a special mode. Acts as an entry + * and exit pattern in one go, effectively calling a special + * parser handler for this token only. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @param string $special Use this mode for this one token. + * @access public */ function addSpecialPattern($pattern, $mode, $special) { if (! isset($this->_regexes[$mode])) { @@ -372,24 +376,24 @@ class Doku_Lexer { } /** - * Adds a mapping from a mode to another handler. - * @param string $mode Mode to be remapped. - * @param string $handler New target handler. - * @access public + * Adds a mapping from a mode to another handler. + * @param string $mode Mode to be remapped. + * @param string $handler New target handler. + * @access public */ function mapHandler($mode, $handler) { $this->_mode_handlers[$mode] = $handler; } /** - * Splits the page text into tokens. Will fail - * if the handlers report an error or if no - * content is consumed. If successful then each - * unparsed and parsed token invokes a call to the - * held listener. - * @param string $raw Raw HTML text. - * @return boolean True on success, else false. - * @access public + * Splits the page text into tokens. Will fail + * if the handlers report an error or if no + * content is consumed. If successful then each + * unparsed and parsed token invokes a call to the + * held listener. + * @param string $raw Raw HTML text. + * @return boolean True on success, else false. + * @access public */ function parse($raw) { if (! isset($this->_parser)) { @@ -418,18 +422,18 @@ class Doku_Lexer { } /** - * Sends the matched token and any leading unmatched - * text to the parser changing the lexer to a new - * mode if one is listed. - * @param string $unmatched Unmatched leading portion. - * @param string $matched Actual token match. - * @param string $mode Mode after match. A boolean - * false mode causes no change. - * @param int $pos Current byte index location in raw doc - * thats being parsed - * @return boolean False if there was any error - * from the parser. - * @access private + * Sends the matched token and any leading unmatched + * text to the parser changing the lexer to a new + * mode if one is listed. + * @param string $unmatched Unmatched leading portion. + * @param string $matched Actual token match. + * @param string $mode Mode after match. A boolean + * false mode causes no change. + * @param int $pos Current byte index location in raw doc + * thats being parsed + * @return boolean False if there was any error + * from the parser. + * @access private */ function _dispatchTokens($unmatched, $matched, $mode = false, $initialPos, $matchPos) { if (! $this->_invokeParser($unmatched, DOKU_LEXER_UNMATCHED, $initialPos) ){ @@ -456,50 +460,50 @@ class Doku_Lexer { } /** - * Tests to see if the new mode is actually to leave - * the current mode and pop an item from the matching - * mode stack. - * @param string $mode Mode to test. - * @return boolean True if this is the exit mode. - * @access private + * Tests to see if the new mode is actually to leave + * the current mode and pop an item from the matching + * mode stack. + * @param string $mode Mode to test. + * @return boolean True if this is the exit mode. + * @access private */ function _isModeEnd($mode) { return ($mode === "__exit"); } /** - * Test to see if the mode is one where this mode - * is entered for this token only and automatically - * leaves immediately afterwoods. - * @param string $mode Mode to test. - * @return boolean True if this is the exit mode. - * @access private + * Test to see if the mode is one where this mode + * is entered for this token only and automatically + * leaves immediately afterwoods. + * @param string $mode Mode to test. + * @return boolean True if this is the exit mode. + * @access private */ function _isSpecialMode($mode) { return (strncmp($mode, "_", 1) == 0); } /** - * Strips the magic underscore marking single token - * modes. - * @param string $mode Mode to decode. - * @return string Underlying mode name. - * @access private + * Strips the magic underscore marking single token + * modes. + * @param string $mode Mode to decode. + * @return string Underlying mode name. + * @access private */ function _decodeSpecial($mode) { return substr($mode, 1); } /** - * Calls the parser method named after the current - * mode. Empty content will be ignored. The lexer - * has a parser handler for each mode in the lexer. - * @param string $content Text parsed. - * @param boolean $is_match Token is recognised rather - * than unparsed data. - * @param int $pos Current byte index location in raw doc - * thats being parsed - * @access private + * Calls the parser method named after the current + * mode. Empty content will be ignored. The lexer + * has a parser handler for each mode in the lexer. + * @param string $content Text parsed. + * @param boolean $is_match Token is recognised rather + * than unparsed data. + * @param int $pos Current byte index location in raw doc + * thats being parsed + * @access private */ function _invokeParser($content, $is_match, $pos) { if (($content === "") || ($content === false)) { @@ -513,26 +517,26 @@ class Doku_Lexer { // modes starting with plugin_ are all handled by the same // handler but with an additional parameter if(substr($handler,0,7)=='plugin_'){ - list($handler,$plugin) = explode('_',$handler,2); - return $this->_parser->$handler($content, $is_match, $pos, $plugin); + list($handler,$plugin) = explode('_',$handler,2); + return $this->_parser->$handler($content, $is_match, $pos, $plugin); } return $this->_parser->$handler($content, $is_match, $pos); } /** - * Tries to match a chunk of text and if successful - * removes the recognised chunk and any leading - * unparsed data. Empty strings will not be matched. - * @param string $raw The subject to parse. This is the - * content that will be eaten. - * @return array Three item list of unparsed - * content followed by the - * recognised token and finally the - * action the parser is to take. - * True if no match, false if there - * is a parsing error. - * @access private + * Tries to match a chunk of text and if successful + * removes the recognised chunk and any leading + * unparsed data. Empty strings will not be matched. + * @param string $raw The subject to parse. This is the + * content that will be eaten. + * @return array Three item list of unparsed + * content followed by the + * recognised token and finally the + * action the parser is to take. + * True if no match, false if there + * is a parsing error. + * @access private */ function _reduce(&$raw) { if (! isset($this->_regexes[$this->_mode->getCurrent()])) { @@ -550,9 +554,9 @@ class Doku_Lexer { } /** -* Escapes regex characters other than (, ) and / -* @TODO -*/ + * Escapes regex characters other than (, ) and / + * @TODO + */ function Doku_Lexer_Escape($str) { //$str = addslashes($str); $chars = array( |