diff options
author | andi <andi@splitbrain.org> | 2005-03-31 16:57:49 +0200 |
---|---|---|
committer | andi <andi@splitbrain.org> | 2005-03-31 16:57:49 +0200 |
commit | 0cecf9d507451346a32ddf45a85b425784fbb0f8 (patch) | |
tree | 076dd2d128f55022792b4bfab42c1d2d2bec4fb8 /inc | |
parent | c53ea5f2d3d1019fd4e1956796bc329af499a86d (diff) | |
download | rpg-0cecf9d507451346a32ddf45a85b425784fbb0f8.tar.gz rpg-0cecf9d507451346a32ddf45a85b425784fbb0f8.tar.bz2 |
new parser added (define DOKU_USENEWPARSER to use it)
darcs-hash:20050331145749-9977f-f011ea6c65a278197e9087b685c635c60a204cd2.gz
Diffstat (limited to 'inc')
-rw-r--r-- | inc/common.php | 9 | ||||
-rw-r--r-- | inc/parser/action.php | 83 | ||||
-rw-r--r-- | inc/parser/handler.php | 1510 | ||||
-rw-r--r-- | inc/parser/lexer.php | 555 | ||||
-rw-r--r-- | inc/parser/parser.php | 828 | ||||
-rw-r--r-- | inc/parser/renderer.php | 169 | ||||
-rw-r--r-- | inc/parser/spamcheck.php | 72 | ||||
-rw-r--r-- | inc/parser/xhtml.php | 816 |
8 files changed, 4041 insertions, 1 deletions
diff --git a/inc/common.php b/inc/common.php index 62794d834..ff3fd847c 100644 --- a/inc/common.php +++ b/inc/common.php @@ -506,11 +506,18 @@ function parsedWiki($id,$rev='',$excuse=true){ } }else{ if(@file_exists($file)){ - $ret = io_cacheParse($file); + if(!defined('DOKU_USENEWPARSER')){ + $ret = io_cacheParse($file); + }else{ + msg('using new parser'); + require_once(DOKU_INC.'inc/parser/action.php'); + $ret = render_as_xhtml(parse_to_instructions(io_readFile($file))); + } }elseif($excuse){ $ret = parsedLocale('newpage'); } } + return $ret; } diff --git a/inc/parser/action.php b/inc/parser/action.php new file mode 100644 index 000000000..34d71d3ae --- /dev/null +++ b/inc/parser/action.php @@ -0,0 +1,83 @@ +<?php +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/'); + +function parse_to_instructions($text){ + require_once DOKU_INC . 'inc/parser/parser.php'; + + // Create the parser + $Parser = & new Doku_Parser(); + + // Add the Handler + $Parser->Handler = & new Doku_Handler(); + + // Load all the modes + $Parser->addMode('listblock',new Doku_Parser_Mode_ListBlock()); + $Parser->addMode('preformatted',new Doku_Parser_Mode_Preformatted()); + $Parser->addMode('notoc',new Doku_Parser_Mode_NoToc()); + $Parser->addMode('header',new Doku_Parser_Mode_Header()); + $Parser->addMode('table',new Doku_Parser_Mode_Table()); + + $formats = array ( + 'strong', 'emphasis', 'underline', 'monospace', + 'subscript', 'superscript', 'deleted', + ); + foreach ( $formats as $format ) { + $Parser->addMode($format,new Doku_Parser_Mode_Formatting($format)); + } + + $Parser->addMode('linebreak',new Doku_Parser_Mode_Linebreak()); + $Parser->addMode('footnote',new Doku_Parser_Mode_Footnote()); + $Parser->addMode('hr',new Doku_Parser_Mode_HR()); + + $Parser->addMode('unformatted',new Doku_Parser_Mode_Unformatted()); + $Parser->addMode('php',new Doku_Parser_Mode_PHP()); + $Parser->addMode('html',new Doku_Parser_Mode_HTML()); + $Parser->addMode('code',new 
Doku_Parser_Mode_Code()); + $Parser->addMode('file',new Doku_Parser_Mode_File()); + $Parser->addMode('quote',new Doku_Parser_Mode_Quote()); + + // FIXME These need data files... + #$Parser->addMode('acronym',new Doku_Parser_Mode_Acronym(array_keys(getAcronyms()))); + #$Parser->addMode('wordblock',new Doku_Parser_Mode_Wordblock(getBadWords())); + #$Parser->addMode('smiley',new Doku_Parser_Mode_Smiley(array_keys(getSmileys()))); + #$Parser->addMode('entity',new Doku_Parser_Mode_Entity(array_keys(getEntities()))); + + $Parser->addMode('multiplyentity',new Doku_Parser_Mode_MultiplyEntity()); + $Parser->addMode('quotes',new Doku_Parser_Mode_Quotes()); + + $Parser->addMode('camelcaselink',new Doku_Parser_Mode_CamelCaseLink()); + $Parser->addMode('internallink',new Doku_Parser_Mode_InternalLink()); + $Parser->addMode('media',new Doku_Parser_Mode_Media()); + $Parser->addMode('externallink',new Doku_Parser_Mode_ExternalLink()); + $Parser->addMode('email',new Doku_Parser_Mode_Email()); + $Parser->addMode('windowssharelink',new Doku_Parser_Mode_WindowsShareLink()); + $Parser->addMode('filelink',new Doku_Parser_Mode_FileLink()); + $Parser->addMode('eol',new Doku_Parser_Mode_Eol()); + + // Do the parsing + return $Parser->parse($text); +} + +function render_as_xhtml($instructions){ + + // Create the renderer + require_once DOKU_INC . 
'inc/parser/xhtml.php'; + $Renderer = & new Doku_Renderer_XHTML(); + + //FIXME add data + #$Renderer->smileys = getSmileys(); + #$Renderer->entities = getEntities(); + #$Renderer->acronyms = getAcronyms(); + #$Renderer->interwiki = getInterwiki(); + #$Renderer->badwords = getBadWords(); + + // Loop through the instructions + foreach ( $instructions as $instruction ) { + // Execute the callback against the Renderer + call_user_func_array(array(&$Renderer, $instruction[0]),$instruction[1]); + } + // Return the output + return $Renderer->doc; +} + +?> diff --git a/inc/parser/handler.php b/inc/parser/handler.php new file mode 100644 index 000000000..78826007e --- /dev/null +++ b/inc/parser/handler.php @@ -0,0 +1,1510 @@ +<?php +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/'); + +class Doku_Handler { + + var $Renderer = NULL; + + var $CallWriter = NULL; + + var $calls = array(); + + var $meta = array( + 'section' => FALSE, + 'toc' => TRUE, + ); + + function Doku_Handler() { + $this->CallWriter = & new Doku_Handler_CallWriter($this); + } + + function __addCall($handler, $args, $pos) { + $call = array($handler,$args, $pos); + $this->CallWriter->writeCall($call); + } + + function __finalize(){ + if ( $this->meta['section'] ) { + $S = & new Doku_Handler_Section(); + $this->calls = $S->process($this->calls); + } + + $B = & new Doku_Handler_Block(); + $this->calls = $B->process($this->calls); + + if ( $this->meta['toc'] ) { + $T = & new Doku_Handler_Toc(); + $this->calls = $T->process($this->calls); + } + + array_unshift($this->calls,array('document_start',array(),0)); + $last_call = end($this->calls); + array_push($this->calls,array('document_end',array(),$last_call[2])); + } + + function fetch() { + $call = each($this->calls); + if ( $call ) { + return $call['value']; + } + return FALSE; + } + + function base($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_UNMATCHED: + $this->__addCall('cdata',array($match), $pos); + 
return TRUE; + break; + + } + } + + function header($match, $state, $pos) { + $match = trim($match); + $levels = array( + '======'=>1, + '====='=>2, + '===='=>3, + '==='=>4, + '=='=>5, + ); + $hsplit = preg_split( '/(={2,})/u', $match,-1, + PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); + + // Locate the level - default to level 1 if no match (title contains == signs) + if ( isset($hsplit[0]) && array_key_exists($hsplit[0], $levels) ) { + $level = $levels[$hsplit[0]]; + } else { + $level = 1; + } + + // Strip of the marker for the header, based on the level - the rest is the title + $iLevels = array_flip($levels); + $markerLen = strlen($iLevels[$level]); + $title = substr($match, $markerLen, strlen($match)-($markerLen*2)); + + $this->__addCall('header',array($title,$level), $pos); + $this->meta['section'] = TRUE; + return TRUE; + } + + function notoc($match, $state, $pos) { + $this->meta['toc'] = FALSE; + return TRUE; + } + + function linebreak($match, $state, $pos) { + $this->__addCall('linebreak',array(),$pos); + return TRUE; + } + + function eol($match, $state, $pos) { + $this->__addCall('eol',array(),$pos); + return TRUE; + } + + function hr($match, $state, $pos) { + $this->__addCall('hr',array(),$pos); + return TRUE; + } + + function __nestingTag($match, $state, $pos, $name) { + switch ( $state ) { + case DOKU_LEXER_ENTER: + $this->__addCall($name.'_open', array(), $pos); + break; + case DOKU_LEXER_EXIT: + $this->__addCall($name.'_close', array(), $pos); + break; + case DOKU_LEXER_UNMATCHED: + $this->__addCall('cdata',array($match), $pos); + break; + } + } + + function strong($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'strong'); + return TRUE; + } + + function emphasis($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'emphasis'); + return TRUE; + } + + function underline($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'underline'); + return TRUE; + } + + function monospace($match, $state, $pos) 
{ + $this->__nestingTag($match, $state, $pos, 'monospace'); + return TRUE; + } + + function subscript($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'subscript'); + return TRUE; + } + + function superscript($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'superscript'); + return TRUE; + } + + function deleted($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'deleted'); + return TRUE; + } + + + function footnote($match, $state, $pos) { + $this->__nestingTag($match, $state, $pos, 'footnote'); + return TRUE; + } + + function listblock($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_ENTER: + $ReWriter = & new Doku_Handler_List($this->CallWriter); + $this->CallWriter = & $ReWriter; + $this->__addCall('list_open', array($match), $pos); + break; + case DOKU_LEXER_EXIT: + $this->__addCall('list_close', array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + case DOKU_LEXER_MATCHED: + $this->__addCall('list_item', array($match), $pos); + break; + case DOKU_LEXER_UNMATCHED: + $this->__addCall('cdata', array($match), $pos); + break; + } + return TRUE; + } + + function unformatted($match, $state, $pos) { + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->__addCall('unformatted',array($match), $pos); + } + return TRUE; + } + + function php($match, $state, $pos) { + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->__addCall('php',array($match), $pos); + } + return TRUE; + } + + function html($match, $state, $pos) { + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->__addCall('html',array($match), $pos); + } + return TRUE; + } + + function preformatted($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_ENTER: + $ReWriter = & new Doku_Handler_Preformatted($this->CallWriter); + $this->CallWriter = & $ReWriter; + $this->__addCall('preformatted_start',array(), $pos); + break; + case DOKU_LEXER_EXIT: + 
$this->__addCall('preformatted_end',array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + case DOKU_LEXER_MATCHED: + $this->__addCall('preformatted_newline',array(), $pos); + break; + case DOKU_LEXER_UNMATCHED: + $this->__addCall('preformatted_content',array($match), $pos); + break; + } + + return TRUE; + } + + function file($match, $state, $pos) { + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->__addCall('file',array($match), $pos); + } + return TRUE; + } + + function quote($match, $state, $pos) { + + switch ( $state ) { + + case DOKU_LEXER_ENTER: + $ReWriter = & new Doku_Handler_Quote($this->CallWriter); + $this->CallWriter = & $ReWriter; + $this->__addCall('quote_start',array($match), $pos); + break; + + case DOKU_LEXER_EXIT: + $this->__addCall('quote_end',array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + + case DOKU_LEXER_MATCHED: + $this->__addCall('quote_newline',array($match), $pos); + break; + + case DOKU_LEXER_UNMATCHED: + $this->__addCall('cdata',array($match), $pos); + break; + + } + + return TRUE; + } + + function code($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_UNMATCHED: + $matches = preg_split('/>/u',$match,2); + $matches[0] = trim($matches[0]); + if ( trim($matches[0]) == '' ) { + $matches[0] = NULL; + } + # $matches[0] contains name of programming language + # if available + $this->__addCall( + 'code', + array($matches[1],$matches[0]), + $pos + ); + break; + } + return TRUE; + } + + function acronym($match, $state, $pos) { + $this->__addCall('acronym',array($match), $pos); + return TRUE; + } + + function smiley($match, $state, $pos) { + $this->__addCall('smiley',array($match), $pos); + return TRUE; + } + + function wordblock($match, $state, $pos) { + $this->__addCall('wordblock',array($match), $pos); + return TRUE; + } + + function entity($match, 
$state, $pos) { + $this->__addCall('entity',array($match), $pos); + return TRUE; + } + + function multiplyentity($match, $state, $pos) { + preg_match_all('/\d+/',$match,$matches); + $this->__addCall('multiplyentity',array($matches[0][0],$matches[0][1]), $pos); + return TRUE; + } + + function singlequoteopening($match, $state, $pos) { + $this->__addCall('singlequoteopening',array(), $pos); + return TRUE; + } + + function singlequoteclosing($match, $state, $pos) { + $this->__addCall('singlequoteclosing',array(), $pos); + return TRUE; + } + + function doublequoteopening($match, $state, $pos) { + $this->__addCall('doublequoteopening',array(), $pos); + return TRUE; + } + + function doublequoteclosing($match, $state, $pos) { + $this->__addCall('doublequoteclosing',array(), $pos); + return TRUE; + } + + function camelcaselink($match, $state, $pos) { + $this->__addCall('camelcaselink',array($match), $pos); + return TRUE; + } + + /* + * @TODO What about email? + */ + function internallink($match, $state, $pos) { + // Strip the opening and closing markup + $link = preg_replace(array('/^\[\[/','/\]\]$/u'),'',$match); + + // Split title from URL + $link = preg_split('/\|/u',$link,2); + if ( !isset($link[1]) ) { + $link[1] = NULL; + + // If the title is an image, convert it to an array containing the image details + } else if ( preg_match('/^\{\{[^\}]+\}\}$/',$link[1]) ) { + $media = Doku_Handler_Parse_Media($link[1]); + + // Check it's really an image, not a link to a file + if ( !strpos($media['type'], 'link') ) { + $link[1] = $media; + } + } + + // Interwiki links + if ( preg_match('/^[a-zA-Z]+>{1}[\w()\/\\#~:.?+=&%@!\-;,]+$/u',$link[0]) ) { + $interwiki = preg_split('/>/u',$link[0]); + $this->__addCall( + 'interwikilink', + // UTF8 problem... 
+ array($link[0],$link[1],strtolower($interwiki[0]),$interwiki[1]), + $pos + ); + + // Link to internal wiki page + } else if ( preg_match('/^[\w:#]+$/u',$link[0]) ) { + $this->__addCall( + 'internallink', + array($link[0],$link[1]), + $pos + ); + + // Otherwise it's some kind of link to elsewhere + } else { + + // file:// + if ( substr($link[0],0,4) == 'file' ) { + $this->__addCall( + 'filelink', + array($link[0],$link[1]), + $pos + ); + + // Check for Windows shares - potential security issues here? + // i.e. mode not loaded but still matched here... + } else if ( preg_match('/\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link[0]) ) { + $this->__addCall( + 'windowssharelink', + array($link[0],$link[1]), + $pos + ); + + // Otherwise it's one of the other valid internal links + } else { + + // Add the scheme as needed... + $leader = substr($link[0],0,3); + if ( $leader == 'www' ) { + $link[0] = 'http://'.$link[0]; + } else if ( $leader == 'ftp' ) { + $link[0] = 'ftp://'.$link[0]; + } + + $this->__addCall( + 'externallink', + array($link[0],$link[1]), + $pos + ); + } + } + return TRUE; + } + + function filelink($match, $state, $pos) { + $this->__addCall('filelink',array($match, NULL), $pos); + return TRUE; + } + + function windowssharelink($match, $state, $pos) { + $this->__addCall('windowssharelink',array($match, NULL), $pos); + return TRUE; + } + + function media($match, $state, $pos) { + $p = Doku_Handler_Parse_Media($match); + + // If it's not an image, build a normal link + if ( strpos($p['type'], 'link') ) { + $this->__addCall($p['type'],array($p['src'], $p['title']), $pos); + } else { + $this->__addCall( + $p['type'], + array($p['src'], $p['title'], $p['align'], $p['width'], $p['height'], $p['cache']), + $pos + ); + } + return TRUE; + } + + function externallink($match, $state, $pos) { + // Prevent use of multibyte strings in URLs + // See: http://www.boingboing.net/2005/02/06/shmoo_group_exploit_.html + // Not worried about other charsets so long as page is output as UTF-8 
+ /*if ( strlen($match) != utf8_strlen($match) ) { + $this->__addCall('cdata',array($match), $pos); + } else {*/ + + $this->__addCall('externallink',array($match, NULL), $pos); + //} + return TRUE; + } + + function email($match, $state, $pos) { + $email = preg_replace(array('/^</','/>$/'),'',$match); + $this->__addCall('email',array($email, NULL), $pos); + return TRUE; + } + + function table($match, $state, $pos) { + switch ( $state ) { + + case DOKU_LEXER_ENTER: + + $ReWriter = & new Doku_Handler_Table($this->CallWriter); + $this->CallWriter = & $ReWriter; + + $this->__addCall('table_start', array(), $pos); + //$this->__addCall('table_row', array(), $pos); + if ( trim($match) == '^' ) { + $this->__addCall('tableheader', array(), $pos); + } else { + $this->__addCall('tablecell', array(), $pos); + } + break; + + case DOKU_LEXER_EXIT: + $this->__addCall('table_end', array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + + case DOKU_LEXER_UNMATCHED: + if ( trim($match) != '' ) { + $this->__addCall('cdata',array($match), $pos); + } + break; + + case DOKU_LEXER_MATCHED: + if ( preg_match('/.{2}/',$match) ) { + $this->__addCall('table_align', array($match), $pos); + } else if ( $match == "\n|" ) { + $this->__addCall('table_row', array(), $pos); + $this->__addCall('tablecell', array(), $pos); + } else if ( $match == "\n^" ) { + $this->__addCall('table_row', array(), $pos); + $this->__addCall('tableheader', array(), $pos); + } else if ( $match == '|' ) { + $this->__addCall('tablecell', array(), $pos); + } else if ( $match == '^' ) { + $this->__addCall('tableheader', array(), $pos); + } + break; + } + return TRUE; + } +} + +//------------------------------------------------------------------------ +function Doku_Handler_Parse_Media($match) { + + // Strip the opening and closing markup + $link = preg_replace(array('/^\{\{/','/\}\}$/u'),'',$match); + + // Split title from URL + $link = 
preg_split('/\|/u',$link,2); + + + // Check alignment + $ralign = (bool)preg_match('/^ /',$link[0]); + $lalign = (bool)preg_match('/ $/',$link[0]); + + // Logic = what's that ;)... + if ( $lalign & $ralign ) { + $align = 'center'; + } else if ( $ralign ) { + $align = 'right'; + } else if ( $lalign ) { + $align = 'left'; + } else { + $align = NULL; + } + + // The title... + if ( !isset($link[1]) ) { + $link[1] = NULL; + } + + // img src url from params + // What if it's an external image where URL contains '?' char? + $src = preg_split('/\?/u',$link[0],2); + + // Strip any alignment whitespace + $src[0] = trim($src[0]); + + // Check for width, height and caching params + if ( isset($src[1]) ) { + + if(preg_match('#(\d*)(x(\d*))?#i',$src[1],$matches)){ + + if(isset($matches[1])) { + $width = $matches[1]; + } else { + $width = NULL; + } + + if(isset($matches[3])) { + $height = $matches[3]; + } else { + $height = NULL; + } + + $cache = !(bool)preg_match('/nocache/i',$src[1]); + } + + } else { + $width = NULL; + $height = NULL; + $cache = TRUE; + } + + // Check whether this is a local or remote image + if ( substr($src[0],0,4) == 'http' ) { + $call = 'external'; + } else { + $call = 'internal'; + } + + // Check this is actually an image... + if ( !preg_match('/\.(gif|png|jpe?g)$/',$src[0] ) ) { + // Security implications?... 
+ $call .= 'link'; + } else { + $call .= 'media'; + } + + $params = array( + 'type'=>$call, + 'src'=>$src[0], + 'title'=>$link[1], + 'align'=>$align, + 'width'=>$width, + 'height'=>$height, + 'cache'=>$cache, + ); + + return $params; +} + +//------------------------------------------------------------------------ +class Doku_Handler_CallWriter { + + var $Handler; + + function Doku_Handler_CallWriter(& $Handler) { + $this->Handler = & $Handler; + } + + function writeCall($call) { + $this->Handler->calls[] = $call; + } + + function writeCalls($calls) { + $this->Handler->calls = array_merge($this->Handler->calls, $calls); + } +} + +//------------------------------------------------------------------------ +class Doku_Handler_List { + + var $CallWriter; + + var $calls = array(); + var $listCalls = array(); + var $listStack = array(); + + function Doku_Handler_List(& $CallWriter) { + $this->CallWriter = & $CallWriter; + } + + function writeCall($call) { + $this->calls[] = $call; + } + + // Probably not needed but just in case... 
+ function writeCalls($calls) { + $this->calls = array_merge($this->calls, $calls); + $this->CallWriter->writeCalls($this->calls); + } + + //------------------------------------------------------------------------ + function process() { + foreach ( $this->calls as $call ) { + switch ($call[0]) { + case 'list_item': + $this->listOpen($call); + break; + case 'list_open': + $this->listStart($call); + break; + case 'list_close': + $this->listEnd($call); + break; + default: + $this->listContent($call); + break; + } + } + + $this->CallWriter->writeCalls($this->listCalls); + } + + //------------------------------------------------------------------------ + function listStart($call) { + $depth = $this->interpretSyntax($call[1][0], $listType); + + $this->initialDepth = $depth; + $this->listStack[] = array($listType, $depth); + + $this->listCalls[] = array('list'.$listType.'_open',array(),$call[2]); + $this->listCalls[] = array('listitem_open',array(1),$call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + } + + //------------------------------------------------------------------------ + function listEnd($call) { + $closeContent = TRUE; + + while ( $list = array_pop($this->listStack) ) { + if ( $closeContent ) { + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $closeContent = FALSE; + } + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$list[0].'_close', array(), $call[2]); + } + } + + //------------------------------------------------------------------------ + function listOpen($call) { + $depth = $this->interpretSyntax($call[1][0], $listType); + $end = end($this->listStack); + + // Not allowed to be shallower than initialDepth + if ( $depth < $this->initialDepth ) { + $depth = $this->initialDepth; + } + + //------------------------------------------------------------------------ + if ( $depth == $end[1] ) { + + // Just another item in the list... 
+ if ( $listType == $end[0] ) { + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('listitem_open',array($depth-1),$call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + // Switched list type... + } else { + + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$end[0].'_close', array(), $call[2]); + $this->listCalls[] = array('list'.$listType.'_open', array(), $call[2]); + $this->listCalls[] = array('listitem_open', array($depth-1), $call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + array_pop($this->listStack); + $this->listStack[] = array($listType, $depth); + } + + //------------------------------------------------------------------------ + // Getting deeper... + } else if ( $depth > $end[1] ) { + + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('list'.$listType.'_open', array(), $call[2]); + $this->listCalls[] = array('listitem_open', array($depth-1), $call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + $this->listStack[] = array($listType, $depth); + + //------------------------------------------------------------------------ + // Getting shallower ( $depth < $end[1] ) + } else { + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$end[0].'_close',array(),$call[2]); + + // Throw away the end - done + array_pop($this->listStack); + + while (1) { + $end = end($this->listStack); + + if ( $end[1] <= $depth ) { + + // Normalize depths + $depth = $end[1]; + + $this->listCalls[] = array('listitem_close',array(),$call[2]); + + if ( $end[0] == $listType ) { + $this->listCalls[] = 
array('listitem_open',array($depth-1),$call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + } else { + // Switching list type... + $this->listCalls[] = array('list'.$end[0].'_close', array(), $call[2]); + $this->listCalls[] = array('list'.$listType.'_open', array(), $call[2]); + $this->listCalls[] = array('listitem_open', array($depth-1), $call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + array_pop($this->listStack); + $this->listStack[] = array($listType, $depth); + } + + break; + + // Haven't dropped down far enough yet.... ( $end[1] > $depth ) + } else { + + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$end[0].'_close',array(),$call[2]); + + array_pop($this->listStack); + + } + + } + + } + } + + //------------------------------------------------------------------------ + function listContent($call) { + $this->listCalls[] = $call; + } + + //------------------------------------------------------------------------ + function interpretSyntax($match, & $type) { + if ( substr($match,-1) == '*' ) { + $type = 'u'; + } else { + $type = 'o'; + } + return count(explode(' ',str_replace("\t",' ',$match))); + } +} + +//------------------------------------------------------------------------ +class Doku_Handler_Preformatted { + + var $CallWriter; + + var $calls = array(); + var $pos; + var $text =''; + + + + function Doku_Handler_Preformatted(& $CallWriter) { + $this->CallWriter = & $CallWriter; + } + + function writeCall($call) { + $this->calls[] = $call; + } + + // Probably not needed but just in case... 
+ function writeCalls($calls) { + $this->calls = array_merge($this->calls, $calls); + $this->CallWriter->writeCalls($this->calls); + } + + function process() { + foreach ( $this->calls as $call ) { + switch ($call[0]) { + case 'preformatted_start': + $this->pos = $call[2]; + break; + case 'preformatted_newline': + $this->text .= "\n"; + break; + case 'preformatted_content': + $this->text .= $call[1][0]; + break; + case 'preformatted_end': + $this->CallWriter->writeCall(array('preformatted',array($this->text),$this->pos)); + break; + } + } + } +} + +//------------------------------------------------------------------------ +class Doku_Handler_Quote { + + var $CallWriter; + + var $calls = array(); + + var $quoteCalls = array(); + + function Doku_Handler_Quote(& $CallWriter) { + $this->CallWriter = & $CallWriter; + } + + function writeCall($call) { + $this->calls[] = $call; + } + + // Probably not needed but just in case... + function writeCalls($calls) { + $this->calls = array_merge($this->calls, $calls); + $this->CallWriter->writeCalls($this->calls); + } + + function process() { + + $quoteDepth = 1; + + foreach ( $this->calls as $call ) { + switch ($call[0]) { + + case 'quote_start': + + $this->quoteCalls[] = array('quote_open',array(),$call[2]); + + case 'quote_newline': + + $quoteLength = $this->getDepth($call[1][0]); + + if ( $quoteLength > $quoteDepth ) { + $quoteDiff = $quoteLength - $quoteDepth; + for ( $i = 1; $i <= $quoteDiff; $i++ ) { + $this->quoteCalls[] = array('quote_open',array(),$call[2]); + } + } else if ( $quoteLength < $quoteDepth ) { + $quoteDiff = $quoteDepth - $quoteLength; + for ( $i = 1; $i <= $quoteDiff; $i++ ) { + $this->quoteCalls[] = array('quote_close',array(),$call[2]); + } + } + + $quoteDepth = $quoteLength; + + break; + + case 'quote_end': + + if ( $quoteDepth > 1 ) { + $quoteDiff = $quoteDepth - 1; + for ( $i = 1; $i <= $quoteDiff; $i++ ) { + $this->quoteCalls[] = array('quote_close',array(),$call[2]); + } + } + + $this->quoteCalls[] 
= array('quote_close',array(),$call[2]); + + $this->CallWriter->writeCalls($this->quoteCalls); + break; + + default: + $this->quoteCalls[] = $call; + break; + } + } + } + + function getDepth($marker) { + preg_match('/>{1,}/', $marker, $matches); + $quoteLength = strlen($matches[0]); + return $quoteLength; + } +} + +//------------------------------------------------------------------------ +class Doku_Handler_Table { + + var $CallWriter; + + var $calls = array(); + var $tableCalls = array(); + var $maxCols = 0; + var $maxRows = 1; + var $currentCols = 0; + var $firstCell = FALSE; + var $lastCellType = 'tablecell'; + + function Doku_Handler_Table(& $CallWriter) { + $this->CallWriter = & $CallWriter; + } + + function writeCall($call) { + $this->calls[] = $call; + } + + // Probably not needed but just in case... + function writeCalls($calls) { + $this->calls = array_merge($this->calls, $calls); + $this->CallWriter->writeCalls($this->calls); + } + + //------------------------------------------------------------------------ + function process() { + foreach ( $this->calls as $call ) { + switch ( $call[0] ) { + case 'table_start': + $this->tableStart($call); + break; + case 'table_row': + $this->tableRowClose(array('tablerow_close',$call[1],$call[2])); + $this->tableRowOpen(array('tablerow_open',$call[1],$call[2])); + break; + case 'tableheader': + case 'tablecell': + $this->tableCell($call); + break; + case 'table_end': + $this->tableRowClose(array('tablerow_close',$call[1],$call[2])); + $this->tableEnd($call); + break; + default: + $this->tableDefault($call); + break; + } + } + $this->CallWriter->writeCalls($this->tableCalls); + } + + function tableStart($call) { + $this->tableCalls[] = array('table_open',array(),$call[2]); + $this->tableCalls[] = array('tablerow_open',array(),$call[2]); + $this->firstCell = TRUE; + } + + function tableEnd($call) { + $this->tableCalls[] = array('table_close',array(),$call[2]); + $this->finalizeTable(); + } + + function 
tableRowOpen($call) { + $this->tableCalls[] = $call; + $this->currentCols = 0; + $this->firstCell = TRUE; + $this->lastCellType = 'tablecell'; + $this->maxRows++; + } + + function tableRowClose($call) { + // Strip off final cell opening and anything after it + while ( $discard = array_pop($this->tableCalls ) ) { + + if ( $discard[0] == 'tablecell_open' || $discard[0] == 'tableheader_open') { + + // Its a spanning element - put it back and close it + if ( $discard[1][0] > 1 ) { + + $this->tableCalls[] = $discard; + if ( strstr($discard[0],'cell') ) { + $name = 'tablecell'; + } else { + $name = 'tableheader'; + } + $this->tableCalls[] = array($name.'_close',array(),$call[2]); + } + + break; + } + } + $this->tableCalls[] = $call; + + if ( $this->currentCols > $this->maxCols ) { + $this->maxCols = $this->currentCols; + } + } + + function tableCell($call) { + if ( !$this->firstCell ) { + + // Increase the span + $lastCall = end($this->tableCalls); + + // A cell call which follows an open cell means an empty cell so span + if ( $lastCall[0] == 'tablecell_open' || $lastCall[0] == 'tableheader_open' ) { + $this->tableCalls[] = array('colspan',array(),$call[2]); + + } + + $this->tableCalls[] = array($this->lastCellType.'_close',array(),$call[2]); + $this->tableCalls[] = array($call[0].'_open',array(1,NULL),$call[2]); + $this->lastCellType = $call[0]; + + } else { + + $this->tableCalls[] = array($call[0].'_open',array(1,NULL),$call[2]); + $this->lastCellType = $call[0]; + $this->firstCell = FALSE; + + } + + $this->currentCols++; + } + + function tableDefault($call) { + $this->tableCalls[] = $call; + } + + function finalizeTable() { + + // Add the max cols and rows to the table opening + if ( $this->tableCalls[0][0] == 'table_open' ) { + // Adjust to num cols not num col delimeters + $this->tableCalls[0][1][] = $this->maxCols - 1; + $this->tableCalls[0][1][] = $this->maxRows; + } else { + trigger_error('First element in table call list is not table_open'); + } + + $lastRow = 
/**
 * Wraps the content that follows each header in section_open/section_close
 * instructions.
 */
class Doku_Handler_Section {

    /**
     * Rewrites a flat list of calls so that every 'header' call is followed
     * by a 'section_open' call and the previous section (if any) is closed
     * right before the next header, or at the very end of the list.
     *
     * Each call is an array of (name, arguments, position). The generated
     * calls reuse the position of the call that triggered them; the final
     * section_close reuses the position of the last call in the list.
     *
     * @param array $calls list of calls
     * @return array the list with section_open/section_close calls inserted
     */
    function process($calls) {

        $result = array();
        $sectionIsOpen = FALSE;
        $previous = NULL;

        foreach ( $calls as $current ) {

            $previous = $current;

            // Anything that is not a header passes through untouched
            if ( $current[0] != 'header' ) {
                $result[] = $current;
                continue;
            }

            // A new header ends the section opened by the previous one
            if ( $sectionIsOpen ) {
                $result[] = array('section_close', array(), $current[2]);
            }

            $result[] = $current;
            // The section is opened with the header's second argument
            $result[] = array('section_open', array($current[1][1]), $current[2]);
            $sectionIsOpen = TRUE;
        }

        // Close whatever section is still open once the input is exhausted
        if ( $sectionIsOpen ) {
            $result[] = array('section_close', array(), $previous[2]);
        }

        return $result;
    }

}
(contain linefeeds) + if ( in_array($call[0],$this->stackOpen ) ) { + /* + if ( $this->atStart ) { + $this->calls[] = array('p_open',array(), $call[2]); + $this->atStart = FALSE; + $this->inParagraph = TRUE; + } + */ + $this->calls[] = $call; + + // Hack - footnotes shouldn't immediately contain a p_open + if ( $call[0] != 'footnote_open' ) { + $this->addToStack(); + } else { + $this->addToStack(FALSE); + } + continue; + } + + if ( in_array($call[0],$this->stackClose ) ) { + + if ( $this->inParagraph ) { + $this->calls[] = array('p_close',array(), $call[2]); + } + $this->calls[] = $call; + $this->removeFromStack(); + continue; + } + + if ( !$this->atStart ) { + + if ( $call[0] == 'eol' ) { + + if ( $this->inParagraph ) { + $this->calls[] = array('p_close',array(), $call[2]); + } + $this->calls[] = array('p_open',array(), $call[2]); + $this->inParagraph = TRUE; + + } else { + + $storeCall = TRUE; + + if ( $this->inParagraph && in_array($call[0], $this->blockOpen) ) { + $this->calls[] = array('p_close',array(), $call[2]); + $this->inParagraph = FALSE; + $this->calls[] = $call; + $storeCall = FALSE; + } + + if ( in_array($call[0], $this->blockClose) ) { + if ( $this->inParagraph ) { + $this->calls[] = array('p_close',array(), $call[2]); + $this->inParagraph = FALSE; + } + if ( $storeCall ) { + $this->calls[] = $call; + $storeCall = FALSE; + } + + // This really sucks and suggests this whole class sucks but... 
+ if ( isset($calls[$key+1]) + && + !in_array($calls[$key+1][0], $this->blockOpen) + && + !in_array($calls[$key+1][0], $this->blockClose) + ) { + + $this->calls[] = array('p_open',array(), $call[2]); + $this->inParagraph = TRUE; + } + } + + if ( $storeCall ) { + $this->calls[] = $call; + } + + } + + + } else { + + // Unless there's already a block at the start, start a paragraph + if ( !in_array($call[0],$this->blockOpen) ) { + $this->calls[] = array('p_open',array(), $call[2]); + if ( $call[0] != 'eol' ) { + $this->calls[] = $call; + } + $this->atStart = FALSE; + $this->inParagraph = TRUE; + } else { + $this->calls[] = $call; + $this->atStart = FALSE; + } + + } + + } + + if ( $this->inParagraph ) { + if ( $call[0] == 'p_open' ) { + // Ditch the last call + array_pop($this->calls); + } else if ( !in_array($call[0], $this->blockClose) ) { + $this->calls[] = array('p_close',array(), $call[2]); + } else { + $last_call = array_pop($this->calls); + $this->calls[] = array('p_close',array(), $call[2]); + $this->calls[] = $last_call; + } + } + + return $this->calls; + } + + function addToStack($newStart = TRUE) { + $this->blockStack[] = array($this->atStart, $this->inParagraph); + $this->atStart = $newStart; + $this->inParagraph = FALSE; + } + + function removeFromStack() { + $state = array_pop($this->blockStack); + $this->atStart = $state[0]; + $this->inParagraph = $state[1]; + } +} +//------------------------------------------------------------------------ +define('DOKU_TOC_OPEN',1); +define('DOKU_TOCBRANCH_OPEN',2); +define('DOKU_TOCITEM_OPEN',3); +define('DOKU_TOC_ELEMENT',4); +define('DOKU_TOCITEM_CLOSE',5); +define('DOKU_TOCBRANCH_CLOSE',6); +define('DOKU_TOC_CLOSE',7); + +class Doku_Handler_Toc { + + var $calls = array(); + var $tocStack = array(); + var $toc = array(); + var $numHeaders = 0; + + function process($calls) { + + foreach ( $calls as $call ) { + if ( $call[0] == 'header' && $call[1][1] < 4 ) { + $this->numHeaders++; + $this->addToToc($call); + } + 
$this->calls[] = $call; + } + + // Complete the table of contents then prepend to the calls + $this->finalizeToc($call); + return $this->calls; + } + + function addToToc($call) { + + $depth = $call[1][1]; + + // If it's the opening item... + if ( count ( $this->toc) == 0 ) { + + $this->addTocCall($call, DOKU_TOC_OPEN); + + for ( $i = 1; $i <= $depth; $i++ ) { + + $this->addTocCall(array($call[0],array($call[1][0],$i),$call[2]), DOKU_TOCBRANCH_OPEN); + + if ( $i != $depth ) { + $this->addTocCall(array($call[0],array($call[1][0], $i, TRUE),$call[2]), DOKU_TOCITEM_OPEN); + } else { + $this->addTocCall(array($call[0],array($call[1][0], $i),$call[2]), DOKU_TOCITEM_OPEN); + $this->addTocCall(array($call[0],array($call[1][0], $i),$call[2]), DOKU_TOC_ELEMENT); + } + + $this->tocStack[] = $i; + + } + return; + } + + $currentDepth = end($this->tocStack); + $initialDepth = $currentDepth; + + // Create new branches as needed + if ( $depth > $currentDepth ) { + + for ($i = $currentDepth+1; $i <= $depth; $i++ ) { + $this->addTocCall(array($call[0],array($call[1][0],$i),$call[2]), DOKU_TOCBRANCH_OPEN); + // It's just a filler + if ( $i != $depth ) { + $this->addTocCall(array($call[0],array($call[1][0], $i, TRUE),$call[2]), DOKU_TOCITEM_OPEN); + } else { + $this->addTocCall(array($call[0],array($call[1][0], $i),$call[2]), DOKU_TOCITEM_OPEN); + } + $this->tocStack[] = $i; + } + + $currentDepth = $i-1; + + } + + // Going down + if ( $depth < $currentDepth ) { + for ( $i = $currentDepth; $i >= $depth; $i-- ) { + if ( $i != $depth ) { + array_pop($this->tocStack); + $this->addTocCall(array($call[0],array($call[1][0],$i),$call[2]), DOKU_TOCITEM_CLOSE); + $this->addTocCall(array($call[0],array($call[1][0],$i),$call[2]), DOKU_TOCBRANCH_CLOSE); + } else { + $this->addTocCall(array($call[0],array($call[1][0],$i),$call[2]), DOKU_TOCITEM_CLOSE); + $this->addTocCall(array($call[0],array($call[1][0],$i),$call[2]), DOKU_TOCITEM_OPEN); + $this->addTocCall($call, DOKU_TOC_ELEMENT); + return; + } 
+ } + } + + if ( $depth == $initialDepth ) { + $this->addTocCall($call, DOKU_TOCITEM_CLOSE); + $this->addTocCall($call, DOKU_TOCITEM_OPEN); + } + + $this->addTocCall($call, DOKU_TOC_ELEMENT); + + + } + + function addTocCall($call, $type) { + switch ( $type ) { + case DOKU_TOC_OPEN: + $this->toc[] = array('toc_open',array(),$call[2]); + break; + + case DOKU_TOCBRANCH_OPEN: + $this->toc[] = array('tocbranch_open',array($call[1][1]),$call[2]); + break; + + case DOKU_TOCITEM_OPEN: + if ( isset( $call[1][2] ) ) { + $this->toc[] = array('tocitem_open',array($call[1][1], TRUE),$call[2]); + } else { + $this->toc[] = array('tocitem_open',array($call[1][1]),$call[2]); + } + break; + + case DOKU_TOC_ELEMENT: + $this->toc[] = array('tocelement',array($call[1][1],$call[1][0]),$call[2]); + break; + + case DOKU_TOCITEM_CLOSE: + $this->toc[] = array('tocitem_close',array($call[1][1]),$call[2]); + break; + + case DOKU_TOCBRANCH_CLOSE: + $this->toc[] = array('tocbranch_close',array($call[1][1]),$call[2]); + break; + + case DOKU_TOC_CLOSE: + if ( count($this->toc) > 0 ) { + $this->toc[] = array('toc_close',array(),$call[2]); + } + break; + } + } + + function finalizeToc($call) { + if ( $this->numHeaders < 3 ) { + return; + } + if ( count ($this->tocStack) > 0 ) { + while ( NULL !== ($toc = array_pop($this->tocStack)) ) { + $this->addTocCall(array($call[0],array('',$toc),$call[2]), DOKU_TOCITEM_CLOSE); + $this->addTocCall(array($call[0],array('',$toc),$call[2]), DOKU_TOCBRANCH_CLOSE); + } + } + $this->addTocCall($call, DOKU_TOC_CLOSE); + $this->calls = array_merge($this->toc, $this->calls); + } + +} + +?> diff --git a/inc/parser/lexer.php b/inc/parser/lexer.php new file mode 100644 index 000000000..13c7a181d --- /dev/null +++ b/inc/parser/lexer.php @@ -0,0 +1,555 @@ +<?php +/** +* Author Markus Baker: http://www.lastcraft.com +* Version adapted from Simple Test: http://sourceforge.net/projects/simpletest/ +* For an intro to the Lexer see: +* 
/**
 * Compounded regular expression. Any of the contained patterns could match
 * and when one does, its label is returned.
 *
 * @package Doku
 * @subpackage Lexer
 */
class Doku_LexerParallelRegex {
    var $_patterns;
    var $_labels;
    var $_regex;
    var $_case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false for insensitive.
     * @access public
     */
    function __construct($case) {
        $this->_case = $case;
        $this->_patterns = array();
        $this->_labels = array();
        $this->_regex = null;
    }

    /**
     * Legacy class-named constructor. Kept so that code calling it by name
     * keeps working; PHP 8 no longer treats it as a constructor, which is
     * why the real initialisation lives in __construct().
     *
     * @param boolean $case True for case sensitive, false for insensitive.
     * @access public
     */
    function Doku_LexerParallelRegex($case) {
        $this->__construct($case);
    }

    /**
     * Adds a pattern with an optional label.
     *
     * @param mixed  $pattern Perl style regex. Must be UTF-8 encoded. If it
     *                        is a string, the (, ) lose their meaning unless
     *                        they form part of a lookahead or lookbehind
     *                        assertion.
     * @param string $label   Label of regex to be returned on a match.
     *                        Label must be ASCII.
     * @access public
     */
    function addPattern($pattern, $label = true) {
        $count = count($this->_patterns);
        $this->_patterns[$count] = $pattern;
        $this->_labels[$count] = $label;
        // Invalidate the cached compound regex so it is rebuilt on next use
        $this->_regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject String to match against.
     * @param string $match   First matched portion of subject (set to ""
     *                        when nothing matched).
     * @return mixed False when there are no patterns or nothing matched,
     *               otherwise the label of the pattern that matched, or
     *               true when no label is registered for it.
     * @access public
     */
    function match($subject, &$match) {
        if (count($this->_patterns) == 0) {
            return false;
        }
        if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        for ($i = 1; $i < $size; $i++) {
            // Groups that did not take part in the match are reported as
            // empty strings. Compare against '' explicitly: a plain
            // truthiness test would also skip a matched token of "0".
            if ($matches[$i] !== '' && isset($this->_labels[$i - 1])) {
                return $this->_labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Compounds the patterns into a single regular expression separated
     * with the "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The registered patterns are left untouched; the escaped and wrapped
     * form is built in a local list. This keeps a rebuild (after
     * addPattern() reset the cache) from escaping and wrapping patterns
     * that were already processed by an earlier build.
     *
     * @return string The compound regex including delimiters and flags.
     * @access private
     */
    function _getCompoundedRegex() {
        if ($this->_regex === null) {
            $compiled = array();

            foreach ($this->_patterns as $source) {

                // Replace inline options and lookaheads / lookbehinds with
                // markers so their parentheses survive the quoting below
                $pattern = preg_replace(
                    array (
                        '/\(\?(i|m|s|x|U)\)/U',
                        '/\(\?(\-[i|m|s|x|U])\)/U',
                        '/\(\?\=(.*)\)/sU',
                        '/\(\?\!(.*)\)/sU',
                        '/\(\?\<\=(.*)\)/sU',
                        '/\(\?\<\!(.*)\)/sU',
                    ),
                    array (
                        '<<<<SO:\\1>>>>',
                        '<<<<SOR:\\1>>>>',
                        '<<<<LA:IS:\\1>>>>',
                        '<<<<LA:NOT:\\1>>>>',
                        '<<<<LB:IS:\\1>>>>',
                        '<<<<LB:NOT:\\1>>>>',
                    ),
                    $source
                );

                // Quote the rest
                $pattern = str_replace(
                    array('/', '(', ')'),
                    array('\/', '\(', '\)'),
                    $pattern
                );

                // Restore inline options and lookaheads / lookbehinds.
                // The first two expressions carry five angle brackets
                // because the outermost < > pair is the regex delimiter.
                $pattern = preg_replace(
                    array (
                        '<<<<<SO:(.{1})>>>>>',
                        '<<<<<SOR:(.{2})>>>>>',
                        '/<<<<LA:IS:(.*)>>>>/sU',
                        '/<<<<LA:NOT:(.*)>>>>/sU',
                        '/<<<<LB:IS:(.*)>>>>/sU',
                        '/<<<<LB:NOT:(.*)>>>>/sU',
                    ),
                    array (
                        '(?\\1)',
                        '(?\\1)',
                        '(?=\\1)',
                        '(?!\\1)',
                        '(?<=\\1)',
                        '(?<!\\1)',
                    ),
                    $pattern
                );

                // One capturing group per pattern: match() maps group
                // number N to label N - 1
                $compiled[] = '('.$pattern.')';
            }

            $this->_regex = "/" . implode("|", $compiled) . "/" . $this->_getPerlMatchingFlags();
        }
        return $this->_regex;
    }

    /**
     * Accessor for perl regex mode flags to use.
     *
     * @return string Perl regex flags.
     * @access private
     */
    function _getPerlMatchingFlags() {
        return ($this->_case ? "msS" : "msSi");
    }
}
+ * @access public + */ + function Doku_Lexer(&$parser, $start = "accept", $case = false) { + $this->_case = $case; + $this->_regexes = array(); + $this->_parser = &$parser; + $this->_mode = &new Doku_LexerStateStack($start); + $this->_mode_handlers = array(); + } + + /** + * Adds a token search pattern for a particular + * parsing mode. The pattern does not change the + * current mode. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @access public + */ + function addPattern($pattern, $mode = "accept") { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern); + } + + /** + * Adds a pattern that will enter a new parsing + * mode. Useful for entering parenthesis, strings, + * tags, etc. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @param string $new_mode Change parsing to this new + * nested mode. + * @access public + */ + function addEntryPattern($pattern, $mode, $new_mode) { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern, $new_mode); + } + + /** + * Adds a pattern that will exit the current mode + * and re-enter the previous one. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Mode to leave. + * @access public + */ + function addExitPattern($pattern, $mode) { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern, "__exit"); + } + + /** + * Adds a pattern that has a special mode. 
Acts as an entry + * and exit pattern in one go, effectively calling a special + * parser handler for this token only. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @param string $special Use this mode for this one token. + * @access public + */ + function addSpecialPattern($pattern, $mode, $special) { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern, "_$special"); + } + + /** + * Adds a mapping from a mode to another handler. + * @param string $mode Mode to be remapped. + * @param string $handler New target handler. + * @access public + */ + function mapHandler($mode, $handler) { + $this->_mode_handlers[$mode] = $handler; + } + + /** + * Splits the page text into tokens. Will fail + * if the handlers report an error or if no + * content is consumed. If successful then each + * unparsed and parsed token invokes a call to the + * held listener. + * @param string $raw Raw HTML text. + * @return boolean True on success, else false. + * @access public + */ + function parse($raw) { + if (! isset($this->_parser)) { + return false; + } + $initialLength = strlen($raw); + $length = $initialLength; + $pos = 0; + while (is_array($parsed = $this->_reduce($raw))) { + list($unmatched, $matched, $mode) = $parsed; + $currentLength = strlen($raw); + $matchPos = $initialLength - $currentLength - strlen($matched); + if (! 
$this->_dispatchTokens($unmatched, $matched, $mode, $pos, $matchPos)) { + return false; + } + if ($currentLength == $length) { + return false; + } + $length = $currentLength; + $pos = $initialLength - $currentLength; + } + if (!$parsed) { + return false; + } + return $this->_invokeParser($raw, DOKU_LEXER_UNMATCHED, $pos); + } + + /** + * Sends the matched token and any leading unmatched + * text to the parser changing the lexer to a new + * mode if one is listed. + * @param string $unmatched Unmatched leading portion. + * @param string $matched Actual token match. + * @param string $mode Mode after match. A boolean + * false mode causes no change. + * @param int $pos Current byte index location in raw doc + * thats being parsed + * @return boolean False if there was any error + * from the parser. + * @access private + */ + function _dispatchTokens($unmatched, $matched, $mode = false, $initialPos, $matchPos) { + if (! $this->_invokeParser($unmatched, DOKU_LEXER_UNMATCHED, $initialPos) ){ + return false; + } + if ($this->_isModeEnd($mode)) { + if (! $this->_invokeParser($matched, DOKU_LEXER_EXIT, $matchPos)) { + return false; + } + return $this->_mode->leave(); + } + if ($this->_isSpecialMode($mode)) { + $this->_mode->enter($this->_decodeSpecial($mode)); + if (! $this->_invokeParser($matched, DOKU_LEXER_SPECIAL, $matchPos)) { + return false; + } + return $this->_mode->leave(); + } + if (is_string($mode)) { + $this->_mode->enter($mode); + return $this->_invokeParser($matched, DOKU_LEXER_ENTER, $matchPos); + } + return $this->_invokeParser($matched, DOKU_LEXER_MATCHED, $matchPos); + } + + /** + * Tests to see if the new mode is actually to leave + * the current mode and pop an item from the matching + * mode stack. + * @param string $mode Mode to test. + * @return boolean True if this is the exit mode. 
+ * @access private + */ + function _isModeEnd($mode) { + return ($mode === "__exit"); + } + + /** + * Test to see if the mode is one where this mode + * is entered for this token only and automatically + * leaves immediately afterwoods. + * @param string $mode Mode to test. + * @return boolean True if this is the exit mode. + * @access private + */ + function _isSpecialMode($mode) { + return (strncmp($mode, "_", 1) == 0); + } + + /** + * Strips the magic underscore marking single token + * modes. + * @param string $mode Mode to decode. + * @return string Underlying mode name. + * @access private + */ + function _decodeSpecial($mode) { + return substr($mode, 1); + } + + /** + * Calls the parser method named after the current + * mode. Empty content will be ignored. The lexer + * has a parser handler for each mode in the lexer. + * @param string $content Text parsed. + * @param boolean $is_match Token is recognised rather + * than unparsed data. + * @param int $pos Current byte index location in raw doc + * thats being parsed + * @access private + */ + function _invokeParser($content, $is_match, $pos) { + if (($content === "") || ($content === false)) { + return true; + } + $handler = $this->_mode->getCurrent(); + if (isset($this->_mode_handlers[$handler])) { + $handler = $this->_mode_handlers[$handler]; + } + return $this->_parser->$handler($content, $is_match, $pos); + } + + /** + * Tries to match a chunk of text and if successful + * removes the recognised chunk and any leading + * unparsed data. Empty strings will not be matched. + * @param string $raw The subject to parse. This is the + * content that will be eaten. + * @return array Three item list of unparsed + * content followed by the + * recognised token and finally the + * action the parser is to take. + * True if no match, false if there + * is a parsing error. + * @access private + */ + function _reduce(&$raw) { + if (! 
/**
 * Escapes regex characters other than (, ) and /
 *
 * The characters . \ + * ? [ ^ ] $ { } = ! < > | : are each prefixed with
 * a single backslash. Parentheses and slashes are left alone.
 *
 * The escaping is done in one pass. Replacing the characters one after
 * another would let the backslash rule re-escape the backslash that an
 * earlier rule had just inserted (turning "." into "\\." - an escaped
 * backslash followed by an unescaped dot).
 *
 * @TODO
 * @param string $str Text to be used literally inside a lexer pattern.
 * @return string The text with the characters listed above escaped.
 */
function Doku_Lexer_Escape($str) {
    //$str = addslashes($str);
    // The replacement is a literal backslash followed by the matched character
    return preg_replace('/[.\\\\+*?\[\]^${}=!<>|:]/', '\\\\$0', $str);
}
/**
 * Sets up the Lexer with modes and points it to the Handler
 * For an intro to the Lexer see: wiki:parser
 */
class Doku_Parser {

    // Object that parse() finalizes and whose ->calls it returns
    var $Handler;

    // Doku_Lexer instance shared by all modes; created by addBaseMode()
    var $Lexer;

    // Registered modes, keyed by mode name ('base' is always the first)
    var $modes = array();

    // TRUE once connectModes() has run
    var $connected = FALSE;

    /**
     * Registers the 'base' mode and creates the lexer if there is none yet.
     *
     * The objects are assigned with a plain "=": assigning the result of
     * "new" by reference is a parse error from PHP 7.0 on.
     */
    function addBaseMode() {
        $this->modes['base'] = new Doku_Parser_Mode_Base();
        if ( !$this->Lexer ) {
            $this->Lexer = new Doku_Lexer($this->Handler,'base', TRUE);
        }
        $this->modes['base']->Lexer = & $this->Lexer;
    }

    /**
     * Registers a mode under the given name and hands it the lexer.
     *
     * PHP preserves order of associative elements
     * Mode sequence is important
     *
     * @param string $name name of the mode
     * @param object $Mode mode object (it must offer the preConnect(),
     *                     connectTo(), postConnect() and accepts() methods
     *                     used by connectModes())
     */
    function addMode($name, & $Mode) {
        if ( !isset($this->modes['base']) ) {
            $this->addBaseMode();
        }
        $Mode->Lexer = & $this->Lexer;
        $this->modes[$name] = & $Mode;
    }

    /**
     * Connects every registered mode to all modes that accept it.
     * Does nothing when called a second time.
     */
    function connectModes() {

        if ( $this->connected ) {
            return;
        }

        foreach ( array_keys($this->modes) as $mode ) {

            // Base isn't connected to anything
            if ( $mode == 'base' ) {
                continue;
            }

            $this->modes[$mode]->preConnect();

            foreach ( array_keys($this->modes) as $cm ) {

                if ( $this->modes[$cm]->accepts($mode) ) {
                    $this->modes[$mode]->connectTo($cm);
                }

            }

            $this->modes[$mode]->postConnect();
        }

        $this->connected = TRUE;
    }

    /**
     * Runs the document through the lexer and returns the handler's calls.
     *
     * @param string $doc raw document text
     * @return mixed the handler's calls, or FALSE when no lexer has been
     *               set up (no mode was ever added)
     */
    function parse($doc) {
        if ( $this->Lexer ) {
            $this->connectModes();
            // Normalize CRs and pad doc
            $doc = "\n".str_replace("\r\n","\n",$doc)."\n";
            $this->Lexer->parse($doc);
            $this->Handler->__finalize();
            return $this->Handler->calls;
        } else {
            return FALSE;
        }
    }

}
/**
 * Base class for parser modes. It carries the lexer, the list of modes
 * that may occur inside this mode, and empty connection hooks.
 */
class Doku_Parser_Mode {

    // Lexer on which a mode registers its patterns
    var $Lexer;

    // Names of the modes that accepts() answers TRUE for
    var $allowedModes = array();

    /**
     * Called before any calls to connectTo. Does nothing by default.
     */
    function preConnect() {
    }

    /**
     * Connection hook. Does nothing by default.
     *
     * @param string $mode name of a mode
     */
    function connectTo($mode) {
    }

    /**
     * Called after all calls to connectTo. Does nothing by default.
     */
    function postConnect() {
    }

    /**
     * Tells whether the named mode is listed in $allowedModes.
     *
     * @param string $mode name of the mode to look up
     * @return boolean
     */
    function accepts($mode) {
        $isListed = in_array($mode, $this->allowedModes);
        return $isListed;
    }

}
+//------------------------------------------------------------------- +class Doku_Parser_Mode_NoToc extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addSpecialPattern('~~NOTOC~~',$mode,'notoc'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Linebreak extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addSpecialPattern('\x5C{2}\s',$mode,'linebreak'); + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Eol extends Doku_Parser_Mode { + + function connectTo($mode) { + $badModes = array('listblock','table'); + if ( in_array($mode, $badModes) ) { + return; + } + $this->Lexer->addSpecialPattern('\n',$mode,'eol'); + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_HR extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addSpecialPattern('\n[ \t]*-{4,}[ \t]*\n',$mode,'hr'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Formatting extends Doku_Parser_Mode { + + var $type; + + var $formatting = array ( + 'strong' => array ( + 'entry'=>'\*\*(?=.*\*\*)', + 'exit'=>'\*\*', + ), + + 'emphasis'=> array ( + 'entry'=>'//(?=.*//)', + 'exit'=>'//', + ), + + 'underline'=> array ( + 'entry'=>'__(?=.*__)', + 'exit'=>'__', + ), + + 'monospace'=> array ( + 'entry'=>'\x27\x27(?=.*\x27\x27)', + 'exit'=>'\x27\x27', + ), + + 'subscript'=> array ( + 'entry'=>'<sub>(?=.*\x3C/sub\x3E)', + 'exit'=>'</sub>', + ), + + 'superscript'=> array ( + 'entry'=>'<sup>(?=.*\x3C/sup\x3E)', + 'exit'=>'</sup>', + ), + + 'deleted'=> array ( + 'entry'=>'<del>(?=.*\x3C/del\x3E)', + 'exit'=>'</del>', + ), + ); + + function Doku_Parser_Mode_Formatting($type) { + + if ( !array_key_exists($type, $this->formatting) ) { + trigger_error('Invalid formatting type '.$type, E_USER_WARNING); + } + + $this->type = $type; + + 
$this->allowedModes = array_merge ( + Doku_Parser_Formatting($type), + Doku_Parser_Substition(), + Doku_Parser_Disabled() + ); + + } + + function connectTo($mode) { + + // Can't nest formatting in itself + if ( $mode == $this->type ) { + return; + } + + $this->Lexer->addEntryPattern( + $this->formatting[$this->type]['entry'], + $mode, + $this->type + ); + } + + function postConnect() { + + $this->Lexer->addExitPattern( + $this->formatting[$this->type]['exit'], + $this->type + ); + + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_ListBlock extends Doku_Parser_Mode { + + function Doku_Parser_Mode_ListBlock() { + + $this->allowedModes = array_merge ( + Doku_Parser_Formatting(), + Doku_Parser_Substition(), + Doku_Parser_Disabled() + ); + $this->allowedModes[] = 'footnote'; + $this->allowedModes[] = 'preformatted'; + $this->allowedModes[] = 'unformatted'; + + } + + function connectTo($mode) { + $this->Lexer->addEntryPattern('\n {2,}[\-\*]',$mode,'listblock'); + $this->Lexer->addEntryPattern('\n\t{1,}[\-\*]',$mode,'listblock'); + + $this->Lexer->addPattern('\n {2,}[\-\*]','listblock'); + $this->Lexer->addPattern('\n\t{1,}[\-\*]','listblock'); + + } + + function postConnect() { + $this->Lexer->addExitPattern('\n','listblock'); + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Table extends Doku_Parser_Mode { + + function Doku_Parser_Mode_Table() { + + $this->allowedModes = array_merge ( + Doku_Parser_Formatting(), + Doku_Parser_Substition(), + Doku_Parser_Disabled() + ); + $this->allowedModes[] = 'footnote'; + $this->allowedModes[] = 'preformatted'; + $this->allowedModes[] = 'unformatted'; + } + + function connectTo($mode) { + $this->Lexer->addEntryPattern('\n\^',$mode,'table'); + $this->Lexer->addEntryPattern('\n\|',$mode,'table'); + } + + function postConnect() { + $this->Lexer->addPattern('\n\^','table'); + $this->Lexer->addPattern('\n\|','table'); + 
$this->Lexer->addPattern(' {2,}','table'); + $this->Lexer->addPattern('\^','table'); + $this->Lexer->addPattern('\|','table'); + $this->Lexer->addExitPattern('\n','table'); + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Unformatted extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<nowiki>(?=.*\x3C/nowiki\x3E)',$mode,'unformatted'); + $this->Lexer->addEntryPattern('%%(?=.*%%)',$mode,'unformattedalt'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</nowiki>','unformatted'); + $this->Lexer->addExitPattern('%%','unformattedalt'); + $this->Lexer->mapHandler('unformattedalt','unformatted'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_PHP extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<php>(?=.*\x3C/php\x3E)',$mode,'php'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</php>','php'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_HTML extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<html>(?=.*\x3C/html\x3E)',$mode,'html'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</html>','html'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Preformatted extends Doku_Parser_Mode { + + function connectTo($mode) { + // Has hard coded awareness of lists... + $this->Lexer->addEntryPattern('\n (?![\*\-])',$mode,'preformatted'); + $this->Lexer->addEntryPattern('\n\t(?![\*\-])',$mode,'preformatted'); + + // How to effect a sub pattern with the Lexer! 
+ $this->Lexer->addPattern('\n ','preformatted'); + $this->Lexer->addPattern('\n\t','preformatted'); + + } + + function postConnect() { + $this->Lexer->addExitPattern('\n','preformatted'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Code extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<code(?=.*\x3C/code\x3E)',$mode,'code'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</code>','code'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_File extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<file>(?=.*\x3C/file\x3E)',$mode,'file'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</file>','file'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Quote extends Doku_Parser_Mode { + + function Doku_Parser_Mode_Quote() { + + $this->allowedModes = array_merge ( + Doku_Parser_Formatting(), + Doku_Parser_Substition(), + Doku_Parser_Disabled() + ); + $this->allowedModes[] = 'footnote'; + $this->allowedModes[] = 'preformatted'; + $this->allowedModes[] = 'unformatted'; + } + + function connectTo($mode) { + $this->Lexer->addEntryPattern('\n>{1,}',$mode,'quote'); + } + + function postConnect() { + $this->Lexer->addPattern('\n>{1,}','quote'); + $this->Lexer->addExitPattern('\n','quote'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Acronym extends Doku_Parser_Mode { + // A list + var $acronyms = array(); + var $pattern = ''; + + function Doku_Parser_Mode_Acronym($acronyms) { + $this->acronyms = $acronyms; + } + + function preConnect() { + $sep = ''; + foreach ( $this->acronyms as $acronym ) { + $this->pattern .= $sep.'(?<=\b)'.Doku_Lexer_Escape($acronym).'(?=\b)'; + $sep = '|'; + } + } + + function connectTo($mode) { + if ( 
strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'acronym'); + } + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Smiley extends Doku_Parser_Mode { + // A list + var $smileys = array(); + var $pattern = ''; + + function Doku_Parser_Mode_Smiley($smileys) { + $this->smileys = $smileys; + } + + function preConnect() { + $sep = ''; + foreach ( $this->smileys as $smiley ) { + $this->pattern .= $sep.Doku_Lexer_Escape($smiley); + $sep = '|'; + } + } + + function connectTo($mode) { + if ( strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'smiley'); + } + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Wordblock extends Doku_Parser_Mode { + // A list + var $badwords = array(); + var $pattern = ''; + + function Doku_Parser_Mode_Wordblock($badwords) { + $this->badwords = $badwords; + } + + function preConnect() { + + if ( count($this->badwords) == 0 ) { + return; + } + + $sep = ''; + foreach ( $this->badwords as $badword ) { + $this->pattern .= $sep.'(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)'; + $sep = '|'; + } + + } + + function connectTo($mode) { + if ( strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'wordblock'); + } + } + +} + +//------------------------------------------------------------------- +/** +* @TODO Quotes and 640x480 are note supported - just straight replacements here +*/ +class Doku_Parser_Mode_Entity extends Doku_Parser_Mode { + // A list + var $entities = array(); + var $pattern = ''; + + function Doku_Parser_Mode_Entity($entities) { + $this->entities = $entities; + } + + function preConnect() { + $sep = ''; + foreach ( $this->entities as $entity ) { + $this->pattern .= $sep.Doku_Lexer_Escape($entity); + $sep = '|'; + } + } + + function connectTo($mode) { + if ( strlen($this->pattern) > 0 ) { + 
$this->Lexer->addSpecialPattern($this->pattern,$mode,'entity'); + } + } + +} + +//------------------------------------------------------------------- +// Implements the 640x480 replacement +class Doku_Parser_Mode_MultiplyEntity extends Doku_Parser_Mode { + + function connectTo($mode) { + + $this->Lexer->addSpecialPattern( + '(?<=\b)\d+[x|X]\d+(?=\b)',$mode,'multiplyentity' + ); + + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Quotes extends Doku_Parser_Mode { + + function connectTo($mode) { + + $this->Lexer->addSpecialPattern( + '(?<=\s)\'(?=\S)',$mode,'singlequoteopening' + ); + $this->Lexer->addSpecialPattern( + '(?<=\S)\'',$mode,'singlequoteclosing' + ); + $this->Lexer->addSpecialPattern( + '(?<=\s)"(?=\S)',$mode,'doublequoteopening' + ); + $this->Lexer->addSpecialPattern( + '(?<=\S)"',$mode,'doublequoteclosing' + ); + + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_CamelCaseLink extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addSpecialPattern( + '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b',$mode,'camelcaselink' + ); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_InternalLink extends Doku_Parser_Mode { + + function connectTo($mode) { + // Word boundaries? + $this->Lexer->addSpecialPattern("\[\[[^\]]+?\]\]",$mode,'internallink'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Media extends Doku_Parser_Mode { + + function connectTo($mode) { + // Word boundaries? 
+ $this->Lexer->addSpecialPattern("\{\{[^\}]+\}\}",$mode,'media'); + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_ExternalLink extends Doku_Parser_Mode { + var $schemes = array('http','https','telnet','gopher','wais','ftp','ed2k','irc'); + var $patterns = array(); + + function preConnect() { + + $ltrs = '\w'; + $gunk = '/\#~:.?+=&%@!\-'; + $punc = '.:?\-;,'; + $host = $ltrs.$punc; + $any = $ltrs.$gunk.$punc; + + foreach ( $this->schemes as $scheme ) { + $this->patterns[] = '\b(?i)'.$scheme.'(?-i)://['.$any.']+?['.$punc.']*[^'.$any.']'; + } + + $this->patterns[] = '\b(?i)www?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?['.$punc.']*[^'.$any.']'; + $this->patterns[] = '\b(?i)ftp?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?['.$punc.']*[^'.$any.']'; + + } + + function connectTo($mode) { + foreach ( $this->patterns as $pattern ) { + $this->Lexer->addSpecialPattern($pattern,$mode,'externallink'); + } + } + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_FileLink extends Doku_Parser_Mode { + + var $pattern; + + function preConnect() { + + $ltrs = '\w'; + $gunk = '/\#~:.?+=&%@!\-'; + $punc = '.:?\-;,'; + $host = $ltrs.$punc; + $any = $ltrs.$gunk.$punc; + + $this->pattern = '\b(?i)file(?-i)://['.$any.']+?['. 
+ $punc.']*[^'.$any.']'; + } + + function connectTo($mode) { + $this->Lexer->addSpecialPattern( + $this->pattern,$mode,'filelink'); + } + + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_WindowsShareLink extends Doku_Parser_Mode { + + var $pattern; + + function preConnect() { + + $ltrs = '\w'; + $gunk = '/\#~:.?+=&%@!\-'; + $punc = '.:?\-;,'; + $host = $ltrs.$punc; + $any = $ltrs.$gunk.$punc; + + $this->pattern = "[$gunk$punc\s]\\\\\\\\[$host]+?\\\\[$any]+?[$punc]*[^$any]"; + } + + function connectTo($mode) { + $this->Lexer->addSpecialPattern( + $this->pattern,$mode,'windowssharelink'); + } + + +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_Email extends Doku_Parser_Mode { + + function connectTo($mode) { + //<([\w0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)> + $this->Lexer->addSpecialPattern("<[\w0-9\-_.]+?@[\w\-]+\.[\w\-\.]+\.*[\w]+>",$mode,'email'); + } + +} + +//------------------------------------------------------------------- +// Help fns to keep mode lists - used to make it easier to populate +// the list of modes another mode accepts + +// Can contain many other modes +// E.g. a footnote can containing formatting etc. +function Doku_Parser_BlockContainers() { + $modes = array( + 'footnote', 'listblock', 'table','quote', + // hr breaks the principle but HRs should not be used in tables / lists + // so put it here + 'hr', + ); + return $modes; +} + +// Used to mark paragraph boundaries +function Doku_Parser_Paragraphs() { + $modes = array( + 'eol' + ); + return $modes; +} + +// Can only be used by the base mode +function Doku_Parser_BaseOnly() { + $modes = array( + 'header' + ); + return $modes; +} + +// "Styling" modes that format text. 
+function Doku_Parser_Formatting($remove = '') { + $modes = array( + 'strong', 'emphasis', 'underline', 'monospace', + 'subscript', 'superscript', 'deleted', + ); + $key = array_search($remove, $modes); + if ( is_int($key) ) { + unset($modes[$key]); + } + + return $modes; +} + +// Modes where the token is simply replaced - contain no +// other modes +function Doku_Parser_Substition() { + $modes = array( + 'acronym','smiley','wordblock','entity','camelcaselink', + 'internallink','media','externallink','linebreak','email', + 'windowssharelink','filelink','notoc','multiplyentity', + 'quotes', + + ); + return $modes; +} + +// Modes which have a start and end token but inside which +// no other modes should be applied +function Doku_Parser_Protected() { + $modes = array( + 'preformatted','code','file', + 'php','html','quote', + ); + return $modes; +} + +// Disable wiki markup inside this mode +function Doku_Parser_Disabled() { + $modes = array( + 'unformatted' + ); + return $modes; +} + +?> diff --git a/inc/parser/renderer.php b/inc/parser/renderer.php new file mode 100644 index 000000000..c52ddf6cd --- /dev/null +++ b/inc/parser/renderer.php @@ -0,0 +1,169 @@ +<?php +class Doku_Renderer { + + function document_start() {} + + function document_end() {} + + function toc_open() {} + + function tocbranch_open($level) {} + + function tocitem_open($level, $empty = FALSE) {} + + function tocelement($level, $title) {} + + function tocitem_close($level) {} + + function tocbranch_close($level) {} + + function toc_close() {} + + function header($text, $level) {} + + function section_open($level) {} + + function section_close() {} + + function cdata($text) {} + + function p_open() {} + + function p_close() {} + + function linebreak() {} + + function hr() {} + + function strong_open() {} + + function strong_close() {} + + function emphasis_open() {} + + function emphasis_close() {} + + function underline_open() {} + + function underline_close() {} + + function monospace_open() {} + 
+ function monospace_close() {} + + function subscript_open() {} + + function subscript_close() {} + + function superscript_open() {} + + function superscript_close() {} + + function deleted_open() {} + + function deleted_close() {} + + function footnote_open() {} + + function footnote_close() {} + + function listu_open() {} + + function listu_close() {} + + function listo_open() {} + + function listo_close() {} + + function listitem_open($level) {} + + function listitem_close() {} + + function listcontent_open($level) {} + + function listcontent_close() {} + + function unformatted($text) {} + + function php($text) {} + + function html($text) {} + + function preformatted($text) {} + + function file($text) {} + + function quote_open() {} + + function quote_close() {} + + function code($text, $lang = NULL) {} + + function acronym($acronym) {} + + function smiley($smiley) {} + + function wordblock($word) {} + + function entity($entity) {} + + // 640x480 ($x=640, $y=480) + function multiplyentity($x, $y) {} + + function singlequoteopening() {} + + function singlequoteclosing() {} + + function doublequoteopening() {} + + function doublequoteclosing() {} + + // $link like 'SomePage' + function camelcaselink($link) {} + + // $link like 'wikie:syntax', $title could be an array (media) + function internallink($link, $title = NULL) {} + + // $link is full URL with scheme, $title could be an array (media) + function externallink($link, $title = NULL) {} + + // $link is the original link - probably not much use + // $wikiName is an indentifier for the wiki + // $wikiUri is the URL fragment to append to some known URL + function interwikilink($link, $title = NULL, $wikiName, $wikiUri) {} + + // Link to file on users OS, $title could be an array (media) + function filelink($link, $title = NULL) {} + + // Link to a Windows share, , $title could be an array (media) + function windowssharelink($link, $title = NULL) {} + + function email($address, $title = NULL) {} + + function 
internalmedialink ( + $src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL + ) {} + + function externalmedialink( + $src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL + ) {} + + function table_open($maxcols = NULL, $numrows = NULL){} + + function table_close(){} + + function tablerow_open(){} + + function tablerow_close(){} + + function tableheader_open($colspan = 1, $align = NULL){} + + function tableheader_close(){} + + function tablecell_open($colspan = 1, $align = NULL){} + + function tablecell_close(){} + +} + +?> diff --git a/inc/parser/spamcheck.php b/inc/parser/spamcheck.php new file mode 100644 index 000000000..1c2958e6f --- /dev/null +++ b/inc/parser/spamcheck.php @@ -0,0 +1,72 @@ +<?php +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/'); + +require_once DOKU_INC . 'inc/parser/renderer.php'; + +class Doku_Renderer_SpamCheck extends Doku_Renderer { + + // This should be populated by the code executing the instructions + var $currentCall; + + // An array of instructions that contain spam + var $spamFound = array(); + + // pcre pattern for finding spam + var $spamPattern = '#^$#'; + + function internallink($link, $title = NULL) { + $this->__checkTitle($title); + } + + function externallink($link, $title = NULL) { + $this->__checkLinkForSpam($link); + $this->__checkTitle($title); + } + + function interwikilink($link, $title = NULL) { + $this->__checkTitle($title); + } + + function filelink($link, $title = NULL) { + $this->__checkLinkForSpam($link); + $this->__checkTitle($title); + } + + function windowssharelink($link, $title = NULL) { + $this->__checkLinkForSpam($link); + $this->__checkTitle($title); + } + + function email($address, $title = NULL) { + $this->__checkLinkForSpam($address); + $this->__checkTitle($title); + } + + function internalmedialink ($src) { + $this->__checkLinkForSpam($src); + } + + function externalmedialink($src) { + $this->__checkLinkForSpam($src); + } + + function 
__checkTitle($title) { + if ( is_array($title) && isset($title['src'])) { + $this->__checkLinkForSpam($title['src']); + } + } + + // Pattern matching happens here + /** + * @TODO What about links like www.google.com - no http:// + */ + function __checkLinkForSpam($link) { + if( preg_match($this->spamPattern,$link) ) { + $spam = $this->currentCall; + $spam[3] = $link; + $this->spamFound[] = $spam; + } + } +} + +?> diff --git a/inc/parser/xhtml.php b/inc/parser/xhtml.php new file mode 100644 index 000000000..ca8f08990 --- /dev/null +++ b/inc/parser/xhtml.php @@ -0,0 +1,816 @@ +<?php +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/'); + +if ( !defined('DOKU_LF') ) { + // Some whitespace to help View > Source + define ('DOKU_LF',"\n"); +} + +if ( !defined('DOKU_TAB') ) { + // Some whitespace to help View > Source + define ('DOKU_TAB',"\t"); +} + +/** +* @TODO Probably useful for have constant for linefeed formatting +*/ +class Doku_Renderer_XHTML { + + var $doc = ''; + + var $headers = array(); + + var $footnotes = array(); + + var $footnoteIdStack = array(); + + var $acronyms = array(); + var $smileys = array(); + var $badwords = array(); + var $entities = array(); + var $interwiki = array(); + + //@todo why ob? 
+ function document_start() { + ob_start(); + } + + function document_end() { + + if ( count ($this->footnotes) > 0 ) { + echo '<div class="footnotes">'.DOKU_LF; + foreach ( $this->footnotes as $footnote ) { + echo $footnote; + } + echo '</div>'.DOKU_LF; + } + + $this->doc .= ob_get_contents(); + ob_end_clean(); + + } + + function toc_open() { + echo '<div class="toc">'.DOKU_LF; + echo '<div class="tocheader">Table of Contents <script type="text/javascript">showTocToggle("+","-")</script></div>'.DOKU_LF; + echo '<div id="tocinside">'.DOKU_LF; + } + + function tocbranch_open($level) { + echo '<ul class="toc">'.DOKU_LF; + } + + function tocitem_open($level, $empty = FALSE) { + if ( !$empty ) { + echo '<li class="level'.$level.'">'; + } else { + echo '<li class="clear">'; + } + } + + function tocelement($level, $title) { + echo '<span class="li"><a href="#'.$this->__headerToLink($title).'" class="toc">'; + echo $this->__xmlEntities($title); + echo '</a></span>'; + } + + function tocitem_close($level) { + echo '</li>'.DOKU_LF; + } + + function tocbranch_close($level) { + echo '</ul>'.DOKU_LF; + } + + function toc_close() { + echo '</div>'.DOKU_LF.'</div>'.DOKU_LF; + } + + function header($text, $level) { + echo DOKU_LF.'<a name="'.$this->__headerToLink($text).'"></a><h'.$level.'>'; + echo $this->__xmlEntities($text); + echo "</h$level>".DOKU_LF; + } + + function section_open($level) { + echo "<div class=\"level$level\">".DOKU_LF; + } + + function section_close() { + echo DOKU_LF.'</div>'.DOKU_LF; + } + + function cdata($text) { + echo $this->__xmlEntities($text); + } + + function p_open() { + echo DOKU_LF.'<p>'.DOKU_LF; + } + + function p_close() { + echo DOKU_LF.'</p>'.DOKU_LF; + } + + function linebreak() { + echo '<br/>'.DOKU_LF; + } + + function hr() { + echo '<hr noshade="noshade" size="1" />'.DOKU_LF; + } + + function strong_open() { + echo '<strong>'; + } + + function strong_close() { + echo '</strong>'; + } + + function emphasis_open() { + echo '<em>'; + } + + 
function emphasis_close() { + echo '</em>'; + } + + function underline_open() { + echo '<u>'; + } + + function underline_close() { + echo '</u>'; + } + + function monospace_open() { + echo '<code>'; + } + + function monospace_close() { + echo '</code>'; + } + + function subscript_open() { + echo '<sub>'; + } + + function subscript_close() { + echo '</sub>'; + } + + function superscript_open() { + echo '<sup>'; + } + + function superscript_close() { + echo '</sup>'; + } + + function deleted_open() { + echo '<del>'; + } + + function deleted_close() { + echo '</del>'; + } + + function footnote_open() { + $id = $this->__newFootnoteId(); + echo '<a href="#fn'.$id.'" name="fnt'.$id.'" class="fn_top">'.$id.')</a>'; + $this->footnoteIdStack[] = $id; + ob_start(); + } + + function footnote_close() { + $contents = ob_get_contents(); + ob_end_clean(); + $id = array_pop($this->footnoteIdStack); + + $contents = '<div class="fn"><a href="#fnt'. + $id.'" name="fn'.$id.'" class="fn_bot">'. + $id.')</a> ' .DOKU_LF .$contents. "\n" . '</div>' . 
DOKU_LF; + $this->footnotes[$id] = $contents; + } + + function listu_open() { + echo '<ul>'.DOKU_LF; + } + + function listu_close() { + echo '</ul>'.DOKU_LF; + } + + function listo_open() { + echo '<ol>'.DOKU_LF; + } + + function listo_close() { + echo '</ol>'.DOKU_LF; + } + + function listitem_open($level) { + echo '<li class="level'.$level.'">'; + } + + function listitem_close() { + echo '</li>'.DOKU_LF; + } + + function listcontent_open() { + echo '<span class="li">'; + } + + function listcontent_close() { + echo '</span>'.DOKU_LF; + } + + function unformatted($text) { + echo $this->__xmlEntities($text); + } + + /** + * @TODO Support optional eval of code depending on conf/dokuwiki.php + */ + function php($text) { + $this->preformatted($text); + } + + /** + * @TODO Support optional echo of HTML depending on conf/dokuwiki.php + */ + function html($text) { + $this->file($text); + } + + function preformatted($text) { + echo '<pre class="code">' . $this->__xmlEntities($text) . '</pre>'. DOKU_LF; + } + + function file($text) { + echo '<pre class="file">' . $this->__xmlEntities($text). '</pre>'. DOKU_LF; + } + + /** + * @TODO Shouldn't this output <blockquote?? + */ + function quote_open() { + echo '<div class="quote">'.DOKU_LF; + } + + /** + * @TODO Shouldn't this output </blockquote>? + */ + function quote_close() { + echo '</div>'.DOKU_LF; + } + + /** + * @TODO Hook up correctly with Geshi + */ + function code($text, $language = NULL) { + + if ( is_null($language) ) { + $this->preformatted($text); + } else { + + // Handle with Geshi here (needs tuning) + require_once(DOKU_INC . 'geshi.php'); + $geshi = new GeSHi($text, strtolower($language), DOKU_INC . 
'geshi'); + $geshi->enable_classes(); + $geshi->set_header_type(GESHI_HEADER_PRE); + $geshi->set_overall_class('code'); + + // Fix this + $geshi->set_link_target('_blank'); + + $text = $geshi->parse_code(); + echo $text; + } + } + + function acronym($acronym) { + + if ( array_key_exists($acronym, $this->acronyms) ) { + + $title = $this->__xmlEntities($this->acronyms[$acronym]); + + echo '<acronym title="'.$title + .'">'.$this->__xmlEntities($acronym).'</acronym>'; + + } else { + echo $this->__xmlEntities($acronym); + } + } + + /** + * @TODO Remove hard coded link to splitbrain.org + */ + function smiley($smiley) { + + if ( array_key_exists($smiley, $this->smileys) ) { + $title = $this->__xmlEntities($this->smileys[$smiley]); + echo '<img src="http://wiki.splitbrain.org/smileys/'.$this->smileys[$smiley]. + '" align="middle" alt="'. + $this->__xmlEntities($smiley).'" />'; + } else { + echo $this->__xmlEntities($smiley); + } + } + + /** + * @TODO localization? + */ + function wordblock($word) { + if ( array_key_exists($word, $this->badwords) ) { + echo '** BLEEP **'; + } else { + echo $this->__xmlEntities($word); + } + } + + function entity($entity) { + if ( array_key_exists($entity, $this->entities) ) { + echo $this->entities[$entity]; + } else { + echo $this->__xmlEntities($entity); + } + } + + function multiplyentity($x, $y) { + echo "$x×$y"; + } + + function singlequoteopening() { + echo "‘"; + } + + function singlequoteclosing() { + echo "’"; + } + + function doublequoteopening() { + echo "“"; + } + + function doublequoteclosing() { + echo "”"; + } + + /** + * @TODO Handle local vs. 
global namespace checks + */ + function camelcaselink($link) { + + echo '<a href="'.$link.'"'; + + if ( wikiPageExists($link) ) { + echo ' class="wikilink1"'; + } else { + echo ' class="wikilink2"'; + } + + // Probably dont need to convert entities - parser would have rejected it + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + echo $this->__xmlEntities($link); + echo '</a>'; + } + + /** + * @TODO Hook up with page resolver. + * @TODO Support media + * @TODO correct attributes + */ + function internallink($link, $title = NULL) { + + echo '<a'; + + $title = $this->__getLinkTitle($title,$link, $isImage); + + if ( !$isImage ) { + + if ( wikiPageExists($link) ) { + echo ' class="wikilink1"'; + } else { + echo ' class="wikilink2"'; + } + + } else { + echo ' class="media"'; + } + + echo ' href="http://wiki.splitbrain.org/'.$this->__xmlEntities($link).'"'; + + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + + echo $title; + + echo '</a>'; + } + + + /** + * @TODO Should list assume blacklist check already made? 
+ * @TODO External link icon + * @TODO correct attributes + */ + function externallink($link, $title = NULL) { + + echo '<a'; + + $title = $this->__getLinkTitle($title, $link, $isImage); + + if ( !$isImage ) { + echo ' class="urlextern"'; + } else { + echo ' class="media"'; + } + + echo ' target="_blank" href="'.$this->__xmlEntities($link).'"'; + + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + + echo $title; + + echo '</a>'; + } + + /** + * @TODO Remove hard coded link to splitbrain.org on style + */ + function interwikilink($link, $title = NULL, $wikiName, $wikiUri) { + + // RESOLVE THE URL + if ( isset($this->interwiki[$wikiName]) ) { + + $wikiUriEnc = urlencode($wikiUri); + + if ( strstr($this->interwiki[$wikiName],'{URL}' ) !== FALSE ) { + + $url = str_replace('{URL}', $wikiUriEnc, $this->interwiki[$wikiName] ); + + } else if ( strstr($this->interwiki[$wikiName],'{NAME}' ) !== FALSE ) { + + $url = str_replace('{NAME}', $wikiUriEnc, $this->interwiki[$wikiName] ); + + } else { + + $url = $this->interwiki[$wikiName] . urlencode($wikiUri); + + } + + } else { + // Default to Google I'm feeling lucky + $url = 'http://www.google.com/search?q='.urlencode($wikiUri).'&btnI=lucky'; + } + + // BUILD THE LINK + echo '<a'; + + $title = $this->__getLinkTitle($title, $wikiUri, $isImage); + + if ( !$isImage ) { + echo ' class="interwiki"'; + } else { + echo ' class="media"'; + } + + echo ' href="'.$this->__xmlEntities($url).'"'; + + if ( FALSE !== ( $type = interwikiImgExists($wikiName) ) ) { + echo ' style="background: transparent url(http://wiki.splitbrain.org/interwiki/'. + $wikiName.'.'.$type.') 0px 1px no-repeat;"'; + } + + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + + echo $title; + + echo '</a>'; + } + + /** + * @TODO Correct the CSS class for files? 
(not windows) + * @TODO Remove hard coded URL to splitbrain.org + */ + function filelink($link, $title = NULL) { + echo '<a'; + + $title = $this->__getLinkTitle($title, $link, $isImage); + + if ( !$isImage ) { + echo ' class="windows"'; + } else { + echo ' class="media"'; + } + + echo ' href="'.$this->__xmlEntities($link).'"'; + + echo ' style="background: transparent url(http://wiki.splitbrain.org/images/windows.gif) 0px 1px no-repeat;"'; + + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + + echo $title; + + echo '</a>'; + } + + /** + * @TODO Remove hard coded URL to splitbrain.org + * @TODO Add error message for non-IE users + */ + function windowssharelink($link, $title = NULL) { + echo '<a'; + + $title = $this->__getLinkTitle($title, $link, $isImage); + + if ( !$isImage ) { + echo ' class="windows"'; + } else { + echo ' class="media"'; + } + + $link = str_replace('\\','/',$link); + $link = 'file:///'.$link; + echo ' href="'.$this->__xmlEntities($link).'"'; + + echo ' style="background: transparent url(http://wiki.splitbrain.org/images/windows.gif) 0px 1px no-repeat;"'; + + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + + echo $title; + + echo '</a>'; + } + + /** + * @TODO Protect email address from harvesters + * @TODO Remove hard coded link to splitbrain.org + */ + function email($address, $title = NULL) { + echo '<a'; + + $title = $this->__getLinkTitle($title, $address, $isImage); + + if ( !$isImage ) { + echo ' class="mail"'; + } else { + echo ' class="media"'; + } + + echo ' href="mailto:'.$this->__xmlEntities($address).'"'; + + echo ' style="background: transparent url(http://wiki.splitbrain.org/images/mail_icon.gif) 0px 1px no-repeat;"'; + + echo ' onclick="return svchk()" onkeypress="return svchk()">'; + + echo $title; + + echo '</a>'; + + } + + /** + * @TODO Resolve namespaces + * @TODO Add image caching + * @TODO Remove hard coded link to splitbrain.org + */ + function internalmedia ( + 
$src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL + ) { + + // Sort out the namespace here... + if ( strpos($src,':') ) { + $src = explode(':',$src); + $src = $src[1]; + } + echo '<img class="media" src="http://wiki.splitbrain.org/media/wiki/'.$this->__xmlEntities($src).'"'; + + if ( !is_null($title) ) { + echo ' title="'.$this->__xmlEntities($title).'"'; + } + + if ( !is_null($align) ) { + echo ' align="'.$align.'"'; + } + + if ( !is_null($width) ) { + echo ' width="'.$this->__xmlEntities($width).'"'; + } + + if ( !is_null($height) ) { + echo ' height="'.$this->__xmlEntities($height).'"'; + } + + echo '/>'; + + } + + /** + * @TODO Add image caching + */ + function externalmedia ( + $src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL + ) { + + echo '<img class="media" src="'.$this->__xmlEntities($src).'"'; + + if ( !is_null($title) ) { + echo ' title="'.$this->__xmlEntities($title).'"'; + } + + if ( !is_null($align) ) { + echo ' align="'.$align.'"'; + } + + if ( !is_null($width) ) { + echo ' width="'.$this->__xmlEntities($width).'"'; + } + + if ( !is_null($height) ) { + echo ' height="'.$this->__xmlEntities($height).'"'; + } + + echo '/>'; + } + + // $numrows not yet implemented + function table_open($maxcols = NULL, $numrows = NULL){ + echo '<table class="inline">'.DOKU_LF; + } + + function table_close(){ + echo '</table>'.DOKU_LF.'<br />'.DOKU_LF; + } + + function tablerow_open(){ + echo DOKU_TAB . '<tr>' . DOKU_LF . DOKU_TAB . DOKU_TAB; + } + + function tablerow_close(){ + echo DOKU_LF . DOKU_TAB . '</tr>' . 
DOKU_LF; + } + + function tableheader_open($colspan = 1, $align = NULL){ + echo '<th'; + if ( !is_null($align) ) { + echo ' class="'.$align.'align"'; + } + if ( $colspan > 1 ) { + echo ' colspan="'.$colspan.'"'; + } + echo '>'; + } + + function tableheader_close(){ + echo '</th>'; + } + + function tablecell_open($colspan = 1, $align = NULL){ + echo '<td'; + if ( !is_null($align) ) { + echo ' class="'.$align.'align"'; + } + if ( $colspan > 1 ) { + echo ' colspan="'.$colspan.'"'; + } + echo '>'; + } + + function tablecell_close(){ + echo '</td>'; + } + + //---------------------------------------------------------- + // Utils + + function __newFootnoteId() { + static $id = 1; + return $id++; + } + + function __xmlEntities($string) { + return htmlspecialchars($string); + } + + /** + * @TODO Tuning needed - e.g. utf8 strtolower ? + */ + function __headerToLink($title) { + return preg_replace('/\W/','_',trim($title)); + } + + function __getLinkTitle($title, $default, & $isImage) { + $isImage = FALSE; + + if ( is_null($title) ) { + + return $this->__xmlEntities($default); + + } else if ( is_string($title) ) { + + return $this->__xmlEntities($title); + + } else if ( is_array($title) ) { + + $isImage = TRUE; + return $this->__imageTitle($title); + + } + } + + /** + * @TODO Resolve namespace on internal images + * @TODO Remove hard coded url to splitbrain.org + * @TODO Image caching + */ + function __imageTitle($img) { + + if ( $img['type'] == 'internalmedia' ) { + + // Resolve here... 
+ if ( strpos($img['src'],':') ) { + $src = explode(':',$img['src']); + $src = $src[1]; + } else { + $src = $img['src']; + } + + $imgStr = '<img class="media" src="http://wiki.splitbrain.org/media/wiki/'.$this->__xmlEntities($src).'"'; + + } else { + + $imgStr = '<img class="media" src="'.$this->__xmlEntities($img['src']).'"'; + + } + + if ( !is_null($img['title']) ) { + $imgStr .= ' alt="'.$this->__xmlEntities($img['title']).'"'; + } else { + $imgStr .= ' alt=""'; + } + + if ( !is_null($img['align']) ) { + $imgStr .= ' align="'.$img['align'].'"'; + } + + if ( !is_null($img['width']) ) { + $imgStr .= ' width="'.$this->__xmlEntities($img['width']).'"'; + } + + if ( !is_null($img['height']) ) { + $imgStr .= ' height="'.$this->__xmlEntities($img['height']).'"'; + } + + $imgStr .= '/>'; + + return $imgStr; + } +} + +/** +* Test whether there's an image to display with this interwiki link +*/ +function interwikiImgExists($name) { + + static $exists = array(); + + if ( array_key_exists($name,$exists) ) { + return $exists[$name]; + } + + if( @file_exists( DOKU. 'interwiki/'.$name.'.png') ) { + $exists[$name] = 'png'; + } else if ( @file_exists( DOKU . 'interwiki/'.$name.'.gif') ) { + $exists[$name] = 'gif'; + } else { + $exists[$name] = FALSE; + } + + return $exists[$name]; +} + +/** +* For determining whether to use CSS class "wikilink1" or "wikilink2" +* @todo use configinstead of DOKU_DATA +*/ +function wikiPageExists($name) { + + static $pages = array(); + + if ( array_key_exists($name,$pages) ) { + return $pages[$name]; + } + + $file = str_replace(':','/',$name).'.txt'; + + if ( @file_exists( DOKU_DATA . $file ) ) { + $pages[$name] = TRUE; + } else { + $pages[$name] = FALSE; + } + + return $pages[$name]; +} + |