From 5aa9b7cc57045bfaa14f646ff78c51317da98b8b Mon Sep 17 00:00:00 2001 From: andi Date: Tue, 17 May 2005 22:45:40 +0200 Subject: removed bakfiles darcs-hash:20050517204540-9977f-ad23f3daa6503158c1068c3e28808a7cd2147e11.gz --- inc/parser.php.bak | 862 ----------------------------------------------------- 1 file changed, 862 deletions(-) delete mode 100644 inc/parser.php.bak (limited to 'inc/parser.php.bak') diff --git a/inc/parser.php.bak b/inc/parser.php.bak deleted file mode 100644 index 8f4a188b7..000000000 --- a/inc/parser.php.bak +++ /dev/null @@ -1,862 +0,0 @@ - - * @deprecated replaced by the new parser - */ - -trigger_error('deprecated parser.php included'); - - if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/'); - include_once(DOKU_INC.'inc/common.php'); - include_once(DOKU_INC.'inc/html.php'); - include_once(DOKU_INC.'inc/format.php'); - require_once(DOKU_INC.'lang/en/lang.php'); - require_once(DOKU_INC.'lang/'.$conf['lang'].'/lang.php'); - -/** - * The main parser function. - * - * Accepts raw data and returns valid xhtml - * - * @author Andreas Gohr - */ -function parse($text){ - global $parser; - global $conf; - $table = array(); - $hltable = array(); - - //preparse - $text = preparse($text,$table,$hltable); - - //padding with a newline - $text = "\n".$text."\n"; - - #for link matching - $urls = '(https?|telnet|gopher|file|wais|ftp|ed2k|irc)'; - $ltrs = '\w'; - $gunk = '/\#~:.?+=&%@!\-'; - $punc = '.:?\-;,'; - $host = $ltrs.$punc; - $any = $ltrs.$gunk.$punc; - - /* first pass */ - - //preformated texts - firstpass($table,$text,"#(.*?)#se","preformat('\\1','nowiki')"); - firstpass($table,$text,"#%%(.*?)%%#se","preformat('\\1','nowiki')"); - firstpass($table,$text,"#(.*?)#se","preformat('\\3','code','\\2')"); - firstpass($table,$text,"#(.*?)#se","preformat('\\1','file')"); - - // html and php includes - firstpass($table,$text,"#(.*?)#se","preformat('\\1','html')"); - firstpass($table,$text,"#(.*?)#se","preformat('\\1','php')"); - - // codeblocks - firstpass($table,$text,"/(\n( {2,}|\t)[^\*\-\n ][^\n]+)(\n( {2,}|\t)[^\n]*)*/se","preformat('\\0','block')","\n"); - - //check if toc is wanted - if(!isset($parser['toc'])){ - if(strpos($text,'~~NOTOC~~')!== false){ - $text = str_replace('~~NOTOC~~','',$text); - $parser['toc'] = false; - }else{ - $parser['toc'] = true; - } - } - - //check if this file may be cached - if(!isset($parser['cache'])){ - if(strpos($text,'~~NOCACHE~~')!=false){ - $text = str_replace('~~NOCACHE~~','',$text); - $parser['cache'] = false; - }else{ - $parser['cache'] = true; - } - } - - //headlines - format_headlines($table,$hltable,$text); - - //links - firstpass($table,$text,"#\[\[([^\]]+?)\]\]#ie","linkformat('\\1')"); - - //media - firstpass($table,$text,"/\{\{([^\}]+)\}\}/se","mediaformat('\\1')"); - - //match full URLs (adapted from Perl cookbook) - firstpass($table,$text,"#(\b)($urls://[$any]+?)([$punc]*[^$any])#ie","linkformat('\\2')",'\1','\4'); - - //short www URLs - firstpass($table,$text,"#(\b)(www\.[$host]+?\.[$host]+?[$any]+?)([$punc]*[^$any])#ie","linkformat('http://\\2|\\2')",'\1','\3'); - - //windows shares - firstpass($table,$text,"#([$gunk$punc\s])(\\\\\\\\[$host]+?\\\\[$any]+?)([$punc]*[^$any])#ie","linkformat('\\2')",'\1','\3'); - - //short ftp URLs - firstpass($table,$text,"#(\b)(ftp\.[$host]+?\.[$host]+?[$any]+?)([$punc]*[^$any])#ie","linkformat('ftp://\\2')",'\1','\3'); - - // email@domain.tld - firstpass($table,$text,"#<([\w0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)>#ie", "linkformat('\\1@\\2')"); - - //CamelCase if wanted - if($conf['camelcase']){ - firstpass($table,$text,"#(\b)([A-Z]+[a-z]+[A-Z][A-Za-z]*)(\b)#se","linkformat('\\2')",'\1','\3'); - } - - $text = htmlspecialchars($text); - - //smileys - smileys($table,$text); - - //acronyms - acronyms($table,$text); - - /* second pass for simple formating */ - $text = simpleformat($text); - - /* third pass - insert the matches from 1st pass */ - reset($table); - while (list($key, $val) = each($table)) { - $text = str_replace($key,$val,$text); - } - - /* remove empty paragraphs */ - $text = preg_replace('"

\n*

"','',$text); - - /* remove padding */ - $text = trim($text); - return $text; -} - -/** - * Line by line preparser - * - * This preparses the text by walking it line by line. This - * is the only place where linenumbers are still available (needed - * for section edit. Some precautions have to be taken to not change - * any noparse block. - * - * @author Andreas Gohr - */ -function preparse($text,&$table,&$hltable){ - $lines = split("\n",$text); - - //prepare a tokens for paragraphs - $po = mkToken(); - $table[$po] = "

"; - $pc = mkToken(); - $table[$pc] = "

"; - - for ($l=0; $l(.*?)#","",$line); - $line = preg_replace("#%%(.*?)%%#","",$line); - $line = preg_replace("#(.*?)#","",$line); - $line = preg_replace("#<(file|html|php)>(.*?)#","",$line); - //check for start of multiline noparse areas - if(preg_match('#^.*?<(nowiki|code|php|html|file)( (\w+))?>#',$line,$matches)){ - list($noparse) = split(" ",$matches[1]); //remove options - $noparse = ''; - continue; - }elseif(preg_match('#^.*?%%#',$line)){ - $noparse = '%%'; - continue; - } - } - - //handle headlines - if(preg_match('/^(\s)*(==+)(.+?)(==+)(\s*)$/',$lines[$l],$matches)){ - //get token - $tk = tokenize_headline($hltable,$matches[2],$matches[3],$l); - //replace line with token - $lines[$l] = $tk; - } - - //handle paragraphs - if(empty($lines[$l])){ - $lines[$l] = "$pc\n$po"; - } - } - - //reassemble full text - $text = join("\n",$lines); - //open first and close last paragraph - $text = "$po\n$text\n$pc"; - - return $text; -} - -/** - * Build TOC lookuptable - * - * This function adds some information about the given headline - * to a lookuptable to be processed later. Returns a unique token - * that idetifies the headline later - * - * @author Andreas Gohr - */ -function tokenize_headline(&$hltable,$pre,$hline,$lno){ - switch (strlen($pre)){ - case 2: - $lvl = 5; - break; - case 3: - $lvl = 4; - break; - case 4: - $lvl = 3; - break; - case 5: - $lvl = 2; - break; - default: - $lvl = 1; - break; - } - $token = mkToken(); - $hltable[] = array( 'name' => htmlspecialchars(trim($hline)), - 'level' => $lvl, - 'line' => $lno, - 'token' => $token ); - return $token; -} - -/** - * Headline formatter - * - * @author Andreas Gohr - */ -function format_headlines(&$table,&$hltable,&$text){ - global $parser; - global $conf; - global $lang; - global $ID; - - // walk the headline table prepared in preparsing - $last = 0; - $cnt = 0; - $hashs = array(); - foreach($hltable as $hl){ - $cnt++; - - //make unique headlinehash - $hash = cleanID($hl['name']); - $i=2; - while(in_array($hash,$hashs)) - $hash = cleanID($hl['name']).$i++; - $hashs[] = $hash; - - // build headline - $headline = "

\n"; //close paragraph - if($cnt - 1) $headline .= ''; //no close on first HL - $headline .= ''; - $headline .= ''; - $headline .= $hl['name']; - $headline .= ''; - $headline .= '
'; - $headline .= "\n

"; //open new paragraph - - //remember for autoTOC - if($hl['level'] <= $conf['maxtoclevel']){ - $content[] = array('id' => $hash, - 'name' => $hl['name'], - 'level' => $hl['level']); - } - - //add link for section edit for HLs 1, and 3 - if( ($hl['level'] <= $conf['maxseclevel']) && - ($hl['line'] - $last > 1)){ - $secedit = ''; - $headline = $secedit.$headline; - $last = $hl['line']; - } - - //put headline into firstpasstable - $table[$hl['token']] = $headline; - } - - //add link for editing the last section - if($last){ - $secedit = ''; - $token = mktoken(); - $text .= $token; - $table[$token] = $secedit; - } - - //close last div - if ($cnt){ - $token = mktoken(); - $text .= $token; - $table[$token] = '

'; - } - - //prepend toc - if ($parser['toc'] && count($content) > 2){ - $token = mktoken(); - $text = $token.$text; - $table[$token] = html_toc($content); - } -} - -/** - * Formats various link types using the functions from format.php - * - * @author Andreas Gohr - */ -function linkformat($match){ - global $conf; - //unescape - $match = str_replace('\\"','"',$match); - - //prepare variables for the formaters - $link = array(); - list($link['url'],$link['name']) = split('\|',$match,2); - $link['url'] = trim($link['url']); - $link['name'] = trim($link['name']); - $link['class'] = ''; - $link['target'] = ''; - $link['style'] = ''; - $link['pre'] = ''; - $link['suf'] = ''; - $link['more'] = ''; - - //save real name for image check - $realname = $link['name']; - - /* put it into the right formater */ - if(strpos($link['url'],'>')){ - // InterWiki - $link = format_link_interwiki($link); - }elseif(preg_match('#^([a-z0-9]+?)://#i',$link['url'])){ - // external URL - $link = format_link_externalurl($link); - }elseif(preg_match("/^\\\\\\\\([a-z0-9\-_.]+)\\\\(.+)$/",$link['url'])){ - // windows shares - $link = format_link_windows($link); - }elseif(preg_match('#([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i',$link['url'])){ - // email - $link = format_link_email($link); - }else{ - // wiki link - $link = format_link_wiki($link); - } - - //is realname an image? use media formater - if(preg_match('#^{{.*?\.(gif|png|jpe?g)(\?.*?)?\s*(\|.*?)?}}$#',$realname)){ - $link['name'] = substr($realname,2,-2); - $link = format_link_media($link); - } - - // build the replacement with the variables set by the formaters - return format_link_build($link); -} - -/** - * Simple text formating and typography is done here - * - * @author Andreas Gohr - */ -function simpleformat($text){ - global $conf; - - $text = preg_replace('/__(.+?)__/s','\1',$text); //underline - $text = preg_replace('/\/\/(.+?)\/\//s','\1',$text); //emphasize - $text = preg_replace('/\*\*(.+?)\*\*/s','\1',$text); //bold - $text = preg_replace('/\'\'(.+?)\'\'/s','\1',$text); //code - $text = preg_replace('#<del>(.*?)</del>#is','\1',$text); //deleted - $text = preg_replace('/^\s*----+\s*$/m',"

\n
\n

",$text); //hr - - //sub and superscript - $text = preg_replace('#<su([bp])>(.*?)</su\1>#is','\2',$text); - - //do quoting - $text = preg_replace("/\n((>)[^\n]*?\n)+/se","'\n'.quoteformat('\\0').'\n'",$text); - - // Typography - if($conf['typography']){ - $text = preg_replace('/(?> - $text = preg_replace('/<</i','«',$text); // << - - $text = preg_replace('/<->/i','↔',$text); // <-> - $text = preg_replace('/<-/i','←',$text); // <- - $text = preg_replace('/->/i','→',$text); // -> - - $text = preg_replace('/<=>/i','⇔',$text); // <=> - $text = preg_replace('/<=/i','⇐',$text); // <= - $text = preg_replace('/=>/i','⇒',$text); // => - - $text = preg_replace('/\(c\)/i','©',$text); // copyrigtht - $text = preg_replace('/\(r\)/i','®',$text); // registered - $text = preg_replace('/\(tm\)/i','™',$text); // trademark - } - - //forced linebreaks - $text = preg_replace('#\\\\\\\\(?=\s)#',"
",$text); - - // lists (blocks leftover after blockformat) - $text = preg_replace("/(\n( {2,}|\t)[\*\-][^\n]+)(\n( {2,}|\t)[^\n]*)*/se","\"\\n\".listformat('\\0')",$text); - - // tables - $text = preg_replace("/\n(([\|\^][^\n]*?)+[\|\^] *\n)+/se","\"\\n\".tableformat('\\0')",$text); - - // footnotes - $text = footnotes($text); - - // run custom text replacements - $text = customs($text); - - return $text; -} - -/** - * Footnote formating - * - * @author Andreas Gohr - */ -function footnotes($text){ - $num = 0; - while (preg_match('/\(\((.+?)\)\)/s',$text,$match)){ - $num++; - $fn = $match[1]; - $linkt = ''.$num.')'; - $linkb = ''.$num.')'; - - $text = preg_replace('/ ?\(\((.+?)\)\)/s',$linkt,$text,1); - if($num == 1) $text .= '

'; - $text .= '
'.$linkb.' '.$fn.'
'; - } - - if($num) $text .= '
'; - return $text; -} - -/** - * Replace smileys with their graphic equivalents - * - * @author Andreas Gohr - */ -function smileys(&$table,&$text){ - $smileys = file('conf/smileys.conf'); - foreach($smileys as $smiley){ - $smiley = preg_replace('/#.*$/','',$smiley); //ignore comments - $smiley = trim($smiley); - if(empty($smiley)) continue; - $sm = preg_split('/\s+/',$smiley,2); - $sm[1] = ''.$sm[0].''; - $sm[0] = preg_quote($sm[0],'/'); - firstpass($table,$text,'/(? - */ -function acronyms(&$table,&$text){ - $acronyms = file('conf/acronyms.conf'); - foreach($acronyms as $acro){ - $acro = preg_replace('/#.*$/','',$acro); //ignore comments - $acro = trim($acro); - if(empty($acro)) continue; - list($ac,$desc) = preg_split('/\s+/',$acro,2); - $ac = preg_quote($ac,'/'); - firstpass($table,$text,'/\b('.$ac.')\b/s',"\\1"); - } -} - -/** - * Apply custom text replacements - * - * @author Andreas Gohr - */ -function customs($text){ - $reps = file ('conf/custom.conf'); - foreach($reps as $rep){ - //strip comments only outside a regexp - $rep = preg_replace('/#[^\/]*$/','',$rep); //ignore comments - $rep = trim($rep); - if(empty($rep)) continue; - if(preg_match('#^(/.+/\w*)\s+\'(.*)\'$#',$rep,$matches)){ - $text = preg_replace($matches[1],$matches[2],$text); - } - } - return $text; -} - -/** - * Replace regexp with token - * - * @author Andreas Gohr - */ -function firstpass(&$table,&$text,$regexp,$replace,$lpad='',$rpad=''){ - //extended regexps have to be disabled for inserting the token - //and later reenabled when handling the actual code: - $ext=''; - if(substr($regexp,-1) == 'e'){ - $ext='e'; - $regexp = substr($regexp,0,-1); - } - - while(preg_match($regexp,$text,$matches)){ - $token = mkToken(); - $match = $matches[0]; - $text = preg_replace($regexp,$lpad.$token.$rpad,$text,1); - $table[$token] = preg_replace($regexp.$ext,$replace,$match); - } -} - -/** - * create a random and hopefully unique token - * - * @author Andreas Gohr - */ -function mkToken(){ - return '~'.md5(uniqid(rand(), true)).'~'; -} - -/** - * Do quote blocks - * - * @author Andreas Gohr - */ -function quoteformat($block){ - $block = trim($block); - $lines = split("\n",$block); - - $lvl = 0; - $ret = ""; - foreach ($lines as $line){ - //remove '>' and count them - $cnt = 0; - while(substr($line,0,4) == '>'){ - $line = substr($line,4); - $cnt++; - } - //compare to last level and open or close new divs if needed - if($cnt > $lvl){ - $ret .= "

\n"; - for ($i=0; $i< $cnt - $lvl; $i++){ - $ret .= '
'; - } - $ret .= "\n

"; - }elseif($cnt < $lvl){ - $ret .= "\n

"; - for ($i=0; $i< $lvl - $cnt; $i++){ - $ret .= "
\n"; - } - $ret .= "

\n"; - }elseif(empty($line)){ - $ret .= "

\n

"; - } - //keep rest of line but trim left whitespaces - $ret .= ltrim($line)."\n"; - //remember level - $lvl = $cnt; - } - - //close remaining divs - $ret .= "

\n"; - for ($i=0; $i< $lvl; $i++){ - $ret .= "\n"; - } - $ret .= "

\n"; - - return "$ret"; -} - -/** - * format inline tables - * - * @author Andreas Gohr - * @author Aaron Evans - */ -function tableformat($block) { - $block = trim($block); - $lines = split("\n",$block); - $ret = ""; - //build a row array - $rows = array(); - for($r=0; $r < count($lines); $r++){ - $line = $lines[$r]; - //remove last seperator and trailing whitespace - $line = preg_replace('/[\|\^]\s*$/', '', $line); - $c = -1; //prepare colcounter) - for($chr=0; $chr < strlen($line); $chr++){ - if($line[$chr] == '^'){ - $c++; - $rows[$r][$c]['head'] = true; - $rows[$r][$c]['data'] = ''; - }elseif($line[$chr] == '|'){ - $c++; - $rows[$r][$c]['head'] = false; - $rows[$r][$c]['data'] = ''; - }else{ - $rows[$r][$c]['data'].= $line[$chr]; - } - } - } - - //build table - $ret .= "

\n\n"; - for($r=0; $r < count($rows); $r++){ - $ret .= " \n"; - - for ($c=0; $c < count($rows[$r]); $c++){ - $cspan=1; - $data = $rows[$r][$c]['data']; - $head = $rows[$r][$c]['head']; - - //join cells if next is empty - while($c < count($rows[$r])-1 && $rows[$r][$c+1]['data'] == ''){ - $c++; - $cspan++; - } - if($cspan > 1){ - $cspan = 'colspan="'.$cspan.'"'; - }else{ - $cspan = ''; - } - - //determine alignment from whitespace - if (preg_match('/^\s\s/', $data)) { // right indentation - $td_class = "rightalign"; - if (preg_match('/\s\s$/', $data)) { // both left and right indentation - $td_class = "centeralign"; - } - } else { // left indentation (default) - $td_class = "leftalign"; - } - - $data = trim($data); - if ($head) { - $ret .= " \n"; // set css class for alignment - } else { - $ret .= " \n"; // set css class for alignment - } - } - $ret .= " \n"; - } - $ret .= "
$data $data

\n

"; - - return $ret; -} - -/** - * format lists - * - * @author Andreas Gohr - */ -function listformat($block){ - //remove 1st newline - $block = substr($block,1); - //unescape - $block = str_replace('\\"','"',$block); - -//dbg($block); - - //walk line by line - $ret=''; - $lst=0; - $lvl=0; - $enc=0; - $lines = split("\n",$block); - - //build an item array - $cnt=0; - $items = array(); - foreach ($lines as $line){ - //get intendion level - $lvl = 0; - $lvl += floor(strspn($line,' ')/2); - $lvl += strspn($line,"\t"); - //remove indents - $line = preg_replace('/^[ \t]+/','',$line); - //get type of list - (substr($line,0,1) == '-') ? $type='ol' : $type='ul'; - // remove bullet and following spaces - $line = preg_replace('/^[*\-]\s*/','',$line); - //add item to the list - $items[$cnt]['level'] = $lvl; - $items[$cnt]['type'] = $type; - $items[$cnt]['text'] = $line; - //increase counter - $cnt++; - } - - $level = 0; - $opens = array(); - - foreach ($items as $item){ - if( $item['level'] > $level ){ - //open new list - $ret .= "\n<".$item['type'].">\n"; - array_push($opens,$item['type']); - }elseif( $item['level'] < $level ){ - //close last item - $ret .= "\n"; - for ($i=0; $i<($level - $item['level']); $i++){ - //close higher lists - $ret .= '\n\n"; - } - }elseif($item['type'] != $opens[count($opens)-1]){ - //close last list and open new - $ret .= '\n\n"; - $ret .= "\n<".$item['type'].">\n"; - array_push($opens,$item['type']); - }else{ - //close last item - $ret .= "\n"; - } - - //remember current level and type - $level = $item['level']; - - //print item - $ret .= '

  • '; - $ret .= ''.$item['text'].''; - } - - //close remaining items and lists - while ($open = array_pop($opens)){ - $ret .= "
  • \n"; - $ret .= '\n"; - } - return "

    \n".$ret."\n

    "; -} - -/** - * Handle preformatted blocks - * - * Uses GeSHi for syntax highlighting - * - * @author Andreas Gohr - */ -function preformat($text,$type,$option=''){ - global $conf; - global $lang; - //unescape - $text = str_replace('\\"','"',$text); - - if($type == 'php' && !$conf['phpok']) $type='file'; - if($type == 'html' && !$conf['htmlok']) $type='file'; - - switch ($type){ - case 'php': - ob_start(); - eval($text); - $text = ob_get_contents(); - ob_end_clean(); - break; - case 'html': - break; - case 'nowiki': - $text = htmlspecialchars($text); - break; - case 'file': - $text = htmlspecialchars($text); - $text = "

    \n
    ".$text."
    \n

    "; - break; - case 'code': - if(empty($option)){ - $text = htmlspecialchars($text); - $text = '

    '.$text.'
    '; - }else{ - //strip leading blank line - $text = preg_replace('/^\s*?\n/','',$text); - //use geshi for highlighting - require_once("inc/geshi.php"); - $geshi = new GeSHi($text, strtolower($option), "inc/geshi"); - $geshi->set_encoding($lang['encoding']); - $geshi->enable_classes(); - $geshi->set_header_type(GESHI_HEADER_PRE); - $geshi->set_overall_class('code'); - $geshi->set_link_target($conf['target']['extern']); - $text = $geshi->parse_code(); - } - $text = "

    \n".$text."\n

    "; - break; - case 'block': - $text = substr($text,1); //remove 1st newline - $lines = split("\n",$text); //break into lines - $text = ''; - foreach($lines as $line){ - $text .= substr($line,2)."\n"; //remove indents - } - $text = htmlspecialchars($text); - $text = "

    \n
    ".$text."
    \n

    "; - break; - } - return $text; -} - -/** - * Format embedded media (images) - * - * @author Andreas Gohr - */ -function mediaformat($text){ - global $conf; - - //unescape - $text = str_replace('\\"','"',$text); - - // format RSS - if(substr($text,0,4) == 'rss>'){ - return format_rss(substr($text,4)); - } - - //handle normal media stuff - $link = array(); - $link['name'] = $text; - $link = format_link_media($link); - return format_link_build($link); -} - -/** - * Get first heading, to be used as a page title - * - * @author Jan Decaluwe - */ -function getFirstHeading($text){ - $title = ''; - $lines = split("\n",$text); - foreach($lines as $line){ - if(preg_match('/^\s*==+(.+?)==+\s*$/',$line,$matches)){ - $title = $matches[1]; - break; - } - } - return $title; -} - - -//Setup VIM: ex: et ts=2 enc=utf-8 : -- cgit v1.2.3