diff options
Diffstat (limited to 'inc/parser/metadata.php')
-rw-r--r-- | inc/parser/metadata.php | 575 |
1 files changed, 396 insertions, 179 deletions
diff --git a/inc/parser/metadata.php b/inc/parser/metadata.php index 82a268fd6..25bf3fe3d 100644 --- a/inc/parser/metadata.php +++ b/inc/parser/metadata.php @@ -6,129 +6,198 @@ */ if(!defined('DOKU_INC')) die('meh.'); -if ( !defined('DOKU_LF') ) { +if(!defined('DOKU_LF')) { // Some whitespace to help View > Source - define ('DOKU_LF',"\n"); + define ('DOKU_LF', "\n"); } -if ( !defined('DOKU_TAB') ) { +if(!defined('DOKU_TAB')) { // Some whitespace to help View > Source - define ('DOKU_TAB',"\t"); + define ('DOKU_TAB', "\t"); } /** - * The Renderer + * The MetaData Renderer + * + * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content + * but also it's own filesystem data (like the creation time). All metadata is stored in the fields $meta and + * $persistent. + * + * Some simplified rendering to $doc is done to gather the page's (text-only) abstract. */ class Doku_Renderer_metadata extends Doku_Renderer { + /** the approximate byte lenght to capture for the abstract */ + const ABSTRACT_LEN = 250; + + /** the maximum UTF8 character length for the abstract */ + const ABSTRACT_MAX = 500; + + /** @var array transient meta data, will be reset on each rendering */ + public $meta = array(); + + /** @var array persistent meta data, will be kept until explicitly deleted */ + public $persistent = array(); - var $doc = ''; - var $meta = array(); - var $persistent = array(); + /** @var array the list of headers used to create unique link ids */ + protected $headers = array(); - var $headers = array(); - var $capture = true; - var $store = ''; - var $firstimage = ''; + /** @var string temporary $doc store */ + protected $store = ''; - function getFormat(){ + /** @var string keeps the first image reference */ + protected $firstimage = ''; + + /** @var bool determines if enough data for the abstract was collected, yet */ + public $capture = true; + + /** @var int number of bytes captured for abstract */ + protected $captured = 0; + + /** + * Returns the format produced by this renderer. + * + * @return string always 'metadata' + */ + function getFormat() { return 'metadata'; } - function document_start(){ + /** + * Initialize the document + * + * Sets up some of the persistent info about the page if it doesn't exist, yet. + */ + function document_start() { global $ID; $this->headers = array(); // external pages are missing create date - if(!$this->persistent['date']['created']){ + if(!$this->persistent['date']['created']) { $this->persistent['date']['created'] = filectime(wikiFN($ID)); } - if(!isset($this->persistent['user'])){ + if(!isset($this->persistent['user'])) { $this->persistent['user'] = ''; } - if(!isset($this->persistent['creator'])){ + if(!isset($this->persistent['creator'])) { $this->persistent['creator'] = ''; } // reset metadata to persistent values $this->meta = $this->persistent; } - function document_end(){ + /** + * Finalize the document + * + * Stores collected data in the metadata + */ + function document_end() { global $ID; // store internal info in metadata (notoc,nocache) $this->meta['internal'] = $this->info; - if (!isset($this->meta['description']['abstract'])){ + if(!isset($this->meta['description']['abstract'])) { // cut off too long abstracts $this->doc = trim($this->doc); - if (strlen($this->doc) > 500) - $this->doc = utf8_substr($this->doc, 0, 500).'…'; + if(strlen($this->doc) > self::ABSTRACT_MAX) { + $this->doc = utf8_substr($this->doc, 0, self::ABSTRACT_MAX).'…'; + } $this->meta['description']['abstract'] = $this->doc; } $this->meta['relation']['firstimage'] = $this->firstimage; - if(!isset($this->meta['date']['modified'])){ + if(!isset($this->meta['date']['modified'])) { $this->meta['date']['modified'] = filemtime(wikiFN($ID)); } } + /** + * Render plain text data + * + * This function takes care of the amount captured data and will stop capturing when + * enough abstract data is available + * + * @param $text + */ + function cdata($text) { + if(!$this->capture) return; + + $this->doc .= $text; + + $this->captured += strlen($text); + if($this->captured > self::ABSTRACT_LEN) $this->capture = false; + } + + /** + * Add an item to the TOC + * + * @param string $id the hash link + * @param string $text the text to display + * @param int $level the nesting level + */ function toc_additem($id, $text, $level) { global $conf; //only add items within configured levels - if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']){ + if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) { // the TOC is one of our standard ul list arrays ;-) $this->meta['description']['tableofcontents'][] = array( - 'hid' => $id, - 'title' => $text, - 'type' => 'ul', - 'level' => $level-$conf['toptoclevel']+1 + 'hid' => $id, + 'title' => $text, + 'type' => 'ul', + 'level' => $level - $conf['toptoclevel'] + 1 ); } } + /** + * Render a heading + * + * @param string $text the text to display + * @param int $level header level + * @param int $pos byte position in the original source + */ function header($text, $level, $pos) { - if (!isset($this->meta['title'])) $this->meta['title'] = $text; + if(!isset($this->meta['title'])) $this->meta['title'] = $text; // add the header to the TOC - $hid = $this->_headerToLink($text,'true'); + $hid = $this->_headerToLink($text, 'true'); $this->toc_additem($hid, $text, $level); // add to summary - if ($this->capture && ($level > 1)) $this->doc .= DOKU_LF.$text.DOKU_LF; + $this->cdata(DOKU_LF.$text.DOKU_LF); } - function section_open($level){} - function section_close(){} - - function cdata($text){ - if ($this->capture) $this->doc .= $text; - } - - function p_open(){ - if ($this->capture) $this->doc .= DOKU_LF; + /** + * Open a paragraph + */ + function p_open() { + $this->cdata(DOKU_LF); } - function p_close(){ - if ($this->capture){ - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Close a paragraph + */ + function p_close() { + $this->cdata(DOKU_LF); } - function linebreak(){ - if ($this->capture) $this->doc .= DOKU_LF; + /** + * Create a line break + */ + function linebreak() { + $this->cdata(DOKU_LF); } - function hr(){ - if ($this->capture){ - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF.'----------'.DOKU_LF; - } + /** + * Create a horizontal line + */ + function hr() { + $this->cdata(DOKU_LF.'----------'.DOKU_LF); } /** @@ -141,7 +210,7 @@ class Doku_Renderer_metadata extends Doku_Renderer { * @author Andreas Gohr <andi@splitbrain.org> */ function footnote_open() { - if ($this->capture){ + if($this->capture) { // move current content to store and record footnote $this->store = $this->doc; $this->doc = ''; @@ -157,141 +226,214 @@ class Doku_Renderer_metadata extends Doku_Renderer { * @author Andreas Gohr */ function footnote_close() { - if ($this->capture){ + if($this->capture) { // restore old content - $this->doc = $this->store; + $this->doc = $this->store; $this->store = ''; } } - function listu_open(){ - if ($this->capture) $this->doc .= DOKU_LF; - } - - function listu_close(){ - if ($this->capture && (strlen($this->doc) > 250)) $this->capture = false; - } - - function listo_open(){ - if ($this->capture) $this->doc .= DOKU_LF; - } - - function listo_close(){ - if ($this->capture && (strlen($this->doc) > 250)) $this->capture = false; + /** + * Open an unordered list + */ + function listu_open() { + $this->cdata(DOKU_LF); } - function listitem_open($level){ - if ($this->capture) $this->doc .= str_repeat(DOKU_TAB, $level).'* '; + /** + * Open an ordered list + */ + function listo_open() { + $this->cdata(DOKU_LF); } - function listitem_close(){ - if ($this->capture) $this->doc .= DOKU_LF; + /** + * Open a list item + * + * @param int $level the nesting level + */ + function listitem_open($level) { + $this->cdata(str_repeat(DOKU_TAB, $level).'* '); } - function listcontent_open(){} - function listcontent_close(){} - - function unformatted($text){ - if ($this->capture) $this->doc .= $text; + /** + * Close a list item + */ + function listitem_close() { + $this->cdata(DOKU_LF); } - function preformatted($text){ - if ($this->capture) $this->doc .= $text; + /** + * Output preformatted text + * + * @param string $text + */ + function preformatted($text) { + $this->cdata($text); } - function file($text, $lang = null, $file = null){ - if ($this->capture){ - $this->doc .= DOKU_LF.$text; - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Start a block quote + */ + function quote_open() { + $this->cdata(DOKU_LF.DOKU_TAB.'"'); } - function quote_open(){ - if ($this->capture) $this->doc .= DOKU_LF.DOKU_TAB.'"'; + /** + * Stop a block quote + */ + function quote_close() { + $this->cdata('"'.DOKU_LF); } - function quote_close(){ - if ($this->capture){ - $this->doc .= '"'; - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Display text as file content, optionally syntax highlighted + * + * @param string $text text to show + * @param string $lang programming language to use for syntax highlighting + * @param string $file file path label + */ + function file($text, $lang = null, $file = null) { + $this->cdata(DOKU_LF.$text.DOKU_LF); } - function code($text, $language = null, $file = null){ - if ($this->capture){ - $this->doc .= DOKU_LF.$text; - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Display text as code content, optionally syntax highlighted + * + * @param string $text text to show + * @param string $language programming language to use for syntax highlighting + * @param string $file file path label + */ + function code($text, $language = null, $file = null) { + $this->cdata(DOKU_LF.$text.DOKU_LF); } - function acronym($acronym){ - if ($this->capture) $this->doc .= $acronym; + /** + * Format an acronym + * + * Uses $this->acronyms + * + * @param string $acronym + */ + function acronym($acronym) { + $this->cdata($acronym); } - function smiley($smiley){ - if ($this->capture) $this->doc .= $smiley; + /** + * Format a smiley + * + * Uses $this->smiley + * + * @param string $smiley + */ + function smiley($smiley) { + $this->cdata($smiley); } - function entity($entity){ - if ($this->capture) $this->doc .= $entity; + /** + * Format an entity + * + * Entities are basically small text replacements + * + * Uses $this->entities + * + * @param string $entity + */ + function entity($entity) { + $this->cdata($entity); } - function multiplyentity($x, $y){ - if ($this->capture) $this->doc .= $x.'×'.$y; + /** + * Typographically format a multiply sign + * + * Example: ($x=640, $y=480) should result in "640×480" + * + * @param string|int $x first value + * @param string|int $y second value + */ + function multiplyentity($x, $y) { + $this->cdata($x.'×'.$y); } - function singlequoteopening(){ + /** + * Render an opening single quote char (language specific) + */ + function singlequoteopening() { global $lang; - if ($this->capture) $this->doc .= $lang['singlequoteopening']; + $this->cdata($lang['singlequoteopening']); } - function singlequoteclosing(){ + /** + * Render a closing single quote char (language specific) + */ + function singlequoteclosing() { global $lang; - if ($this->capture) $this->doc .= $lang['singlequoteclosing']; + $this->cdata($lang['singlequoteclosing']); } + /** + * Render an apostrophe char (language specific) + */ function apostrophe() { global $lang; - if ($this->capture) $this->doc .= $lang['apostrophe']; + $this->cdata($lang['apostrophe']); } - function doublequoteopening(){ + /** + * Render an opening double quote char (language specific) + */ + function doublequoteopening() { global $lang; - if ($this->capture) $this->doc .= $lang['doublequoteopening']; + $this->cdata($lang['doublequoteopening']); } - function doublequoteclosing(){ + /** + * Render an closinging double quote char (language specific) + */ + function doublequoteclosing() { global $lang; - if ($this->capture) $this->doc .= $lang['doublequoteclosing']; + $this->cdata($lang['doublequoteclosing']); } + /** + * Render a CamelCase link + * + * @param string $link The link name + * @see http://en.wikipedia.org/wiki/CamelCase + */ function camelcaselink($link) { $this->internallink($link, $link); } - function locallink($hash, $name = null){ + /** + * Render a page local link + * + * @param string $hash hash link identifier + * @param string $name name for the link + */ + function locallink($hash, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } } /** * keep track of internal links in $this->meta['relation']['references'] + * + * @param string $id page ID to link to. eg. 'wiki:syntax' + * @param string|array $name name for the link, array for media file */ - function internallink($id, $name = null){ + function internallink($id, $name = null) { global $ID; if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } $parts = explode('?', $id, 2); - if (count($parts) === 2) { + if(count($parts) === 2) { $id = $parts[0]; } @@ -299,7 +441,7 @@ class Doku_Renderer_metadata extends Doku_Renderer { // first resolve and clean up the $id resolve_pageid(getNS($ID), $id, $exists); - @list($page, $hash) = explode('#', $id, 2); + @list($page) = explode('#', $id, 2); // set metadata $this->meta['relation']['references'][$page] = $exists; @@ -307,84 +449,141 @@ class Doku_Renderer_metadata extends Doku_Renderer { // p_set_metadata($id, $data); // add link title to summary - if ($this->capture){ + if($this->capture) { $name = $this->_getLinkTitle($name, $default, $id); $this->doc .= $name; } } - function externallink($url, $name = null){ + /** + * Render an external link + * + * @param string $url full URL with scheme + * @param string|array $name name for the link, array for media file + */ + function externallink($url, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>'); + if($this->capture) { + $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>'); } } - function interwikilink($match, $name = null, $wikiName, $wikiUri){ + /** + * Render an interwiki link + * + * You may want to use $this->_resolveInterWiki() here + * + * @param string $match original link - probably not much use + * @param string|array $name name for the link, array for media file + * @param string $wikiName indentifier (shortcut) for the remote wiki + * @param string $wikiUri the fragment parsed from the original link + */ + function interwikilink($match, $name = null, $wikiName, $wikiUri) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - list($wikiUri, $hash) = explode('#', $wikiUri, 2); + if($this->capture) { + list($wikiUri) = explode('#', $wikiUri, 2); $name = $this->_getLinkTitle($name, $wikiUri); $this->doc .= $name; } } - function windowssharelink($url, $name = null){ + /** + * Link to windows share + * + * @param string $url the link + * @param string|array $name name for the link, array for media file + */ + function windowssharelink($url, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - if ($name) $this->doc .= $name; + if($this->capture) { + if($name) $this->doc .= $name; else $this->doc .= '<'.$url.'>'; } } - function emaillink($address, $name = null){ + /** + * Render a linked E-Mail Address + * + * Should honor $conf['mailguard'] setting + * + * @param string $address Email-Address + * @param string|array $name name for the link, array for media file + */ + function emaillink($address, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - if ($name) $this->doc .= $name; + if($this->capture) { + if($name) $this->doc .= $name; else $this->doc .= '<'.$address.'>'; } } - function internalmedia($src, $title=null, $align=null, $width=null, - $height=null, $cache=null, $linking=null){ - if ($this->capture && $title) $this->doc .= '['.$title.']'; + /** + * Render an internal media file + * + * @param string $src media ID + * @param string $title descriptive text + * @param string $align left|center|right + * @param int $width width of media in pixel + * @param int $height height of media in pixel + * @param string $cache cache|recache|nocache + * @param string $linking linkonly|detail|nolink + */ + function internalmedia($src, $title = null, $align = null, $width = null, + $height = null, $cache = null, $linking = null) { + if($this->capture && $title) $this->doc .= '['.$title.']'; $this->_firstimage($src); $this->_recordMediaUsage($src); } - function externalmedia($src, $title=null, $align=null, $width=null, - $height=null, $cache=null, $linking=null){ - if ($this->capture && $title) $this->doc .= '['.$title.']'; + /** + * Render an external media file + * + * @param string $src full media URL + * @param string $title descriptive text + * @param string $align left|center|right + * @param int $width width of media in pixel + * @param int $height height of media in pixel + * @param string $cache cache|recache|nocache + * @param string $linking linkonly|detail|nolink + */ + function externalmedia($src, $title = null, $align = null, $width = null, + $height = null, $cache = null, $linking = null) { + if($this->capture && $title) $this->doc .= '['.$title.']'; $this->_firstimage($src); } - function rss($url,$params) { + /** + * Render the output of an RSS feed + * + * @param string $url URL of the feed + * @param array $params Finetuning of the output + */ + function rss($url, $params) { $this->meta['relation']['haspart'][$url] = true; $this->meta['date']['valid']['age'] = - isset($this->meta['date']['valid']['age']) ? - min($this->meta['date']['valid']['age'],$params['refresh']) : - $params['refresh']; + isset($this->meta['date']['valid']['age']) ? + min($this->meta['date']['valid']['age'], $params['refresh']) : + $params['refresh']; } - //---------------------------------------------------------- - // Utils + #region Utils /** * Removes any Namespace from the given name but keeps @@ -392,35 +591,36 @@ class Doku_Renderer_metadata extends Doku_Renderer { * * @author Andreas Gohr <andi@splitbrain.org> */ - function _simpleTitle($name){ + function _simpleTitle($name) { global $conf; if(is_array($name)) return ''; - if($conf['useslash']){ + if($conf['useslash']) { $nssep = '[:;/]'; - }else{ + } else { $nssep = '[:;]'; } - $name = preg_replace('!.*'.$nssep.'!','',$name); + $name = preg_replace('!.*'.$nssep.'!', '', $name); //if there is a hash we use the anchor name only - $name = preg_replace('!.*#!','',$name); + $name = preg_replace('!.*#!', '', $name); return $name; } /** * Creates a linkid from a headline * + * @author Andreas Gohr <andi@splitbrain.org> * @param string $title The headline title * @param boolean $create Create a new unique ID? - * @author Andreas Gohr <andi@splitbrain.org> + * @return string */ - function _headerToLink($title, $create=false) { - if($create){ - return sectionID($title,$this->headers); - }else{ + function _headerToLink($title, $create = false) { + if($create) { + return sectionID($title, $this->headers); + } else { $check = false; - return sectionID($title,$check); + return sectionID($title, $check); } } @@ -428,17 +628,22 @@ class Doku_Renderer_metadata extends Doku_Renderer { * Construct a title and handle images in titles * * @author Harry Fuecks <hfuecks@gmail.com> + * @param string|array $title either string title or media array + * @param string $default default title if nothing else is found + * @param null|string $id linked page id (used to extract title from first heading) + * @return string title text */ - function _getLinkTitle($title, $default, $id=null) { - global $conf; - - $isImage = false; - if (is_array($title)){ - if($title['title']) return '['.$title['title'].']'; - } else if (is_null($title) || trim($title)==''){ - if (useHeading('content') && $id){ - $heading = p_get_first_heading($id,METADATA_DONT_RENDER); - if ($heading) return $heading; + function _getLinkTitle($title, $default, $id = null) { + if(is_array($title)) { + if($title['title']) { + return '['.$title['title'].']'; + } else { + return $default; + } + } else if(is_null($title) || trim($title) == '') { + if(useHeading('content') && $id) { + $heading = p_get_first_heading($id, METADATA_DONT_RENDER); + if($heading) return $heading; } return $default; } else { @@ -446,27 +651,39 @@ class Doku_Renderer_metadata extends Doku_Renderer { } } - function _firstimage($src){ + /** + * Remember first image + * + * @param string $src image URL or ID + */ + function _firstimage($src) { if($this->firstimage) return; global $ID; - list($src,$hash) = explode('#',$src,2); - if(!media_isexternal($src)){ - resolve_mediaid(getNS($ID),$src, $exists); + list($src) = explode('#', $src, 2); + if(!media_isexternal($src)) { + resolve_mediaid(getNS($ID), $src, $exists); } - if(preg_match('/.(jpe?g|gif|png)$/i',$src)){ + if(preg_match('/.(jpe?g|gif|png)$/i', $src)) { $this->firstimage = $src; } } + /** + * Store list of used media files in metadata + * + * @param string $src media ID + */ function _recordMediaUsage($src) { global $ID; - list ($src, $hash) = explode('#', $src, 2); - if (media_isexternal($src)) return; + list ($src) = explode('#', $src, 2); + if(media_isexternal($src)) return; resolve_mediaid(getNS($ID), $src, $exists); $this->meta['relation']['media'][$src] = $exists; } + + #endregion } //Setup VIM: ex: et ts=4 : |