diff options
author | Andreas Gohr <andi@splitbrain.org> | 2014-09-29 20:17:39 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2014-09-29 20:17:39 +0200 |
commit | 6c1ae996157551dcf5bb4e7e8922677bb3d3d358 (patch) | |
tree | b3a4162367176a4e2ebadbd6ab31753c1b042be0 /inc/parser/metadata.php | |
parent | 35f3340eb3b989194a496861abfb5b3d3c9a630d (diff) | |
parent | 57271d078b9c433bec79d75cb44dadcafeae07df (diff) | |
download | rpg-6c1ae996157551dcf5bb4e7e8922677bb3d3d358.tar.gz rpg-6c1ae996157551dcf5bb4e7e8922677bb3d3d358.tar.bz2 |
Merge branch 'master' into stable
* master: (214 commits)
release preparations
postgresql auth plugin: correct function name
parse AT parameter: first strtotime then timestamp remove config option
move more strings to lang.php
move strings to lang.php
add placeholders for create page text
phpdocs parserutils
improve some scrutinizer issues
visibility plugin methods
use config cascade for loading of localizations
reformatting config cascade
add lang files to cascading
work around missing gzopen on certain systems #865
translation update
fix scrutinizer issues
fixed typos in docblock comments
do not allow empty passwords
clean user credentials from control chars
added filter method to INPUT class
translation update
...
Diffstat (limited to 'inc/parser/metadata.php')
-rw-r--r-- | inc/parser/metadata.php | 575 |
1 files changed, 396 insertions, 179 deletions
diff --git a/inc/parser/metadata.php b/inc/parser/metadata.php index 82a268fd6..25bf3fe3d 100644 --- a/inc/parser/metadata.php +++ b/inc/parser/metadata.php @@ -6,129 +6,198 @@ */ if(!defined('DOKU_INC')) die('meh.'); -if ( !defined('DOKU_LF') ) { +if(!defined('DOKU_LF')) { // Some whitespace to help View > Source - define ('DOKU_LF',"\n"); + define ('DOKU_LF', "\n"); } -if ( !defined('DOKU_TAB') ) { +if(!defined('DOKU_TAB')) { // Some whitespace to help View > Source - define ('DOKU_TAB',"\t"); + define ('DOKU_TAB', "\t"); } /** - * The Renderer + * The MetaData Renderer + * + * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content + * but also it's own filesystem data (like the creation time). All metadata is stored in the fields $meta and + * $persistent. + * + * Some simplified rendering to $doc is done to gather the page's (text-only) abstract. */ class Doku_Renderer_metadata extends Doku_Renderer { + /** the approximate byte lenght to capture for the abstract */ + const ABSTRACT_LEN = 250; + + /** the maximum UTF8 character length for the abstract */ + const ABSTRACT_MAX = 500; + + /** @var array transient meta data, will be reset on each rendering */ + public $meta = array(); + + /** @var array persistent meta data, will be kept until explicitly deleted */ + public $persistent = array(); - var $doc = ''; - var $meta = array(); - var $persistent = array(); + /** @var array the list of headers used to create unique link ids */ + protected $headers = array(); - var $headers = array(); - var $capture = true; - var $store = ''; - var $firstimage = ''; + /** @var string temporary $doc store */ + protected $store = ''; - function getFormat(){ + /** @var string keeps the first image reference */ + protected $firstimage = ''; + + /** @var bool determines if enough data for the abstract was collected, yet */ + public $capture = true; + + /** @var int number of bytes captured for abstract */ + protected $captured = 0; + + /** + * Returns the format produced by this renderer. + * + * @return string always 'metadata' + */ + function getFormat() { return 'metadata'; } - function document_start(){ + /** + * Initialize the document + * + * Sets up some of the persistent info about the page if it doesn't exist, yet. + */ + function document_start() { global $ID; $this->headers = array(); // external pages are missing create date - if(!$this->persistent['date']['created']){ + if(!$this->persistent['date']['created']) { $this->persistent['date']['created'] = filectime(wikiFN($ID)); } - if(!isset($this->persistent['user'])){ + if(!isset($this->persistent['user'])) { $this->persistent['user'] = ''; } - if(!isset($this->persistent['creator'])){ + if(!isset($this->persistent['creator'])) { $this->persistent['creator'] = ''; } // reset metadata to persistent values $this->meta = $this->persistent; } - function document_end(){ + /** + * Finalize the document + * + * Stores collected data in the metadata + */ + function document_end() { global $ID; // store internal info in metadata (notoc,nocache) $this->meta['internal'] = $this->info; - if (!isset($this->meta['description']['abstract'])){ + if(!isset($this->meta['description']['abstract'])) { // cut off too long abstracts $this->doc = trim($this->doc); - if (strlen($this->doc) > 500) - $this->doc = utf8_substr($this->doc, 0, 500).'…'; + if(strlen($this->doc) > self::ABSTRACT_MAX) { + $this->doc = utf8_substr($this->doc, 0, self::ABSTRACT_MAX).'…'; + } $this->meta['description']['abstract'] = $this->doc; } $this->meta['relation']['firstimage'] = $this->firstimage; - if(!isset($this->meta['date']['modified'])){ + if(!isset($this->meta['date']['modified'])) { $this->meta['date']['modified'] = filemtime(wikiFN($ID)); } } + /** + * Render plain text data + * + * This function takes care of the amount captured data and will stop capturing when + * enough abstract data is available + * + * @param $text + */ + function cdata($text) { + if(!$this->capture) return; + + $this->doc .= $text; + + $this->captured += strlen($text); + if($this->captured > self::ABSTRACT_LEN) $this->capture = false; + } + + /** + * Add an item to the TOC + * + * @param string $id the hash link + * @param string $text the text to display + * @param int $level the nesting level + */ function toc_additem($id, $text, $level) { global $conf; //only add items within configured levels - if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']){ + if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) { // the TOC is one of our standard ul list arrays ;-) $this->meta['description']['tableofcontents'][] = array( - 'hid' => $id, - 'title' => $text, - 'type' => 'ul', - 'level' => $level-$conf['toptoclevel']+1 + 'hid' => $id, + 'title' => $text, + 'type' => 'ul', + 'level' => $level - $conf['toptoclevel'] + 1 ); } } + /** + * Render a heading + * + * @param string $text the text to display + * @param int $level header level + * @param int $pos byte position in the original source + */ function header($text, $level, $pos) { - if (!isset($this->meta['title'])) $this->meta['title'] = $text; + if(!isset($this->meta['title'])) $this->meta['title'] = $text; // add the header to the TOC - $hid = $this->_headerToLink($text,'true'); + $hid = $this->_headerToLink($text, 'true'); $this->toc_additem($hid, $text, $level); // add to summary - if ($this->capture && ($level > 1)) $this->doc .= DOKU_LF.$text.DOKU_LF; + $this->cdata(DOKU_LF.$text.DOKU_LF); } - function section_open($level){} - function section_close(){} - - function cdata($text){ - if ($this->capture) $this->doc .= $text; - } - - function p_open(){ - if ($this->capture) $this->doc .= DOKU_LF; + /** + * Open a paragraph + */ + function p_open() { + $this->cdata(DOKU_LF); } - function p_close(){ - if ($this->capture){ - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Close a paragraph + */ + function p_close() { + $this->cdata(DOKU_LF); } - function linebreak(){ - if ($this->capture) $this->doc .= DOKU_LF; + /** + * Create a line break + */ + function linebreak() { + $this->cdata(DOKU_LF); } - function hr(){ - if ($this->capture){ - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF.'----------'.DOKU_LF; - } + /** + * Create a horizontal line + */ + function hr() { + $this->cdata(DOKU_LF.'----------'.DOKU_LF); } /** @@ -141,7 +210,7 @@ class Doku_Renderer_metadata extends Doku_Renderer { * @author Andreas Gohr <andi@splitbrain.org> */ function footnote_open() { - if ($this->capture){ + if($this->capture) { // move current content to store and record footnote $this->store = $this->doc; $this->doc = ''; @@ -157,141 +226,214 @@ class Doku_Renderer_metadata extends Doku_Renderer { * @author Andreas Gohr */ function footnote_close() { - if ($this->capture){ + if($this->capture) { // restore old content - $this->doc = $this->store; + $this->doc = $this->store; $this->store = ''; } } - function listu_open(){ - if ($this->capture) $this->doc .= DOKU_LF; - } - - function listu_close(){ - if ($this->capture && (strlen($this->doc) > 250)) $this->capture = false; - } - - function listo_open(){ - if ($this->capture) $this->doc .= DOKU_LF; - } - - function listo_close(){ - if ($this->capture && (strlen($this->doc) > 250)) $this->capture = false; + /** + * Open an unordered list + */ + function listu_open() { + $this->cdata(DOKU_LF); } - function listitem_open($level){ - if ($this->capture) $this->doc .= str_repeat(DOKU_TAB, $level).'* '; + /** + * Open an ordered list + */ + function listo_open() { + $this->cdata(DOKU_LF); } - function listitem_close(){ - if ($this->capture) $this->doc .= DOKU_LF; + /** + * Open a list item + * + * @param int $level the nesting level + */ + function listitem_open($level) { + $this->cdata(str_repeat(DOKU_TAB, $level).'* '); } - function listcontent_open(){} - function listcontent_close(){} - - function unformatted($text){ - if ($this->capture) $this->doc .= $text; + /** + * Close a list item + */ + function listitem_close() { + $this->cdata(DOKU_LF); } - function preformatted($text){ - if ($this->capture) $this->doc .= $text; + /** + * Output preformatted text + * + * @param string $text + */ + function preformatted($text) { + $this->cdata($text); } - function file($text, $lang = null, $file = null){ - if ($this->capture){ - $this->doc .= DOKU_LF.$text; - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Start a block quote + */ + function quote_open() { + $this->cdata(DOKU_LF.DOKU_TAB.'"'); } - function quote_open(){ - if ($this->capture) $this->doc .= DOKU_LF.DOKU_TAB.'"'; + /** + * Stop a block quote + */ + function quote_close() { + $this->cdata('"'.DOKU_LF); } - function quote_close(){ - if ($this->capture){ - $this->doc .= '"'; - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Display text as file content, optionally syntax highlighted + * + * @param string $text text to show + * @param string $lang programming language to use for syntax highlighting + * @param string $file file path label + */ + function file($text, $lang = null, $file = null) { + $this->cdata(DOKU_LF.$text.DOKU_LF); } - function code($text, $language = null, $file = null){ - if ($this->capture){ - $this->doc .= DOKU_LF.$text; - if (strlen($this->doc) > 250) $this->capture = false; - else $this->doc .= DOKU_LF; - } + /** + * Display text as code content, optionally syntax highlighted + * + * @param string $text text to show + * @param string $language programming language to use for syntax highlighting + * @param string $file file path label + */ + function code($text, $language = null, $file = null) { + $this->cdata(DOKU_LF.$text.DOKU_LF); } - function acronym($acronym){ - if ($this->capture) $this->doc .= $acronym; + /** + * Format an acronym + * + * Uses $this->acronyms + * + * @param string $acronym + */ + function acronym($acronym) { + $this->cdata($acronym); } - function smiley($smiley){ - if ($this->capture) $this->doc .= $smiley; + /** + * Format a smiley + * + * Uses $this->smiley + * + * @param string $smiley + */ + function smiley($smiley) { + $this->cdata($smiley); } - function entity($entity){ - if ($this->capture) $this->doc .= $entity; + /** + * Format an entity + * + * Entities are basically small text replacements + * + * Uses $this->entities + * + * @param string $entity + */ + function entity($entity) { + $this->cdata($entity); } - function multiplyentity($x, $y){ - if ($this->capture) $this->doc .= $x.'×'.$y; + /** + * Typographically format a multiply sign + * + * Example: ($x=640, $y=480) should result in "640×480" + * + * @param string|int $x first value + * @param string|int $y second value + */ + function multiplyentity($x, $y) { + $this->cdata($x.'×'.$y); } - function singlequoteopening(){ + /** + * Render an opening single quote char (language specific) + */ + function singlequoteopening() { global $lang; - if ($this->capture) $this->doc .= $lang['singlequoteopening']; + $this->cdata($lang['singlequoteopening']); } - function singlequoteclosing(){ + /** + * Render a closing single quote char (language specific) + */ + function singlequoteclosing() { global $lang; - if ($this->capture) $this->doc .= $lang['singlequoteclosing']; + $this->cdata($lang['singlequoteclosing']); } + /** + * Render an apostrophe char (language specific) + */ function apostrophe() { global $lang; - if ($this->capture) $this->doc .= $lang['apostrophe']; + $this->cdata($lang['apostrophe']); } - function doublequoteopening(){ + /** + * Render an opening double quote char (language specific) + */ + function doublequoteopening() { global $lang; - if ($this->capture) $this->doc .= $lang['doublequoteopening']; + $this->cdata($lang['doublequoteopening']); } - function doublequoteclosing(){ + /** + * Render an closinging double quote char (language specific) + */ + function doublequoteclosing() { global $lang; - if ($this->capture) $this->doc .= $lang['doublequoteclosing']; + $this->cdata($lang['doublequoteclosing']); } + /** + * Render a CamelCase link + * + * @param string $link The link name + * @see http://en.wikipedia.org/wiki/CamelCase + */ function camelcaselink($link) { $this->internallink($link, $link); } - function locallink($hash, $name = null){ + /** + * Render a page local link + * + * @param string $hash hash link identifier + * @param string $name name for the link + */ + function locallink($hash, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } } /** * keep track of internal links in $this->meta['relation']['references'] + * + * @param string $id page ID to link to. eg. 'wiki:syntax' + * @param string|array $name name for the link, array for media file */ - function internallink($id, $name = null){ + function internallink($id, $name = null) { global $ID; if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } $parts = explode('?', $id, 2); - if (count($parts) === 2) { + if(count($parts) === 2) { $id = $parts[0]; } @@ -299,7 +441,7 @@ class Doku_Renderer_metadata extends Doku_Renderer { // first resolve and clean up the $id resolve_pageid(getNS($ID), $id, $exists); - @list($page, $hash) = explode('#', $id, 2); + @list($page) = explode('#', $id, 2); // set metadata $this->meta['relation']['references'][$page] = $exists; @@ -307,84 +449,141 @@ class Doku_Renderer_metadata extends Doku_Renderer { // p_set_metadata($id, $data); // add link title to summary - if ($this->capture){ + if($this->capture) { $name = $this->_getLinkTitle($name, $default, $id); $this->doc .= $name; } } - function externallink($url, $name = null){ + /** + * Render an external link + * + * @param string $url full URL with scheme + * @param string|array $name name for the link, array for media file + */ + function externallink($url, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>'); + if($this->capture) { + $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>'); } } - function interwikilink($match, $name = null, $wikiName, $wikiUri){ + /** + * Render an interwiki link + * + * You may want to use $this->_resolveInterWiki() here + * + * @param string $match original link - probably not much use + * @param string|array $name name for the link, array for media file + * @param string $wikiName indentifier (shortcut) for the remote wiki + * @param string $wikiUri the fragment parsed from the original link + */ + function interwikilink($match, $name = null, $wikiName, $wikiUri) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - list($wikiUri, $hash) = explode('#', $wikiUri, 2); + if($this->capture) { + list($wikiUri) = explode('#', $wikiUri, 2); $name = $this->_getLinkTitle($name, $wikiUri); $this->doc .= $name; } } - function windowssharelink($url, $name = null){ + /** + * Link to windows share + * + * @param string $url the link + * @param string|array $name name for the link, array for media file + */ + function windowssharelink($url, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - if ($name) $this->doc .= $name; + if($this->capture) { + if($name) $this->doc .= $name; else $this->doc .= '<'.$url.'>'; } } - function emaillink($address, $name = null){ + /** + * Render a linked E-Mail Address + * + * Should honor $conf['mailguard'] setting + * + * @param string $address Email-Address + * @param string|array $name name for the link, array for media file + */ + function emaillink($address, $name = null) { if(is_array($name)) { $this->_firstimage($name['src']); - if ($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); + if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']); } - if ($this->capture){ - if ($name) $this->doc .= $name; + if($this->capture) { + if($name) $this->doc .= $name; else $this->doc .= '<'.$address.'>'; } } - function internalmedia($src, $title=null, $align=null, $width=null, - $height=null, $cache=null, $linking=null){ - if ($this->capture && $title) $this->doc .= '['.$title.']'; + /** + * Render an internal media file + * + * @param string $src media ID + * @param string $title descriptive text + * @param string $align left|center|right + * @param int $width width of media in pixel + * @param int $height height of media in pixel + * @param string $cache cache|recache|nocache + * @param string $linking linkonly|detail|nolink + */ + function internalmedia($src, $title = null, $align = null, $width = null, + $height = null, $cache = null, $linking = null) { + if($this->capture && $title) $this->doc .= '['.$title.']'; $this->_firstimage($src); $this->_recordMediaUsage($src); } - function externalmedia($src, $title=null, $align=null, $width=null, - $height=null, $cache=null, $linking=null){ - if ($this->capture && $title) $this->doc .= '['.$title.']'; + /** + * Render an external media file + * + * @param string $src full media URL + * @param string $title descriptive text + * @param string $align left|center|right + * @param int $width width of media in pixel + * @param int $height height of media in pixel + * @param string $cache cache|recache|nocache + * @param string $linking linkonly|detail|nolink + */ + function externalmedia($src, $title = null, $align = null, $width = null, + $height = null, $cache = null, $linking = null) { + if($this->capture && $title) $this->doc .= '['.$title.']'; $this->_firstimage($src); } - function rss($url,$params) { + /** + * Render the output of an RSS feed + * + * @param string $url URL of the feed + * @param array $params Finetuning of the output + */ + function rss($url, $params) { $this->meta['relation']['haspart'][$url] = true; $this->meta['date']['valid']['age'] = - isset($this->meta['date']['valid']['age']) ? - min($this->meta['date']['valid']['age'],$params['refresh']) : - $params['refresh']; + isset($this->meta['date']['valid']['age']) ? + min($this->meta['date']['valid']['age'], $params['refresh']) : + $params['refresh']; } - //---------------------------------------------------------- - // Utils + #region Utils /** * Removes any Namespace from the given name but keeps @@ -392,35 +591,36 @@ class Doku_Renderer_metadata extends Doku_Renderer { * * @author Andreas Gohr <andi@splitbrain.org> */ - function _simpleTitle($name){ + function _simpleTitle($name) { global $conf; if(is_array($name)) return ''; - if($conf['useslash']){ + if($conf['useslash']) { $nssep = '[:;/]'; - }else{ + } else { $nssep = '[:;]'; } - $name = preg_replace('!.*'.$nssep.'!','',$name); + $name = preg_replace('!.*'.$nssep.'!', '', $name); //if there is a hash we use the anchor name only - $name = preg_replace('!.*#!','',$name); + $name = preg_replace('!.*#!', '', $name); return $name; } /** * Creates a linkid from a headline * + * @author Andreas Gohr <andi@splitbrain.org> * @param string $title The headline title * @param boolean $create Create a new unique ID? - * @author Andreas Gohr <andi@splitbrain.org> + * @return string */ - function _headerToLink($title, $create=false) { - if($create){ - return sectionID($title,$this->headers); - }else{ + function _headerToLink($title, $create = false) { + if($create) { + return sectionID($title, $this->headers); + } else { $check = false; - return sectionID($title,$check); + return sectionID($title, $check); } } @@ -428,17 +628,22 @@ class Doku_Renderer_metadata extends Doku_Renderer { * Construct a title and handle images in titles * * @author Harry Fuecks <hfuecks@gmail.com> + * @param string|array $title either string title or media array + * @param string $default default title if nothing else is found + * @param null|string $id linked page id (used to extract title from first heading) + * @return string title text */ - function _getLinkTitle($title, $default, $id=null) { - global $conf; - - $isImage = false; - if (is_array($title)){ - if($title['title']) return '['.$title['title'].']'; - } else if (is_null($title) || trim($title)==''){ - if (useHeading('content') && $id){ - $heading = p_get_first_heading($id,METADATA_DONT_RENDER); - if ($heading) return $heading; + function _getLinkTitle($title, $default, $id = null) { + if(is_array($title)) { + if($title['title']) { + return '['.$title['title'].']'; + } else { + return $default; + } + } else if(is_null($title) || trim($title) == '') { + if(useHeading('content') && $id) { + $heading = p_get_first_heading($id, METADATA_DONT_RENDER); + if($heading) return $heading; } return $default; } else { @@ -446,27 +651,39 @@ class Doku_Renderer_metadata extends Doku_Renderer { } } - function _firstimage($src){ + /** + * Remember first image + * + * @param string $src image URL or ID + */ + function _firstimage($src) { if($this->firstimage) return; global $ID; - list($src,$hash) = explode('#',$src,2); - if(!media_isexternal($src)){ - resolve_mediaid(getNS($ID),$src, $exists); + list($src) = explode('#', $src, 2); + if(!media_isexternal($src)) { + resolve_mediaid(getNS($ID), $src, $exists); } - if(preg_match('/.(jpe?g|gif|png)$/i',$src)){ + if(preg_match('/.(jpe?g|gif|png)$/i', $src)) { $this->firstimage = $src; } } + /** + * Store list of used media files in metadata + * + * @param string $src media ID + */ function _recordMediaUsage($src) { global $ID; - list ($src, $hash) = explode('#', $src, 2); - if (media_isexternal($src)) return; + list ($src) = explode('#', $src, 2); + if(media_isexternal($src)) return; resolve_mediaid(getNS($ID), $src, $exists); $this->meta['relation']['media'][$src] = $exists; } + + #endregion } //Setup VIM: ex: et ts=4 : |