From 98214867894eba512bf47cba3439ccba3968f49b Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 22 Nov 2010 21:12:02 +0100 Subject: Render metadata when needed This changes fundamentally when metadata is rendered. This commit introduces a new cache file for every page that just contains a timestamp and is updated whenever the metadata of that page is rendered. Metadata is rendered when p_get_metadata is called and the last rendering has been before a page, metadata, configuration or renderer update or purge is set like in the xhtml renderer cache. Metadata is no longer automatically rendered when the xhtml renderer cache isn't used but will still be rendered when needed as p_get_metadata is called in the cache. Metadata is also no longer rendered in the indexer script when missing as that is already done by pageinfo() before anything else is done so the indexer script won't be called when there is no metadata file. --- inc/parserutils.php | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index a50e3f4f3..d4f55a6e4 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -221,6 +221,7 @@ function p_get_instructions($text){ * returns the metadata of a page * * @author Esther Brunner + * @author Michael Hamann */ function p_get_metadata($id, $key='', $render=false){ global $ID; @@ -231,10 +232,24 @@ function p_get_metadata($id, $key='', $render=false){ $cache = ($ID == $id); $meta = p_read_metadata($id, $cache); - // metadata has never been rendered before - do it! 
(but not for non-existent pages) - if ($render && !isset($meta['current']['description']['abstract']) && page_exists($id)){ - $meta = p_render_metadata($id, $meta); - p_save_metadata($id, $meta); + // prevent recursive calls in the cache + static $recursion = false; + if (!$recursion){ + $recursion = true; + + $cachefile = new cache_renderer($id, wikiFN($id), 'metadata'); + + if (page_exists($id) && !$cachefile->useCache()){ + $meta = p_render_metadata($id, $meta); + if (p_save_metadata($id, $meta)) { + // store a timestamp in order to make sure that the cachefile is touched + $cachefile->storeCache(time()); + } else { + msg('Unable to save metadata file. Hint: disk full; file permissions; safe_mode setting.',-1); + } + } + + $recursion = false; } $val = $meta['current']; -- cgit v1.2.3 From 69ba640bb6c63d2132ca401588053dfc507dbb1b Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 27 Dec 2010 22:51:06 +0100 Subject: Save metadata only when really changed This avoids disk writes when not needed and possibly also xhtml rendering when the metadata needs to be rendered but xhtml doesn't (unless the metadata file is changed). 
--- inc/parserutils.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index d4f55a6e4..fbdc2e3a9 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -240,8 +240,10 @@ function p_get_metadata($id, $key='', $render=false){ $cachefile = new cache_renderer($id, wikiFN($id), 'metadata'); if (page_exists($id) && !$cachefile->useCache()){ + $old_meta = $meta; $meta = p_render_metadata($id, $meta); - if (p_save_metadata($id, $meta)) { + // only update the file when the metadata has been changed + if ($meta == $old_meta || p_save_metadata($id, $meta)) { // store a timestamp in order to make sure that the cachefile is touched $cachefile->storeCache(time()); } else { -- cgit v1.2.3 From 4a81940267e4278153d3726b605286fd963084ec Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 10 Jan 2011 20:39:44 +0100 Subject: Activate the render parameter of p_get_metadata p_get_metadata has a $render parameter that has been disabled by the restructuring of metadata rendering. This change reactivates it so rendering metadata can be prevented. This is e.g. used in the search and in some plugins like indexmenu that use p_get_first_heading. The default of the parameter has been changed to true as otherwise the new caching structure won't work as almost all calls to p_get_metadata don't set the $render parameter. The indexer call to p_get_first_heading has been changed to set $render to true as in the indexer only one page will be rendered and the title in the index should really be the current one. This does not fix the problem that rendering pages with lots of links or displaying the index can cause the parsing/rendering of a lot of pages. 
--- inc/parserutils.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index fbdc2e3a9..9224dae10 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -220,10 +220,15 @@ function p_get_instructions($text){ /** * returns the metadata of a page * + * @param string $id The id of the page the metadata should be returned from + * @param string $key The key of the metadata value that shall be read (by default everything) - separate hierarchies by " " like "date created" + * @param boolean $render If the page should be rendered when the cache can't be used - default true + * @return mixed The requested metadata fields + * * @author Esther Brunner * @author Michael Hamann */ -function p_get_metadata($id, $key='', $render=false){ +function p_get_metadata($id, $key='', $render=true){ global $ID; // cache the current page @@ -234,7 +239,7 @@ function p_get_metadata($id, $key='', $render=false){ // prevent recursive calls in the cache static $recursion = false; - if (!$recursion){ + if (!$recursion && $render){ $recursion = true; $cachefile = new cache_renderer($id, wikiFN($id), 'metadata'); -- cgit v1.2.3 From bf0c93c21103a02f9efe4c427b9fefd6c5732666 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 10 Jan 2011 21:19:11 +0100 Subject: Use title index for more than 11 p_first_heading calls This change makes p_get_first_heading load the title index when more than 11 requests that caused a call to p_get_metadata have already been done. This means that small pages and the breadcrumbs won't trigger the loading of the title index but for larger pages or the sitemap the title index will be used. This is necessary because every call to p_get_metadata can trigger the parsing and rendering of a whole page and there can be many calls when useheading is activated and e.g. the index/sitemap page is displayed.
Additionally this adds a small title cache that caches titles requested from p_get_metadata. Further tests should be done how this affects memory usage and how often the index loading is triggered in order to see if that parameter should be adjusted. --- inc/parserutils.php | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index 9224dae10..b7359d7ef 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -625,9 +625,41 @@ function & p_get_renderer($mode) { * headings ... and so on. * * @author Andreas Gohr + * @author Michael Hamann */ function p_get_first_heading($id, $render=true){ - return p_get_metadata($id,'title',$render); + // counter how many titles have been requested using p_get_metadata + static $count = 0; + // the index of all titles, only loaded when many titles are requested + static $title_index = null; + // cache for titles requested using p_get_metadata + static $title_cache = array(); + + $id = cleanID($id); + + // check if this title has already been requested + if (isset($title_cache[$id])) + return $title_cache[$id]; + + // check if already too many titles have been requested and probably + // using the title index is better + if ($count > 10) { + if (is_null($title_index)) { + $pages = array_map('rtrim', idx_getIndex('page', '')); + $titles = array_map('rtrim', idx_getIndex('title', '')); + // check for corrupt title index #FS2076 + if(count($pages) != count($titles)){ + $titles = array_fill(0,count($pages),''); + @unlink($conf['indexdir'].'/title.idx'); // will be rebuilt in inc/init.php + } + $title_index = array_combine($pages, $titles); + } + return $title_index[$id]; + } + + ++$count; + $title_cache[$id] = p_get_metadata($id,'title',$render); + return $title_cache[$id]; } /** -- cgit v1.2.3 From ff725173bf45c47b1ed9778524710cce72b1d42d Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 10 Jan 2011 
23:41:43 +0100 Subject: Add define for metadata usage limit in p_get_first_heading This commit introduces a new define P_GET_FIRST_HEADING_METADATA_LIMIT that can be set in preload.php in order to change the limit for how many pages the first heading shall be loaded from metadata in p_get_first_heading. Changing this is probably most interesting for Wikis with a lot of pages where loading the title index costs a significant amount of time and memory. --- inc/parserutils.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index b7359d7ef..6e349e984 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -9,6 +9,13 @@ if(!defined('DOKU_INC')) die('meh.'); +/** + * For how many different pages shall the first heading be loaded from the + * metadata? When this limit is reached the title index is loaded and used for + * all following requests. + */ +if (!defined('P_GET_FIRST_HEADING_METADATA_LIMIT')) define('P_GET_FIRST_HEADING_METADATA_LIMIT', 10); + /** * Returns the parsed Wikitext in XHTML for the given id and revision. * @@ -629,7 +636,7 @@ function & p_get_renderer($mode) { */ function p_get_first_heading($id, $render=true){ // counter how many titles have been requested using p_get_metadata - static $count = 0; + static $count = 1; // the index of all titles, only loaded when many titles are requested static $title_index = null; // cache for titles requested using p_get_metadata @@ -643,7 +650,7 @@ function p_get_first_heading($id, $render=true){ // check if already too many titles have been requested and probably // using the title index is better - if ($count > 10) { + if ($count > P_GET_FIRST_HEADING_METADATA_LIMIT) { if (is_null($title_index)) { $pages = array_map('rtrim', idx_getIndex('page', '')); $titles = array_map('rtrim', idx_getIndex('title', '')); -- cgit v1.2.3