From 65f40513c558b4d57493485465b12d493911f2a5 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Tue, 26 Apr 2011 23:40:24 +0200 Subject: Fix p_get_first_heading for when the search index is empty When the search index is empty, there was a warning and no headings were displayed. Now the headings are displayed and there is no warning anymore. --- inc/parserutils.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index 9b2d99328..84aaebb4e 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -682,10 +682,15 @@ function p_get_first_heading($id, $render=true){ if(count($pages) != count($titles)){ $titles = array_fill(0,count($pages),''); @unlink($conf['indexdir'].'/title.idx'); // will be rebuilt in inc/init.php + } else { + if (!empty($pages)) // array_combine throws a warning when the parameters are empty arrays + $title_index = array_combine($pages, $titles); + else + $title_index = array(); } - $title_index = array_combine($pages, $titles); } - return $title_index[$id]; + if (!empty($title_index)) // don't use the index when it obviously isn't working + return $title_index[$id]; } ++$count; -- cgit v1.2.3 From 67c15ecea77ff971dcb554de77cf1c25de1140a0 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sun, 8 May 2011 00:51:22 +0200 Subject: Change when metadata is rendered - only when really needed This changes the cache logic for metadata. It introduces a new mode that tries to avoid rendering the page again for simple requests but still updates the metadata when the page has been changed (but not when the cache timeout has been reached or purge is used). It simply compares the time of the last rendering with the last modified time of the page. The old boolean $render parameter has been changed into an int with three possible values. 
Compatibility for the old parameter is provided using a check with is_numeric using the following mapping: - false is still don't render (0 is the new value for that) - true is using that new render logic which means that many plugins will still work unchanged even if they request a lot of data using $render=true (1 is the new value for that providing full compatibility in the case 1 has been used instead of true) The default value for p_get_first_heading is now that new simple cache logic, the default value for getting metadata is the cache logic which should be used with care but is the only way to request (rendered) metadata that can change because of plugin installations or upgrades. --- inc/parserutils.php | 60 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 12 deletions(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index 84aaebb4e..d7451cee4 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -14,7 +14,16 @@ if(!defined('DOKU_INC')) die('meh.'); * metadata? When this limit is reached the title index is loaded and used for * all following requests. */ -if (!defined('P_GET_FIRST_HEADING_METADATA_LIMIT')) define('P_GET_FIRST_HEADING_METADATA_LIMIT', 10); +if (!defined('P_GET_FIRST_HEADING_METADATA_LIMIT')) define('P_GET_FIRST_HEADING_METADATA_LIMIT', 20); + +/** Don't render metadata even if it is outdated or doesn't exist */ +define('METADATA_DONT_RENDER', 0); +/** Render metadata when the page is really newer or the metadata doesn't exist. Uses just a simple check, + but should work pretty well for loading simple metadata values like the page title and avoids + rendering a lot of pages in one request. */ +define('METADATA_RENDER_USING_SIMPLE_CACHE', 1); +/** Render metadata using the metadata cache logic. */ +define('METADATA_RENDER_USING_CACHE', 2); /** * Returns the parsed Wikitext in XHTML for the given id and revision. 
@@ -229,13 +238,15 @@ function p_get_instructions($text){ * * @param string $id The id of the page the metadata should be returned from * @param string $key The key of the metdata value that shall be read (by default everything) - separate hierarchies by " " like "date created" - * @param boolean $render If the page should be rendererd when the cache can't be used - default true + * @param int $render If the page should be rendererd - possible values: + * METADATA_DONT_RENDER, METADATA_RENDER_USING_SIMPLE_CACHE, METADATA_RENDER_USING_CACHE, default: + * METADATA_RENDER_USING_CACHE * @return mixed The requested metadata fields * * @author Esther Brunner * @author Michael Hamann */ -function p_get_metadata($id, $key='', $render=true){ +function p_get_metadata($id, $key='', $render=METADATA_RENDER_USING_CACHE){ global $ID; // cache the current page @@ -244,14 +255,32 @@ function p_get_metadata($id, $key='', $render=true){ $cache = ($ID == $id); $meta = p_read_metadata($id, $cache); + if (!is_numeric($render)) { + if ($render) { + $render = METADATA_RENDER_USING_SIMPLE_CACHE; + } else { + $render = METADATA_DONT_RENDER; + } + } + // prevent recursive calls in the cache static $recursion = false; - if (!$recursion && $render){ + if (!$recursion && $render != METADATA_DONT_RENDER && page_exists($id)){ $recursion = true; $cachefile = new cache_renderer($id, wikiFN($id), 'metadata'); - if (page_exists($id) && !$cachefile->useCache()){ + $do_render = false; + if ($render == METADATA_RENDER_USING_SIMPLE_CACHE) { + $pagefn = wikiFN($id); + $metafn = metaFN($id, '.meta'); + if (!@file_exists($metafn) || @filemtime($pagefn) > @filemtime($cachefile->cache)) { + $do_render = true; + } + } elseif (!$cachefile->useCache()){ + $do_render = true; + } + if ($do_render) { $old_meta = $meta; $meta = p_render_metadata($id, $meta); // only update the file when the metadata has been changed @@ -647,18 +676,25 @@ function & p_get_renderer($mode) { /** * Gets the first heading from a file * 
+ * After P_GET_FIRST_HEADING_METADATA_LIMIT requests for different pages the title + * index will be loaded and used instead. Use METADATA_DONT_RENDER when you are + * requesting a lot of titles, METADATA_RENDER_USING_CACHE when you think + * rendering the page although it hasn't changed might be needed (or also + * want to influence rendering using events) and METADATA_RENDER_USING_SIMPLE_CACHE + * otherwise. Use METADATA_RENDER_USING_CACHE with care as it could cause + * parsing and rendering a lot of pages in one request. + * * @param string $id dokuwiki page id - * @param bool $render rerender if first heading not known - * default: true -- must be set to false for calls from the metadata renderer to - * protects against loops and excessive resource usage when pages - * for which only a first heading is required will attempt to - * render metadata for all the pages for which they require first - * headings ... and so on. + * @param int $render rerender if first heading not known + * default: METADATA_RENDER_USING_SIMPLE_CACHE + * Possible values: METADATA_DONT_RENDER, + * METADATA_RENDER_USING_SIMPLE_CACHE, + * METADATA_RENDER_USING_CACHE * * @author Andreas Gohr * @author Michael Hamann */ -function p_get_first_heading($id, $render=true){ +function p_get_first_heading($id, $render=METADATA_RENDER_USING_SIMPLE_CACHE){ // counter how many titles have been requested using p_get_metadata static $count = 1; // the index of all titles, only loaded when many titles are requested -- cgit v1.2.3 From 65aa8490874917620bd9a9a773cd0fb7621afb41 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sun, 8 May 2011 11:44:12 +0200 Subject: Add more render/cache logic to the metadata code This adds a new rendering limit of currently 5 pages to the p_get_metadata function. This means that in one request not more than 5 pages will be parsed/rendered. Pages for which the cache can be used aren't counted.
This should make the new cache modes safe to use and should provide backwards compatibility while keeping the advantage of rendering metadata on demand (e.g. imagine one included page out of 10 is updated, then the metadata for that page can be rendered, but when you request a purge of the cache not 10 pages are rendered). In this commit most of the changes to the p_get_first_heading function are reverted and the title index is no longer used. As a result, the first heading functionality no longer depends on the search index of DokuWiki. Maybe it can be added again later when the indexer provides a proper API for getting metadata values for all or selected pages. The performance of the p_get_first_heading function should be almost back to the performance in Anteater as the simple cache of p_get_metadata is used and also the limit of p_get_metadata is of course applied. --- inc/parserutils.php | 115 ++++++++++++++++++++++++---------------------------- 1 file changed, 52 insertions(+), 63 deletions(-) (limited to 'inc/parserutils.php') diff --git a/inc/parserutils.php b/inc/parserutils.php index d7451cee4..abba89b5a 100644 --- a/inc/parserutils.php +++ b/inc/parserutils.php @@ -10,20 +10,43 @@ if(!defined('DOKU_INC')) die('meh.'); /** - * For how many different pages shall the first heading be loaded from the - * metadata? When this limit is reached the title index is loaded and used for - * all following requests. + * How many pages shall be rendered for getting metadata during one request + * at maximum? Note that this limit isn't respected when METADATA_RENDER_UNLIMITED + * is passed as render parameter to p_get_metadata.
*/ -if (!defined('P_GET_FIRST_HEADING_METADATA_LIMIT')) define('P_GET_FIRST_HEADING_METADATA_LIMIT', 20); +if (!defined('P_GET_METADATA_RENDER_LIMIT')) define('P_GET_METADATA_RENDER_LIMIT', 5); /** Don't render metadata even if it is outdated or doesn't exist */ define('METADATA_DONT_RENDER', 0); -/** Render metadata when the page is really newer or the metadata doesn't exist. Uses just a simple check, - but should work pretty well for loading simple metadata values like the page title and avoids - rendering a lot of pages in one request. */ +/** + * Render metadata when the page is really newer or the metadata doesn't exist. + * Uses just a simple check, but should work pretty well for loading simple + * metadata values like the page title and avoids rendering a lot of pages in + * one request. The P_GET_METADATA_RENDER_LIMIT is used in this mode. + * Use this if it is unlikely that the metadata value you are requesting + * does depend e.g. on pages that are included in the current page using + * the include plugin (this is very likely the case for the page title, but + * not for relation references). + */ define('METADATA_RENDER_USING_SIMPLE_CACHE', 1); -/** Render metadata using the metadata cache logic. */ +/** + * Render metadata using the metadata cache logic. The P_GET_METADATA_RENDER_LIMIT + * is used in this mode. Use this mode when you are requesting more complex + * metadata. Although this will cause rendering more often it might actually have + * the effect that less current metadata is returned as it is more likely than in + * the simple cache mode that metadata needs to be rendered for all pages at once + * which means that when the metadata for the page is requested that actually needs + * to be updated the limit might have been reached already. + */ define('METADATA_RENDER_USING_CACHE', 2); +/** + * Render metadata without limiting the number of pages for which metadata is + * rendered. 
Use this mode with care, normally it should only be used in places + * like the indexer or in cli scripts where the execution time normally isn't + * limited. This can be combined with the simple cache using + * METADATA_RENDER_USING_CACHE | METADATA_RENDER_UNLIMITED. + */ +define('METADATA_RENDER_UNLIMITED', 4); /** * Returns the parsed Wikitext in XHTML for the given id and revision. @@ -239,8 +262,9 @@ function p_get_instructions($text){ * @param string $id The id of the page the metadata should be returned from * @param string $key The key of the metdata value that shall be read (by default everything) - separate hierarchies by " " like "date created" * @param int $render If the page should be rendererd - possible values: - * METADATA_DONT_RENDER, METADATA_RENDER_USING_SIMPLE_CACHE, METADATA_RENDER_USING_CACHE, default: - * METADATA_RENDER_USING_CACHE + * METADATA_DONT_RENDER, METADATA_RENDER_USING_SIMPLE_CACHE, METADATA_RENDER_USING_CACHE + * METADATA_RENDER_UNLIMITED (also combined with the previous two options), + * default: METADATA_RENDER_USING_CACHE * @return mixed The requested metadata fields * * @author Esther Brunner @@ -248,6 +272,10 @@ function p_get_instructions($text){ */ function p_get_metadata($id, $key='', $render=METADATA_RENDER_USING_CACHE){ global $ID; + static $render_count = 0; + // track pages that have already been rendered in order to avoid rendering the same page + // again + static $rendered_pages = array(); // cache the current page // Benchmarking shows the current page's metadata is generally the only page metadata @@ -265,22 +293,26 @@ function p_get_metadata($id, $key='', $render=METADATA_RENDER_USING_CACHE){ // prevent recursive calls in the cache static $recursion = false; - if (!$recursion && $render != METADATA_DONT_RENDER && page_exists($id)){ + if (!$recursion && $render != METADATA_DONT_RENDER && !isset($rendered_pages[$id])&& page_exists($id)){ $recursion = true; $cachefile = new cache_renderer($id, wikiFN($id), 
'metadata'); $do_render = false; - if ($render == METADATA_RENDER_USING_SIMPLE_CACHE) { - $pagefn = wikiFN($id); - $metafn = metaFN($id, '.meta'); - if (!@file_exists($metafn) || @filemtime($pagefn) > @filemtime($cachefile->cache)) { + if ($render & METADATA_RENDER_UNLIMITED || $render_count < P_GET_METADATA_RENDER_LIMIT) { + if ($render & METADATA_RENDER_USING_SIMPLE_CACHE) { + $pagefn = wikiFN($id); + $metafn = metaFN($id, '.meta'); + if (!@file_exists($metafn) || @filemtime($pagefn) > @filemtime($cachefile->cache)) { + $do_render = true; + } + } elseif (!$cachefile->useCache()){ $do_render = true; } - } elseif (!$cachefile->useCache()){ - $do_render = true; } if ($do_render) { + ++$render_count; + $rendered_pages[$id] = true; $old_meta = $meta; $meta = p_render_metadata($id, $meta); // only update the file when the metadata has been changed @@ -676,62 +708,19 @@ function & p_get_renderer($mode) { /** * Gets the first heading from a file * - * After P_GET_FIRST_HEADING_METADATA_LIMIT requests for different pages the title - * index will be loaded and used instead. Use METADATA_DONT_RENDER when you are - * requesting a lot of titles, METADATA_RENDER_USING_CACHE when you think - * rendering the page although it hasn't changed might be needed (or also - * want to influence rendering using events) and METADATA_RENDER_USING_SIMPLE_CACHE - * otherwise. Use METADATA_RENDER_USING_CACHE with care as it could cause - * parsing and rendering a lot of pages in one request. 
- * * @param string $id dokuwiki page id * @param int $render rerender if first heading not known * default: METADATA_RENDER_USING_SIMPLE_CACHE * Possible values: METADATA_DONT_RENDER, * METADATA_RENDER_USING_SIMPLE_CACHE, - * METADATA_RENDER_USING_CACHE + * METADATA_RENDER_USING_CACHE, + * METADATA_RENDER_UNLIMITED * * @author Andreas Gohr * @author Michael Hamann */ function p_get_first_heading($id, $render=METADATA_RENDER_USING_SIMPLE_CACHE){ - // counter how many titles have been requested using p_get_metadata - static $count = 1; - // the index of all titles, only loaded when many titles are requested - static $title_index = null; - // cache for titles requested using p_get_metadata - static $title_cache = array(); - - $id = cleanID($id); - - // check if this title has already been requested - if (isset($title_cache[$id])) - return $title_cache[$id]; - - // check if already too many titles have been requested and probably - // using the title index is better - if ($count > P_GET_FIRST_HEADING_METADATA_LIMIT) { - if (is_null($title_index)) { - $pages = array_map('rtrim', idx_getIndex('page', '')); - $titles = array_map('rtrim', idx_getIndex('title', '')); - // check for corrupt title index #FS2076 - if(count($pages) != count($titles)){ - $titles = array_fill(0,count($pages),''); - @unlink($conf['indexdir'].'/title.idx'); // will be rebuilt in inc/init.php - } else { - if (!empty($pages)) // array_combine throws a warning when the parameters are empty arrays - $title_index = array_combine($pages, $titles); - else - $title_index = array(); - } - } - if (!empty($title_index)) // don't use the index when it obviously isn't working - return $title_index[$id]; - } - - ++$count; - $title_cache[$id] = p_get_metadata($id,'title',$render); - return $title_cache[$id]; + return p_get_metadata(cleanID($id),'title',$render); } /** -- cgit v1.2.3