From fcd3bb7cfe60027407e0af2ec83fafbf89758e9c Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 31 Jan 2010 18:00:38 +0100 Subject: fixed file header --- inc/indexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 14af579bb..39592e8f4 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -1,6 +1,6 @@ -- cgit v1.2.3 From 16905344219a6293705b71cd526fad3ba07b04eb Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 31 Jan 2010 19:02:14 +0100 Subject: first attempt to centralize all include loading Classes are loaded through PHP5's class autoloader, all other includes are just loaded by default. This skips a lot of require_once calls. Parser and Plugin stuff isn't handled by the class loader yet. --- inc/indexer.php | 3 --- 1 file changed, 3 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 39592e8f4..07b67c014 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -7,9 +7,6 @@ */ if(!defined('DOKU_INC')) die('meh.'); -require_once(DOKU_INC.'inc/io.php'); -require_once(DOKU_INC.'inc/utf8.php'); -require_once(DOKU_INC.'inc/parserutils.php'); // set the minimum token length to use in the index (note, this doesn't apply to numeric tokens) if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',2); -- cgit v1.2.3 From 229529655f061863ec76db9ea557fef8b1a5161b Mon Sep 17 00:00:00 2001 From: YoBoY Date: Tue, 23 Mar 2010 22:50:41 +0100 Subject: Limiting use of readdir in the idx_indexLengths function (v2). Every search on the wiki uses this function. Scanning the index directory each time is time-consuming, with a constant series of disk accesses. Switching a normal search to use file_exists one or more times, instead of running readdir over the whole directory. 
Switching a wildcard search to use a lengths.idx file containing all the word lengths used in the wiki, file generated if a new configuration parameter $conf[readdircache] is not 0 and fixed to a time in second. Creation of a new function idx_listIndexLengths to do this part. --- inc/indexer.php | 85 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 20 deletions(-) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 07b67c014..54277a88c 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -410,41 +410,86 @@ function idx_updateIndexLine($line,$pid,$count){ return join(':',$updated)."\n"; } +/** + * Get the list of lengths indexed in the wiki + * + * Reads the index directory or a cache file and returns + * a sorted array of lengths of the words used in the wiki. + * + * @author YoBoY + */ +function idx_listIndexLengths() { + global $conf; + // testing what we have to do, create a cache file or not. + if ($conf['readdircache'] == 0) { + $docache = false; + } else { + clearstatcache(); + if (@file_exists($conf['indexdir'].'/lengths.idx') and (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) { + if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ) !== false) { + $idx = array(); + foreach ( $lengths as $length) { + $idx[] = (int)$length; + } + return $idx; + } + } + $docache = true; + } + + if ($conf['readdircache'] == 0 or $docache ) { + $dir = @opendir($conf['indexdir']); + if($dir===false) + return array(); + $idx = array(); + while (($f = readdir($dir)) !== false) { + if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ + $i = substr($f,1,-4); + if (is_numeric($i)) + $idx[] = (int)$i; + } + } + closedir($dir); + sort($idx); + // we save this in a file. 
+ if ($docache === true) { + $handle = @fopen($conf['indexdir'].'/lengths.idx','w'); + @fwrite($handle, implode("\n",$idx)); + @fclose($handle); + } + return $idx; + } + + return array(); +} + /** * Get the word lengths that have been indexed. * * Reads the index directory and returns an array of lengths * that there are indices for. * - * @author Tom N Harris + * @author YoBoY */ function idx_indexLengths(&$filter){ global $conf; - $dir = @opendir($conf['indexdir']); - if($dir===false) - return array(); $idx = array(); - if(is_array($filter)){ - while (($f = readdir($dir)) !== false) { - if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ - $i = substr($f,1,-4); - if (is_numeric($i) && isset($filter[(int)$i])) - $idx[] = (int)$i; + if (is_array($filter)){ + // testing if index files exists only + foreach ($filter as $key => $value) { + if (@file_exists($conf['indexdir']."/i$key.idx")) { + $idx[] = $key; } } - }else{ - // Exact match first. - if(@file_exists($conf['indexdir']."/i$filter.idx")) - $idx[] = $filter; - while (($f = readdir($dir)) !== false) { - if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ - $i = substr($f,1,-4); - if (is_numeric($i) && $i > $filter) - $idx[] = (int)$i; + } else { + $lengths = idx_listIndexLengths(); + foreach ( $lengths as $key => $length) { + // we keep all the values equal or superior + if ((int)$length >= (int)$filter) { + $idx[] = $length; } } } - closedir($dir); return $idx; } -- cgit v1.2.3 From 80423ab626c72923f347e2196ce660957dcc216f Mon Sep 17 00:00:00 2001 From: Adrian Lang Date: Wed, 16 Jun 2010 14:41:59 +0200 Subject: Perform quick search in title as well --- inc/indexer.php | 2 ++ 1 file changed, 2 insertions(+) (limited to 'inc/indexer.php') diff --git a/inc/indexer.php b/inc/indexer.php index 54277a88c..01ba76b08 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -305,6 +305,8 @@ function idx_addPage($page){ } unset($page_idx); // free memory + idx_saveIndexLine('title', '', $pid, 
p_get_first_heading($page, false)); + $pagewords = array(); // get word usage in page $words = idx_getPageWords($page); -- cgit v1.2.3