summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYoBoY <yoboy.leguesh@gmail.com>2010-03-23 22:50:41 +0100
committerAndreas Gohr <andi@splitbrain.org>2010-03-24 21:27:23 +0100
commit229529655f061863ec76db9ea557fef8b1a5161b (patch)
treea0363b7385e7a2e4f16e4c3bb65e1ee1475d2108
parent3371a8b471eea344a99f015cdf0ef9089b1f20ef (diff)
downloadrpg-229529655f061863ec76db9ea557fef8b1a5161b.tar.gz
rpg-229529655f061863ec76db9ea557fef8b1a5161b.tar.bz2
Limiting use of readdir in the idx_indexLengths function (v2).
Every search on the wiki uses this function. Scanning the index directory each time is time-consuming because it causes a constant series of disk accesses. A normal search now calls file_exists one or more times instead of reading the whole directory with readdir. A wildcard search now uses a lengths.idx file containing all the word lengths used in the wiki; this file is generated only if the new configuration parameter $conf['readdircache'] is not 0, in which case its value is the cache lifetime in seconds. A new function, idx_listIndexLengths, was created to do this part.
-rw-r--r--_test/cases/inc/indexer_idx_indexlengths.test.php121
-rw-r--r--conf/dokuwiki.php1
-rw-r--r--inc/indexer.php85
-rw-r--r--lib/plugins/config/lang/en/lang.php1
-rw-r--r--lib/plugins/config/settings/config.metadata.php1
5 files changed, 189 insertions, 20 deletions
diff --git a/_test/cases/inc/indexer_idx_indexlengths.test.php b/_test/cases/inc/indexer_idx_indexlengths.test.php
new file mode 100644
index 000000000..d1339a111
--- /dev/null
+++ b/_test/cases/inc/indexer_idx_indexlengths.test.php
@@ -0,0 +1,121 @@
+<?php
+
+require_once DOKU_INC.'inc/indexer.php';
+
+class indexer_idx_indexlengths_test extends UnitTestCase {
+
+    /**
+     * A filter array with a single word length must yield exactly that length
+     */
+    function test_oneWord(){
+        global $conf;
+        $filter[8] = array('dokuwiki');
+        // expects only length 8 back (presumably the test index contains i8.idx - confirm against the fixture)
+        $ref[] = 8;
+        sort($ref);
+        $result = idx_indexLengths($filter); // callee declares &$filter; no call-time "&" (fatal since PHP 5.4)
+        sort($result);
+        $this->assertIdentical($result, $ref);
+    }
+
+    /**
+     * A filter array with several word lengths must yield all of those lengths
+     */
+    function test_moreWords() {
+        global $conf;
+        $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered'));
+        // every requested length is expected back; both sides are sorted before comparing
+        $ref = array(4, 7, 8);
+        sort($ref);
+        $result = idx_indexLengths($filter); // callee declares &$filter; no call-time "&" (fatal since PHP 5.4)
+        sort($result);
+        $this->assertIdentical($result, $ref);
+    }
+
+    /**
+     * A scalar filter is a minimal length (wildcard search): all lengths >= it are returned
+     */
+    function test_minValue() {
+        global $conf;
+        $filter = 5;
+        // build the expected list by scanning the index directory for i<N>.idx files with N >= $filter
+        $dir = @opendir($conf['indexdir']);
+        $ref = array();
+        while (($f = readdir($dir)) !== false) {
+            if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
+                $i = substr($f,1,-4);
+                if (is_numeric($i) && $i >= $filter)
+                    $ref[] = (int)$i;
+            }
+        }
+        closedir($dir);
+        sort($ref);
+        $result = idx_indexLengths($filter); // callee declares &$filter; no call-time "&" (fatal since PHP 5.4)
+        sort($result);
+        $this->assertIdentical($result, $ref);
+    }
+}
+
+class indexer_idx_indexlengths_time extends UnitTestCase {
+
+    /**
+     * Test the time improvements of the new function
+     * Time reference for 10000 calls with one word: 4.6s (author's measurement)
+     * The new code must run in under 10% of that time, i.e. be 90% faster
+     */
+    function test_oneWord(){
+        global $conf;
+        $filter[8] = array('dokuwiki');
+        $start = microtime(true);
+        for ($i = 0; $i < 10000; $i++) {
+            $result = idx_indexLengths($filter); // callee declares &$filter; no call-time "&" (fatal since PHP 5.4)
+        }
+        $end = microtime(true);
+        $time = $end - $start;
+        $timeref = 4.6*0.10; // 10% of the former execution time of 4.6s for 10000 calls
+        echo "1) 10% ref : $timeref -> $time \n";
+        $this->assertTrue($time < $timeref);
+    }
+
+    /**
+     * Test the time improvements of the new function
+     * Time reference for 10000 calls with several words: 4.6s (author's measurement)
+     * The new code must run in under 10% of that time, i.e. be 90% faster
+     */
+    function test_moreWords() {
+        global $conf;
+        $filter = array( 4 => array('test'), 8 => array('dokuwiki'), 7 => array('powered'));
+        // only the elapsed time is checked here, not the returned lengths
+        $start = microtime(true);
+        for ($i = 0; $i < 10000; $i++) {
+            $result = idx_indexLengths($filter); // callee declares &$filter; no call-time "&" (fatal since PHP 5.4)
+        }
+        $end = microtime(true);
+        $time = $end - $start;
+        $timeref = 4.6*0.10; // 10% of the former execution time of 4.6s for 10000 calls
+        echo "2) 10% ref : $timeref -> $time \n";
+        $this->assertTrue($time < $timeref);
+    }
+
+    /**
+     * Test the time improvements of the new function
+     * Time reference for 10000 calls with a minimal value: 4.9s (author's measurement)
+     * Should be at least 65% faster
+     * The test fails when the readdir cache is disabled
+     */
+    function test_minValue() {
+        global $conf;
+        $filter = 5;
+        $start = microtime(true);
+        for ($i = 0; $i < 10000; $i++) {
+            $result = idx_indexLengths($filter); // callee declares &$filter; no call-time "&" (fatal since PHP 5.4)
+        }
+        $end = microtime(true);
+        $time = $end - $start;
+        $timeref = 4.9 * 0.35; // 35% of the former execution time of 4.9s for 10000 calls
+        echo "3) 35% ref : $timeref -> $time \n";
+        $this->assertTrue($time < $timeref);
+    }
+}
+
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php
index 19397861c..d3823eb94 100644
--- a/conf/dokuwiki.php
+++ b/conf/dokuwiki.php
@@ -156,3 +156,4 @@ $conf['ftp']['user'] = 'user';
$conf['ftp']['pass'] = 'password';
$conf['ftp']['root'] = '/home/user/htdocs';
+$conf['readdircache'] = 0; //cache lifetime in seconds for the readdir operation, 0 to deactivate.
diff --git a/inc/indexer.php b/inc/indexer.php
index 07b67c014..54277a88c 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -411,40 +411,85 @@ function idx_updateIndexLine($line,$pid,$count){
}
/**
+ * Get the list of lengths indexed in the wiki
+ *
+ * Read the index directory or a cache file and returns
+ * a sorted array of lengths of the words used in the wiki.
+ *
+ * @author YoBoY <yoboy.leguesh@gmail.com>
+ */
+function idx_listIndexLengths() {
+    global $conf;
+    // Decide whether the lengths.idx cache file is used; a readdircache of 0 disables caching.
+    if ($conf['readdircache'] == 0) {
+        $docache = false;
+    } else {
+        clearstatcache();
+        if (@file_exists($conf['indexdir'].'/lengths.idx') and (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) {
+            if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ) !== false) {
+                $idx = array();
+                foreach ( $lengths as $length) {
+                    $idx[] = (int)$length;
+                }
+                return $idx;
+            }
+        }
+        $docache = true;
+    }
+
+    if ($conf['readdircache'] == 0 or $docache ) {
+        $dir = @opendir($conf['indexdir']);
+        if($dir===false)
+            return array();
+        $idx = array(); // start from an empty list; "$idx[] = array()" would store a nested array as first entry
+        while (($f = readdir($dir)) !== false) {
+            if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
+                $i = substr($f,1,-4);
+                if (is_numeric($i))
+                    $idx[] = (int)$i;
+            }
+        }
+        closedir($dir);
+        sort($idx);
+        // Refresh the cache file, best effort: a failed write is ignored and the
+        // directory is simply scanned again on the next call.
+        if ($docache === true) {
+            // one call opens, truncates, writes and closes; unlike fwrite() it never receives a failed handle
+            @file_put_contents($conf['indexdir'].'/lengths.idx', implode("\n",$idx));
+        }
+        return $idx;
+    }
+
+    return array();
+}
+
+/**
* Get the word lengths that have been indexed.
*
* Reads the index directory and returns an array of lengths
* that there are indices for.
*
- * @author Tom N Harris <tnharris@whoopdedo.org>
+ * @author YoBoY <yoboy.leguesh@gmail.com>
*/
function idx_indexLengths(&$filter){
global $conf;
- $dir = @opendir($conf['indexdir']);
- if($dir===false)
- return array();
$idx = array();
- if(is_array($filter)){
- while (($f = readdir($dir)) !== false) {
- if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
- $i = substr($f,1,-4);
- if (is_numeric($i) && isset($filter[(int)$i]))
- $idx[] = (int)$i;
+ if (is_array($filter)){
+ // testing if index files exists only
+ foreach ($filter as $key => $value) {
+ if (@file_exists($conf['indexdir']."/i$key.idx")) {
+ $idx[] = $key;
}
}
- }else{
- // Exact match first.
- if(@file_exists($conf['indexdir']."/i$filter.idx"))
- $idx[] = $filter;
- while (($f = readdir($dir)) !== false) {
- if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
- $i = substr($f,1,-4);
- if (is_numeric($i) && $i > $filter)
- $idx[] = (int)$i;
+ } else {
+ $lengths = idx_listIndexLengths();
+ foreach ( $lengths as $key => $length) {
+ // we keep all the values equal or superior
+ if ((int)$length >= (int)$filter) {
+ $idx[] = $length;
}
}
}
- closedir($dir);
return $idx;
}
diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php
index 7198ca36d..2bcd17c12 100644
--- a/lib/plugins/config/lang/en/lang.php
+++ b/lib/plugins/config/lang/en/lang.php
@@ -234,3 +234,4 @@ $lang['useheading_o_navigation'] = 'Navigation Only';
$lang['useheading_o_content'] = 'Wiki Content Only';
$lang['useheading_o_1'] = 'Always';
+$lang['readdircache'] = 'Maximum age for readdir cache (sec)';
diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php
index 2f91b0ad8..cea191f56 100644
--- a/lib/plugins/config/settings/config.metadata.php
+++ b/lib/plugins/config/settings/config.metadata.php
@@ -188,6 +188,7 @@ $meta['rss_show_summary'] = array('onoff');
$meta['broken_iua'] = array('onoff');
$meta['xsendfile'] = array('multichoice','_choices' => array(0,1,2,3));
$meta['renderer_xhtml'] = array('renderer','_format' => 'xhtml','_choices' => array('xhtml'));
+$meta['readdircache'] = array('numeric');
$meta['_network'] = array('fieldset');
$meta['proxy____host'] = array('string','_pattern' => '#^(|[a-z0-9\-\.+]+)$#i');