summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Gohr <andi@splitbrain.org> 2007-02-26 18:55:29 +0100
committer Andreas Gohr <andi@splitbrain.org> 2007-02-26 18:55:29 +0100
commit adb16d4f36e6e2cfb75ce8c8d42724c7705dfe3c (patch)
tree 9250cd0d35584796bdeb9ea0cc342332b82c3b36
parent 2cb8129d7f0d4f419837f2ccd9385aceae0cd8e3 (diff)
download rpg-adb16d4f36e6e2cfb75ce8c8d42724c7705dfe3c.tar.gz
rpg-adb16d4f36e6e2cfb75ce8c8d42724c7705dfe3c.tar.bz2
sorted indexer is now default
darcs-hash:20070226175529-7ad00-4d3d984da1edbf2ded546cfbd7374f97f032d032.gz
-rw-r--r-- conf/dokuwiki.php 2
-rw-r--r-- inc/indexer.php 68
-rw-r--r-- lib/plugins/config/lang/en/lang.php 2
-rw-r--r-- lib/plugins/config/settings/config.metadata.php 1
4 files changed, 6 insertions, 67 deletions
diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php
index d442f0e93..a6ba7d6ce 100644
--- a/conf/dokuwiki.php
+++ b/conf/dokuwiki.php
@@ -134,5 +134,3 @@ $conf['ftp']['user'] = 'user';
$conf['ftp']['pass'] = 'password';
$conf['ftp']['root'] = '/home/user/htdocs';
-/* FIXME: delete when no longer needed */
-$conf['test_indexer'] = 0;
diff --git a/inc/indexer.php b/inc/indexer.php
index 09b43a1b5..5a88ac439 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -253,8 +253,8 @@ function idx_writeIndexLine($fh,$line,$pid,$count){
/**
* Modify an index line with new information
*
- * This returns a line of the index. It removes the
- * given document from the line and readds it if
+ * This returns a line of the index. It removes the
+ * given document from the line and readds it if
* $count is >0.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
@@ -324,72 +324,19 @@ function idx_indexLengths(&$filter){
/**
* Find the index number of each search term.
*
- * There are two variation: Simple and Sorted.
- * The simple version just takes the words one at a time.
- * The sorted version will group together words that appear in the same index.
+ * This will group together words that appear in the same index.
* So it should perform better, because it only opens each index once.
* Actually, it's not that great. (in my experience) Probably because of the disk cache.
* And the sorted function does more work, making it slightly slower in some cases.
*
- * For now, you can choose to use the sorted version by setting $conf['test_indexer'] = 1
- * Eventually, the more worthy will be chosen and the loser cast into the deepest depths.
- *
* @param array $words The query terms. Words should only contain valid characters,
* with a '*' at either the beginning or end of the word (or both)
- * @param arrayref $result Set to word => array("length*id" ...), use this to merge the
+ * @param arrayref $result Set to word => array("length*id" ...), use this to merge the
* index locations with the appropriate query term.
* @return array Set to length => array(id ...)
*
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
-function idx_getIndexWordsSimple($words, &$result){
- // get word IDs
- $wids = array();
- foreach($words as $word){
- $result[$word] = array();
- $wild = 0;
- $xword = $word;
- $wlen = wordlen($word);
-
- // check for wildcards
- if(substr($xword,0,1) == '*'){
- $xword = substr($xword,1);
- $wild |= 1;
- $wlen -= 1;
- }
- if(substr($xword,-1,1) == '*'){
- $xword = substr($xword,0,-1);
- $wild |= 2;
- $wlen -= 1;
- }
- if ($wlen < 3 && $wild == 0 && !is_numeric($xword)) continue;
-
- // look for the ID(s) for the given word
- if($wild){ // handle wildcard search
- $ptn = preg_quote($xword,'/');
- if(($wild&1) == 0) $ptn = '^'.$ptn;
- if(($wild&2) == 0) $ptn = $ptn.'$';
- $ptn = '/'.$ptn.'/';
- foreach (idx_indexLengths($wlen) as $ixlen){
- $word_idx = idx_getIndex('w',$ixlen);
- foreach(array_keys(preg_grep($ptn,$word_idx)) as $wid){
- $wids[$ixlen][] = $wid;
- $result[$word][] = "$ixlen*$wid";
- }
- }
- }else{ // handle exact search
- $word_idx = idx_getIndex('w',$wlen);
- $wid = array_search("$word\n",$word_idx);
- if(is_int($wid)){
- $wids[$wlen][] = $wid;
- $result[$word][] = "$wlen*$wid";
- }else{
- $result[$word] = array();
- }
- }
- }
- return $wids;
-}
function idx_getIndexWordsSorted($words,&$result){
// parse and sort tokens
$tokens = array();
@@ -480,15 +427,12 @@ function idx_lookup($words){
$result = array();
- if(isset($conf['test_indexer']) && ($conf['test_indexer']&1))
- $wids = idx_getIndexWordsSorted($words, $result);
- else
- $wids = idx_getIndexWordsSimple($words, $result);
+ $wids = idx_getIndexWordsSorted($words, $result);
if(empty($wids)) return array();
// load known words and documents
$page_idx = idx_getIndex('page','');
-
+
$docs = array(); // hold docs found
foreach(array_keys($wids) as $wlen){
$wids[$wlen] = array_unique($wids[$wlen]);
diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php
index 825f5cf85..41f688d0a 100644
--- a/lib/plugins/config/lang/en/lang.php
+++ b/lib/plugins/config/lang/en/lang.php
@@ -184,5 +184,3 @@ $lang['compression_o_0'] = 'none';
$lang['compression_o_gz'] = 'gzip';
$lang['compression_o_bz2'] = 'bz2';
-/* FIXME: delete when no longer needed */
-$lang['test_indexer'] = 'New Indexer testing bitfield: 0x1 -> sorted searching';
diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php
index ee8db882f..20a62f72d 100644
--- a/lib/plugins/config/settings/config.metadata.php
+++ b/lib/plugins/config/settings/config.metadata.php
@@ -167,7 +167,6 @@ $meta['rss_update'] = array('numeric');
$meta['recent_days'] = array('numeric');
$meta['rss_show_summary'] = array('onoff');
$meta['broken_iua'] = array('onoff');
-$meta['test_indexer'] = array('numeric'); // FIXME: delete when no longer needed
$meta['_network'] = array('fieldset');
$meta['proxy____host'] = array('string','_pattern' => '#^[a-z0-9\-\.+]+?#i');