7 files changed, 111 insertions, 121 deletions
diff --git a/bin/indexer.php b/bin/indexer.php
index 497c6146a..0d523df6e 100755
--- a/bin/indexer.php
+++ b/bin/indexer.php
@@ -24,6 +24,7 @@ if ( $OPTS->isError() ) {
 }
 $CLEAR = false;
 $QUIET = false;
+$INDEXER = null;
 foreach ($OPTS->options as $key => $val) {
     switch ($key) {
         case 'h':
@@ -66,6 +67,9 @@ function _usage() {
 
 function _update(){
     global $conf;
+    global $INDEXER;
+
+    $INDEXER = idx_get_indexer();
 
     $data = array();
     _quietecho("Searching pages... ");
@@ -78,25 +82,47 @@ function _update(){
 }
 
 function _index($id){
+    global $INDEXER;
     global $CLEAR;
+    global $QUIET;
 
     // if not cleared only update changed and new files
     if(!$CLEAR){
         $idxtag = metaFN($id,'.indexed');
         if(@file_exists($idxtag)){
             if(io_readFile($idxtag) == idx_get_version()){
-                $last = @filemtime(metaFN($id,'.indexed'));
+                $last = @filemtime($idxtag);
                 if($last > @filemtime(wikiFN($id))) return;
             }
         }
     }
 
-    _lock();
     _quietecho("$id... ");
-    idx_addPage($id);
-    io_saveFile(metaFN($id,'.indexed'), idx_get_version());
+    $body = '';
+    $data = array($id, $body);
+    $evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
+    if ($evt->advise_before()) $data[1] = $data[1] . " " . rawWiki($id);
+    $evt->advise_after();
+    unset($evt);
+    list($id,$body) = $data;
+    $said = false;
+    while(true) {
+        $result = $INDEXER->addPageWords($id, $body);
+        if ($result == "locked") {
+            if($said){
+                _quietecho(".");
+            }else{
+                _quietecho("Waiting for lockfile (max. 5 min)");
+                $said = true;
+            }
+            sleep(15);
+        } else {
+            break;
+        }
+    }
+    if ($result)
+        io_saveFile(metaFN($id,'.indexed'), idx_get_version());
     _quietecho("done.\n");
-    _unlock();
 }
 
 /**
@@ -141,7 +167,7 @@ function _clearindex(){
     _lock();
     _quietecho("Clearing index... ");
     io_saveFile($conf['indexdir'].'/page.idx','');
-    io_saveFile($conf['indexdir'].'/title.idx','');
+    //io_saveFile($conf['indexdir'].'/title.idx','');
     $dir = @opendir($conf['indexdir']);
     if($dir!==false){
         while(($f = readdir($dir)) !== false){
@@ -150,6 +176,7 @@ function _clearindex(){
                 @unlink($conf['indexdir']."/$f");
         }
     }
+    @unlink($conf['indexdir'].'/lengths.idx');
     _quietecho("done.\n");
     _unlock();
 }
diff --git a/inc/Sitemapper.php b/inc/Sitemapper.php
index 47a3fedb5..bbe1caf26 100644
--- a/inc/Sitemapper.php
+++ b/inc/Sitemapper.php
@@ -45,7 +45,7 @@ class Sitemapper {
 
         dbglog("Sitemapper::generate(): using $sitemap"); // FIXME: Only in debug mode
 
-        $pages = idx_getIndex('page', '');
+        $pages = idx_get_indexer()->getPages();
         dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
         $items = array();
 
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 7ace3a724..0411b9f99 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -36,19 +36,21 @@ function ft_pageSearch($query,&$highlight){
  * @author Kazutaka Miyasaka <kazmiya@gmail.com>
  */
 function _ft_pageSearch(&$data) {
+    $Indexer = idx_get_indexer();
+
     // parse the given query
-    $q = ft_queryParser($data['query']);
+    $q = ft_queryParser($Indexer, $data['query']);
     $data['highlight'] = $q['highlight'];
 
     if (empty($q['parsed_ary'])) return array();
 
     // lookup all words found in the query
-    $lookup = idx_lookup($q['words']);
+    $lookup = $Indexer->lookup($q['words']);
 
     // get all pages in this dokuwiki site (!: includes nonexistent pages)
     $pages_all = array();
-    foreach (idx_getIndex('page', '') as $id) {
-        $pages_all[trim($id)] = 0; // base: 0 hit
+    foreach ($Indexer->getPages() as $id) {
+        $pages_all[$id] = 0; // base: 0 hit
     }
 
     // process the query
@@ -126,15 +128,12 @@ function _ft_pageSearch(&$data) {
  * evaluates the instructions of the found pages
  */
 function ft_backlinks($id){
-    global $conf;
-    $swfile   = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
-    $stopwords = @file_exists($swfile) ? file($swfile) : array();
-
     $result = array();
 
     // quick lookup of the pagename
+    // FIXME use metadata key lookup
     $page    = noNS($id);
-    $matches = idx_lookup(idx_tokenizer($page,$stopwords));  // pagename may contain specials (_ or .)
+    $matches = idx_lookup(idx_tokenizer($page));  // pagename may contain specials (_ or .)
     $docs    = array_keys(ft_resultCombine(array_values($matches)));
     $docs    = array_filter($docs,'isVisiblePage'); // discard hidden pages
     if(!count($docs)) return $result;
@@ -168,17 +167,14 @@ function ft_backlinks($id){
  * Aborts after $max found results
  */
 function ft_mediause($id,$max){
-    global $conf;
-    $swfile   = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
-    $stopwords = @file_exists($swfile) ? file($swfile) : array();
-
     if(!$max) $max = 1; // need to find at least one
 
     $result = array();
 
     // quick lookup of the mediafile
+    // FIXME use metadata key lookup
     $media   = noNS($id);
-    $matches = idx_lookup(idx_tokenizer($media,$stopwords));
+    $matches = idx_lookup(idx_tokenizer($media));
     $docs    = array_keys(ft_resultCombine(array_values($matches)));
     if(!count($docs)) return $result;
 
@@ -229,7 +225,6 @@ function ft_pageLookup($id, $in_ns=false, $in_title=false){
 }
 
 function _ft_pageLookup(&$data){
-    global $conf;
     // split out original parameters
     $id = $data['id'];
     if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) {
@@ -239,29 +234,27 @@ function _ft_pageLookup(&$data){
 
     $in_ns    = $data['in_ns'];
     $in_title = $data['in_title'];
+    $cleaned = cleanID($id);
 
-    $pages  = array_map('rtrim', idx_getIndex('page', ''));
-    $titles = array_map('rtrim', idx_getIndex('title', ''));
-    // check for corrupt title index #FS2076
-    if(count($pages) != count($titles)){
-        $titles = array_fill(0,count($pages),'');
-        @unlink($conf['indexdir'].'/title.idx'); // will be rebuilt in inc/init.php
-    }
-    $pages = array_combine($pages, $titles);
+    $Indexer = idx_get_indexer();
+    $page_idx = $Indexer->getPages();
 
-    $cleaned = cleanID($id);
+    $pages = array();
     if ($id !== '' && $cleaned !== '') {
-        foreach ($pages as $p_id => $p_title) {
-            if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) === false) &&
-                (!$in_title || (stripos($p_title, $id) === false)) ) {
-                unset($pages[$p_id]);
+        foreach ($page_idx as $p_id) {
+            if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) !== false)) {
+                if (!isset($pages[$p_id]))
+                    $pages[$p_id] = p_get_first_heading($p_id, false);
             }
         }
+        //if ($in_title)
+        //    $titles = $Indexer->lookupKey('title', "*$id*");
     }
     if (isset($ns)) {
-        foreach (array_keys($pages) as $p_id) {
-            if (strpos($p_id, $ns) !== 0) {
-                unset($pages[$p_id]);
+        foreach ($page_idx as $p_id) {
+            if (strpos($p_id, $ns) === 0) {
+                if (!isset($pages[$p_id]))
+                    $pages[$p_id] = p_get_first_heading($p_id, false);
             }
         }
     }
@@ -494,11 +487,7 @@ function ft_resultComplement($args) {
  * @author Andreas Gohr <andi@splitbrain.org>
  * @author Kazutaka Miyasaka <kazmiya@gmail.com>
  */
-function ft_queryParser($query){
-    global $conf;
-    $swfile    = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
-    $stopwords = @file_exists($swfile) ? file($swfile) : array();
-
+function ft_queryParser($Indexer, $query){
     /**
      * parse a search query and transform it into intermediate representation
      *
@@ -544,7 +533,7 @@ function ft_queryParser($query){
         if (preg_match('/^(-?)"(.+)"$/u', $term, $matches)) {
             // phrase-include and phrase-exclude
             $not = $matches[1] ? 'NOT' : '';
-            $parsed = $not.ft_termParser($matches[2], $stopwords, false, true);
+            $parsed = $not.ft_termParser($Indexer, $matches[2], false, true);
         } else {
             // fix incomplete phrase
             $term = str_replace('"', ' ', $term);
@@ -591,10 +580,10 @@ function ft_queryParser($query){
                     $parsed .= '(N+:'.$matches[1].')';
                 } elseif (preg_match('/^-(.+)$/', $token, $matches)) {
                     // word-exclude
-                    $parsed .= 'NOT('.ft_termParser($matches[1], $stopwords).')';
+                    $parsed .= 'NOT('.ft_termParser($Indexer, $matches[1]).')';
                 } else {
                     // word-include
-                    $parsed .= ft_termParser($token, $stopwords);
+                    $parsed .= ft_termParser($Indexer, $token);
                 }
             }
         }
@@ -728,18 +717,18 @@ function ft_queryParser($query){
  *
  * @author Kazutaka Miyasaka <kazmiya@gmail.com>
  */
-function ft_termParser($term, &$stopwords, $consider_asian = true, $phrase_mode = false) {
+function ft_termParser($Indexer, $term, $consider_asian = true, $phrase_mode = false) {
     $parsed = '';
     if ($consider_asian) {
         // successive asian characters need to be searched as a phrase
         $words = preg_split('/('.IDX_ASIAN.'+)/u', $term, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
         foreach ($words as $word) {
             if (preg_match('/'.IDX_ASIAN.'/u', $word)) $phrase_mode = true;
-            $parsed .= ft_termParser($word, $stopwords, false, $phrase_mode);
+            $parsed .= ft_termParser($Indexer, $word, false, $phrase_mode);
         }
     } else {
         $term_noparen = str_replace(array('(', ')'), ' ', $term);
-        $words = idx_tokenizer($term_noparen, $stopwords, true);
+        $words = $Indexer->tokenizer($term_noparen, true);
 
         // W_: no need to highlight
         if (empty($words)) {
diff --git a/inc/indexer.php b/inc/indexer.php
index 099b7e9fc..a61f3772a 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -97,7 +97,8 @@ class Doku_Indexer {
      * @author Andreas Gohr <andi@splitbrain.org>
      */
     public function addPageWords($page, $text) {
-        $this->_lock();
+        if (!$this->_lock())
+            return "locked";
 
         // load known documents
         $page_idx = $this->_addIndexKey('page', '', $page);
@@ -348,12 +349,12 @@ class Doku_Indexer {
      * in the returned list is an array with the page names as keys and the
      * number of times that token appeas on the page as value.
      *
-     * @param array     $tokens list of words to search for
+     * @param arrayref  $tokens list of words to search for
      * @return array            list of page names with usage counts
      * @author Tom N Harris <tnharris@whoopdedo.org>
      * @author Andreas Gohr <andi@splitbrain.org>
      */
-    public function lookup($tokens) {
+    public function lookup(&$tokens) {
         $result = array();
         $wids = $this->_getIndexWords($tokens, $result);
         if (empty($wids)) return array();
@@ -397,10 +398,11 @@ class Doku_Indexer {
      * @param string    $key    name of the metadata key to look for
      * @param string    $value  search term to look for
      * @param callback  $func   comparison function
-     * @return array            list with page names
+     * @return array            list with page names, keys are query values if more than one given
      * @author Tom N Harris <tnharris@whoopdedo.org>
      */
     public function lookupKey($key, $value, $func=null) {
+        return array();
     }
 
     /**
@@ -411,12 +413,12 @@ class Doku_Indexer {
      * The $result parameter can be used to merge the index locations with
      * the appropriate query term.
      *
-     * @param array     $words  The query terms.
+     * @param arrayref  $words  The query terms.
      * @param arrayref  $result Set to word => array("length*id" ...)
      * @return array            Set to length => array(id ...)
      * @author Tom N Harris <tnharris@whoopdedo.org>
      */
-    private function _getIndexWords($words, &$result) {
+    private function _getIndexWords(&$words, &$result) {
         $tokens = array();
         $tokenlength = array();
         $tokenwild = array();
@@ -807,7 +809,7 @@ class Doku_Indexer {
  * @return object               a Doku_Indexer
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
-function & idx_get_indexer() {
+function idx_get_indexer() {
     static $Indexer = null;
     if (is_null($Indexer)) {
         $Indexer = new Doku_Indexer();
@@ -841,10 +843,23 @@ function & idx_get_stopwords() {
  * Locking is handled internally.
  *
  * @param string        $page   name of the page to index
+ * @param boolean       $verbose    print status messages
  * @return boolean              the function completed successfully
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
-function idx_addPage($page) {
+function idx_addPage($page, $verbose=false) {
+    // skip the page if its .indexed tag matches the index version and is newer than the page file
+    $idxtag = metaFN($page,'.indexed');
+    if(@file_exists($idxtag)){
+        if(trim(io_readFile($idxtag)) == idx_get_version()){
+            $last = @filemtime($idxtag);
+            if($last > @filemtime(wikiFN($page))){
+                if ($verbose) print("Indexer: index for $page up to date".DOKU_LF);
+                return false;
+            }
+        }
+    }
+
     $body = '';
     $data = array($page, $body);
     $evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
@@ -853,8 +868,19 @@ function idx_addPage($page) {
     unset($evt);
     list($page,$body) = $data;
 
-    $Indexer =& idx_get_indexer();
-    return $Indexer->addPageWords($page, $body);
+    $Indexer = idx_get_indexer();
+    $result = $Indexer->addPageWords($page, $body);
+    if ($result == "locked") {
+        if ($verbose) print("Indexer: locked".DOKU_LF);
+        return false;
+    }
+    if ($result)
+        io_saveFile(metaFN($page,'.indexed'), idx_get_version());
+    if ($verbose) {
+        print("Indexer: finished".DOKU_LF);
+        return true;
+    }
+    return $result;
 }
 
 /**
@@ -866,11 +892,11 @@ function idx_addPage($page) {
  * Important: No ACL checking is done here! All results are
  *            returned, regardless of permissions
  *
- * @param array         $words  list of words to search for
+ * @param arrayref      $words  list of words to search for
  * @return array                list of pages found, associated with the search terms
  */
-function idx_lookup($words) {
-    $Indexer =& idx_get_indexer();
+function idx_lookup(&$words) {
+    $Indexer = idx_get_indexer();
     return $Indexer->lookup($words);
 }
 
@@ -879,7 +905,7 @@ function idx_lookup($words) {
  *
  */
 function idx_tokenizer($string, $wc=false) {
-    $Indexer =& idx_get_indexer();
+    $Indexer = idx_get_indexer();
     return $Indexer->tokenizer($string, $wc);
 }
 
diff --git a/inc/init.php b/inc/init.php
index ed4409729..1dc31a31f 100644
--- a/inc/init.php
+++ b/inc/init.php
@@ -276,6 +276,7 @@ function init_files(){
     }
 
     # create title index (needs to have same length as page.idx)
+    /*
     $file = $conf['indexdir'].'/title.idx';
     if(!@file_exists($file)){
         $pages = file($conf['indexdir'].'/page.idx');
@@ -290,6 +291,7 @@ function init_files(){
             nice_die("$file is not writable. Check your permissions settings!");
         }
     }
+    */
 }
 
 /**
diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php
index 55d860296..a5a7d6b2a 100644
--- a/lib/exe/indexer.php
+++ b/lib/exe/indexer.php
@@ -134,41 +134,8 @@ function runIndexer(){
 
     if(!$ID) return false;
 
-    // check if indexing needed
-    $idxtag = metaFN($ID,'.indexed');
-    if(@file_exists($idxtag)){
-        if(trim(io_readFile($idxtag)) == idx_get_version()){
-            $last = @filemtime($idxtag);
-            if($last > @filemtime(wikiFN($ID))){
-                print "runIndexer(): index for $ID up to date".NL;
-                return false;
-            }
-        }
-    }
-
-    // try to aquire a lock
-    $lock = $conf['lockdir'].'/_indexer.lock';
-    while(!@mkdir($lock,$conf['dmode'])){
-        usleep(50);
-        if(time()-@filemtime($lock) > 60*5){
-            // looks like a stale lock - remove it
-            @rmdir($lock);
-            print "runIndexer(): stale lock removed".NL;
-        }else{
-            print "runIndexer(): indexer locked".NL;
-            return false;
-        }
-    }
-    if($conf['dperm']) chmod($lock, $conf['dperm']);
-
     // do the work
-    idx_addPage($ID);
-
-    // we're finished - save and free lock
-    io_saveFile(metaFN($ID,'.indexed'), idx_get_version());
-    @rmdir($lock);
-    print "runIndexer(): finished".NL;
-    return true;
+    return idx_addPage($ID, true);
 }
 
 /**
diff --git a/lib/exe/xmlrpc.php b/lib/exe/xmlrpc.php
index 410d4f6ba..84068f96e 100644
--- a/lib/exe/xmlrpc.php
+++ b/lib/exe/xmlrpc.php
@@ -355,9 +355,8 @@ class dokuwiki_xmlrpc_server extends IXR_IntrospectionServer {
      */
     function listPages(){
         $list  = array();
-        $pages = array_filter(array_filter(idx_getIndex('page', ''),
-                                           'isVisiblePage'),
-                              'page_exists');
+        $pages = idx_get_indexer()->getPages();
+        $pages = array_filter(array_filter($pages,'isVisiblePage'),'page_exists');
 
         foreach(array_keys($pages) as $idx) {
             $perm = auth_quickaclcheck($pages[$idx]);
@@ -552,27 +551,7 @@ class dokuwiki_xmlrpc_server extends IXR_IntrospectionServer {
         unlock($id);
 
         // run the indexer if page wasn't indexed yet
-        if(!@file_exists(metaFN($id, '.indexed'))) {
-            // try to aquire a lock
-            $lock = $conf['lockdir'].'/_indexer.lock';
-            while(!@mkdir($lock,$conf['dmode'])){
-                usleep(50);
-                if(time()-@filemtime($lock) > 60*5){
-                    // looks like a stale lock - remove it
-                    @rmdir($lock);
-                }else{
-                    return false;
-                }
-            }
-            if($conf['dperm']) chmod($lock, $conf['dperm']);
-
-            // do the work
-            idx_addPage($id);
-
-            // we're finished - save and free lock
-            io_saveFile(metaFN($id,'.indexed'), idx_get_version());
-            @rmdir($lock);
-        }
+        idx_addPage($id);
 
         return 0;
     }