summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/indexer.php39
-rw-r--r--inc/Sitemapper.php2
-rw-r--r--inc/fulltext.php73
-rw-r--r--inc/indexer.php54
-rw-r--r--inc/init.php2
-rw-r--r--lib/exe/indexer.php35
-rw-r--r--lib/exe/xmlrpc.php27
7 files changed, 111 insertions, 121 deletions
diff --git a/bin/indexer.php b/bin/indexer.php
index 497c6146a..0d523df6e 100755
--- a/bin/indexer.php
+++ b/bin/indexer.php
@@ -24,6 +24,7 @@ if ( $OPTS->isError() ) {
}
$CLEAR = false;
$QUIET = false;
+$INDEXER = null;
foreach ($OPTS->options as $key => $val) {
switch ($key) {
case 'h':
@@ -66,6 +67,9 @@ function _usage() {
function _update(){
global $conf;
+ global $INDEXER;
+
+ $INDEXER = idx_get_indexer();
$data = array();
_quietecho("Searching pages... ");
@@ -78,25 +82,47 @@ function _update(){
}
function _index($id){
+ global $INDEXER;
global $CLEAR;
+ global $QUIET;
// if not cleared only update changed and new files
if(!$CLEAR){
$idxtag = metaFN($id,'.indexed');
if(@file_exists($idxtag)){
if(io_readFile($idxtag) == idx_get_version()){
- $last = @filemtime(metaFN($id,'.indexed'));
+ $last = @filemtime($idxtag);
if($last > @filemtime(wikiFN($id))) return;
}
}
}
- _lock();
_quietecho("$id... ");
- idx_addPage($id);
- io_saveFile(metaFN($id,'.indexed'), idx_get_version());
+ $body = '';
+ $data = array($id, $body);
+ $evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
+ if ($evt->advise_before()) $data[1] = $data[1] . " " . rawWiki($id);
+ $evt->advise_after();
+ unset($evt);
+ list($id,$body) = $data;
+ $said = false;
+ while(true) {
+ $result = $INDEXER->addPageWords($id, $body);
+ if ($result === "locked") { // strict check: a successful boolean true loosely equals "locked" and would loop forever
+ if($said){
+ _quietecho(".");
+ }else{
+ _quietecho("Waiting for lockfile (max. 5 min)");
+ $said = true;
+ }
+ sleep(15); // wait before retrying while another process holds the indexer lock
+ } else {
+ break;
+ }
+ }
+ if ($result) // only tag the page as indexed when addPageWords() reported success
+ io_saveFile(metaFN($id,'.indexed'), idx_get_version());
_quietecho("done.\n");
- _unlock();
}
/**
@@ -141,7 +167,7 @@ function _clearindex(){
_lock();
_quietecho("Clearing index... ");
io_saveFile($conf['indexdir'].'/page.idx','');
- io_saveFile($conf['indexdir'].'/title.idx','');
+ //io_saveFile($conf['indexdir'].'/title.idx','');
$dir = @opendir($conf['indexdir']);
if($dir!==false){
while(($f = readdir($dir)) !== false){
@@ -150,6 +176,7 @@ function _clearindex(){
@unlink($conf['indexdir']."/$f");
}
}
+ @unlink($conf['indexdir'].'/lengths.idx');
_quietecho("done.\n");
_unlock();
}
diff --git a/inc/Sitemapper.php b/inc/Sitemapper.php
index 47a3fedb5..bbe1caf26 100644
--- a/inc/Sitemapper.php
+++ b/inc/Sitemapper.php
@@ -45,7 +45,7 @@ class Sitemapper {
dbglog("Sitemapper::generate(): using $sitemap"); // FIXME: Only in debug mode
- $pages = idx_getIndex('page', '');
+ $pages = idx_get_indexer()->getPages();
dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
$items = array();
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 7ace3a724..0411b9f99 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -36,19 +36,21 @@ function ft_pageSearch($query,&$highlight){
* @author Kazutaka Miyasaka <kazmiya@gmail.com>
*/
function _ft_pageSearch(&$data) {
+ $Indexer = idx_get_indexer();
+
// parse the given query
- $q = ft_queryParser($data['query']);
+ $q = ft_queryParser($Indexer, $data['query']);
$data['highlight'] = $q['highlight'];
if (empty($q['parsed_ary'])) return array();
// lookup all words found in the query
- $lookup = idx_lookup($q['words']);
+ $lookup = $Indexer->lookup($q['words']);
// get all pages in this dokuwiki site (!: includes nonexistent pages)
$pages_all = array();
- foreach (idx_getIndex('page', '') as $id) {
- $pages_all[trim($id)] = 0; // base: 0 hit
+ foreach ($Indexer->getPages() as $id) {
+ $pages_all[$id] = 0; // base: 0 hit
}
// process the query
@@ -126,15 +128,12 @@ function _ft_pageSearch(&$data) {
* evaluates the instructions of the found pages
*/
function ft_backlinks($id){
- global $conf;
- $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
- $stopwords = @file_exists($swfile) ? file($swfile) : array();
-
$result = array();
// quick lookup of the pagename
+ // FIXME use metadata key lookup
$page = noNS($id);
- $matches = idx_lookup(idx_tokenizer($page,$stopwords)); // pagename may contain specials (_ or .)
+ $matches = idx_lookup(idx_tokenizer($page)); // pagename may contain specials (_ or .)
$docs = array_keys(ft_resultCombine(array_values($matches)));
$docs = array_filter($docs,'isVisiblePage'); // discard hidden pages
if(!count($docs)) return $result;
@@ -168,17 +167,14 @@ function ft_backlinks($id){
* Aborts after $max found results
*/
function ft_mediause($id,$max){
- global $conf;
- $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
- $stopwords = @file_exists($swfile) ? file($swfile) : array();
-
if(!$max) $max = 1; // need to find at least one
$result = array();
// quick lookup of the mediafile
+ // FIXME use metadata key lookup
$media = noNS($id);
- $matches = idx_lookup(idx_tokenizer($media,$stopwords));
+ $matches = idx_lookup(idx_tokenizer($media));
$docs = array_keys(ft_resultCombine(array_values($matches)));
if(!count($docs)) return $result;
@@ -229,7 +225,6 @@ function ft_pageLookup($id, $in_ns=false, $in_title=false){
}
function _ft_pageLookup(&$data){
- global $conf;
// split out original parameters
$id = $data['id'];
if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) {
@@ -239,29 +234,27 @@ function _ft_pageLookup(&$data){
$in_ns = $data['in_ns'];
$in_title = $data['in_title'];
+ $cleaned = cleanID($id);
- $pages = array_map('rtrim', idx_getIndex('page', ''));
- $titles = array_map('rtrim', idx_getIndex('title', ''));
- // check for corrupt title index #FS2076
- if(count($pages) != count($titles)){
- $titles = array_fill(0,count($pages),'');
- @unlink($conf['indexdir'].'/title.idx'); // will be rebuilt in inc/init.php
- }
- $pages = array_combine($pages, $titles);
+ $Indexer = idx_get_indexer();
+ $page_idx = $Indexer->getPages();
- $cleaned = cleanID($id);
+ $pages = array();
if ($id !== '' && $cleaned !== '') {
- foreach ($pages as $p_id => $p_title) {
- if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) === false) &&
- (!$in_title || (stripos($p_title, $id) === false)) ) {
- unset($pages[$p_id]);
+ foreach ($page_idx as $p_id) {
+ if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) !== false)) {
+ if (!isset($pages[$p_id]))
+ $pages[$p_id] = p_get_first_heading($p_id, false);
}
}
+ //if ($in_title)
+ // $titles = $Indexer->lookupKey('title', "*$id*");
}
if (isset($ns)) {
- foreach (array_keys($pages) as $p_id) {
- if (strpos($p_id, $ns) !== 0) {
- unset($pages[$p_id]);
+ foreach ($page_idx as $p_id) {
+ if (strpos($p_id, $ns) === 0) {
+ if (!isset($pages[$p_id]))
+ $pages[$p_id] = p_get_first_heading($p_id, false);
}
}
}
@@ -494,11 +487,7 @@ function ft_resultComplement($args) {
* @author Andreas Gohr <andi@splitbrain.org>
* @author Kazutaka Miyasaka <kazmiya@gmail.com>
*/
-function ft_queryParser($query){
- global $conf;
- $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
- $stopwords = @file_exists($swfile) ? file($swfile) : array();
-
+function ft_queryParser($Indexer, $query){
/**
* parse a search query and transform it into intermediate representation
*
@@ -544,7 +533,7 @@ function ft_queryParser($query){
if (preg_match('/^(-?)"(.+)"$/u', $term, $matches)) {
// phrase-include and phrase-exclude
$not = $matches[1] ? 'NOT' : '';
- $parsed = $not.ft_termParser($matches[2], $stopwords, false, true);
+ $parsed = $not.ft_termParser($Indexer, $matches[2], false, true);
} else {
// fix incomplete phrase
$term = str_replace('"', ' ', $term);
@@ -591,10 +580,10 @@ function ft_queryParser($query){
$parsed .= '(N+:'.$matches[1].')';
} elseif (preg_match('/^-(.+)$/', $token, $matches)) {
// word-exclude
- $parsed .= 'NOT('.ft_termParser($matches[1], $stopwords).')';
+ $parsed .= 'NOT('.ft_termParser($Indexer, $matches[1]).')';
} else {
// word-include
- $parsed .= ft_termParser($token, $stopwords);
+ $parsed .= ft_termParser($Indexer, $token);
}
}
}
@@ -728,18 +717,18 @@ function ft_queryParser($query){
*
* @author Kazutaka Miyasaka <kazmiya@gmail.com>
*/
-function ft_termParser($term, &$stopwords, $consider_asian = true, $phrase_mode = false) {
+function ft_termParser($Indexer, $term, $consider_asian = true, $phrase_mode = false) {
$parsed = '';
if ($consider_asian) {
// successive asian characters need to be searched as a phrase
$words = preg_split('/('.IDX_ASIAN.'+)/u', $term, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
foreach ($words as $word) {
if (preg_match('/'.IDX_ASIAN.'/u', $word)) $phrase_mode = true;
- $parsed .= ft_termParser($word, $stopwords, false, $phrase_mode);
+ $parsed .= ft_termParser($Indexer, $word, false, $phrase_mode);
}
} else {
$term_noparen = str_replace(array('(', ')'), ' ', $term);
- $words = idx_tokenizer($term_noparen, $stopwords, true);
+ $words = $Indexer->tokenizer($term_noparen, true);
// W_: no need to highlight
if (empty($words)) {
diff --git a/inc/indexer.php b/inc/indexer.php
index 099b7e9fc..a61f3772a 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -97,7 +97,8 @@ class Doku_Indexer {
* @author Andreas Gohr <andi@splitbrain.org>
*/
public function addPageWords($page, $text) {
- $this->_lock();
+ if (!$this->_lock())
+ return "locked";
// load known documents
$page_idx = $this->_addIndexKey('page', '', $page);
@@ -348,12 +349,12 @@ class Doku_Indexer {
* in the returned list is an array with the page names as keys and the
* number of times that token appeas on the page as value.
*
- * @param array $tokens list of words to search for
+ * @param arrayref $tokens list of words to search for
* @return array list of page names with usage counts
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Andreas Gohr <andi@splitbrain.org>
*/
- public function lookup($tokens) {
+ public function lookup(&$tokens) {
$result = array();
$wids = $this->_getIndexWords($tokens, $result);
if (empty($wids)) return array();
@@ -397,10 +398,11 @@ class Doku_Indexer {
* @param string $key name of the metadata key to look for
* @param string $value search term to look for
* @param callback $func comparison function
- * @return array list with page names
+ * @return array list with page names, keys are query values if more than one given
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
public function lookupKey($key, $value, $func=null) {
+ return array();
}
/**
@@ -411,12 +413,12 @@ class Doku_Indexer {
* The $result parameter can be used to merge the index locations with
* the appropriate query term.
*
- * @param array $words The query terms.
+ * @param arrayref $words The query terms.
* @param arrayref $result Set to word => array("length*id" ...)
* @return array Set to length => array(id ...)
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
- private function _getIndexWords($words, &$result) {
+ private function _getIndexWords(&$words, &$result) {
$tokens = array();
$tokenlength = array();
$tokenwild = array();
@@ -807,7 +809,7 @@ class Doku_Indexer {
* @return object a Doku_Indexer
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
-function & idx_get_indexer() {
+function idx_get_indexer() {
static $Indexer = null;
if (is_null($Indexer)) {
$Indexer = new Doku_Indexer();
@@ -841,10 +843,23 @@ function & idx_get_stopwords() {
* Locking is handled internally.
*
* @param string $page name of the page to index
+ * @param boolean $verbose print status messages
* @return boolean the function completed successfully
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
-function idx_addPage($page) {
+function idx_addPage($page, $verbose=false) {
+ // check if indexing needed
+ $idxtag = metaFN($page,'.indexed');
+ if(@file_exists($idxtag)){
+ if(trim(io_readFile($idxtag)) == idx_get_version()){
+ $last = @filemtime($idxtag);
+ if($last > @filemtime(wikiFN($page))){ // compare with this page's own file; $ID is not in scope in this function
+ if ($verbose) print("Indexer: index for $page up to date".DOKU_LF);
+ return false;
+ }
+ }
+ }
+
$body = '';
$data = array($page, $body);
$evt = new Doku_Event('INDEXER_PAGE_ADD', $data);
@@ -853,8 +868,19 @@ function idx_addPage($page) {
unset($evt);
list($page,$body) = $data;
- $Indexer =& idx_get_indexer();
- return $Indexer->addPageWords($page, $body);
+ $Indexer = idx_get_indexer();
+ $result = $Indexer->addPageWords($page, $body);
+ if ($result === "locked") { // strict check: boolean true loosely equals any non-empty string, so == would misreport success as locked
+ if ($verbose) print("Indexer: locked".DOKU_LF);
+ return false;
+ }
+ if ($result) // record the index version only after a successful run
+ io_saveFile(metaFN($page,'.indexed'), idx_get_version());
+ if ($verbose) {
+ print("Indexer: finished".DOKU_LF);
+ return true;
+ }
+ return $result;
}
/**
@@ -866,11 +892,11 @@ function idx_addPage($page) {
* Important: No ACL checking is done here! All results are
* returned, regardless of permissions
*
- * @param array $words list of words to search for
+ * @param arrayref $words list of words to search for
* @return array list of pages found, associated with the search terms
*/
-function idx_lookup($words) {
- $Indexer =& idx_get_indexer();
+function idx_lookup(&$words) {
+ $Indexer = idx_get_indexer();
return $Indexer->lookup($words);
}
@@ -879,7 +905,7 @@ function idx_lookup($words) {
*
*/
function idx_tokenizer($string, $wc=false) {
- $Indexer =& idx_get_indexer();
+ $Indexer = idx_get_indexer();
return $Indexer->tokenizer($string, $wc);
}
diff --git a/inc/init.php b/inc/init.php
index ed4409729..1dc31a31f 100644
--- a/inc/init.php
+++ b/inc/init.php
@@ -276,6 +276,7 @@ function init_files(){
}
# create title index (needs to have same length as page.idx)
+ /*
$file = $conf['indexdir'].'/title.idx';
if(!@file_exists($file)){
$pages = file($conf['indexdir'].'/page.idx');
@@ -290,6 +291,7 @@ function init_files(){
nice_die("$file is not writable. Check your permissions settings!");
}
}
+ */
}
/**
diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php
index 55d860296..a5a7d6b2a 100644
--- a/lib/exe/indexer.php
+++ b/lib/exe/indexer.php
@@ -134,41 +134,8 @@ function runIndexer(){
if(!$ID) return false;
- // check if indexing needed
- $idxtag = metaFN($ID,'.indexed');
- if(@file_exists($idxtag)){
- if(trim(io_readFile($idxtag)) == idx_get_version()){
- $last = @filemtime($idxtag);
- if($last > @filemtime(wikiFN($ID))){
- print "runIndexer(): index for $ID up to date".NL;
- return false;
- }
- }
- }
-
- // try to aquire a lock
- $lock = $conf['lockdir'].'/_indexer.lock';
- while(!@mkdir($lock,$conf['dmode'])){
- usleep(50);
- if(time()-@filemtime($lock) > 60*5){
- // looks like a stale lock - remove it
- @rmdir($lock);
- print "runIndexer(): stale lock removed".NL;
- }else{
- print "runIndexer(): indexer locked".NL;
- return false;
- }
- }
- if($conf['dperm']) chmod($lock, $conf['dperm']);
-
// do the work
- idx_addPage($ID);
-
- // we're finished - save and free lock
- io_saveFile(metaFN($ID,'.indexed'), idx_get_version());
- @rmdir($lock);
- print "runIndexer(): finished".NL;
- return true;
+ return idx_addPage($ID, true);
}
/**
diff --git a/lib/exe/xmlrpc.php b/lib/exe/xmlrpc.php
index 410d4f6ba..84068f96e 100644
--- a/lib/exe/xmlrpc.php
+++ b/lib/exe/xmlrpc.php
@@ -355,9 +355,8 @@ class dokuwiki_xmlrpc_server extends IXR_IntrospectionServer {
*/
function listPages(){
$list = array();
- $pages = array_filter(array_filter(idx_getIndex('page', ''),
- 'isVisiblePage'),
- 'page_exists');
+ $pages = idx_get_indexer()->getPages();
+ $pages = array_filter(array_filter($pages,'isVisiblePage'),'page_exists');
foreach(array_keys($pages) as $idx) {
$perm = auth_quickaclcheck($pages[$idx]);
@@ -552,27 +551,7 @@ class dokuwiki_xmlrpc_server extends IXR_IntrospectionServer {
unlock($id);
// run the indexer if page wasn't indexed yet
- if(!@file_exists(metaFN($id, '.indexed'))) {
- // try to aquire a lock
- $lock = $conf['lockdir'].'/_indexer.lock';
- while(!@mkdir($lock,$conf['dmode'])){
- usleep(50);
- if(time()-@filemtime($lock) > 60*5){
- // looks like a stale lock - remove it
- @rmdir($lock);
- }else{
- return false;
- }
- }
- if($conf['dperm']) chmod($lock, $conf['dperm']);
-
- // do the work
- idx_addPage($id);
-
- // we're finished - save and free lock
- io_saveFile(metaFN($id,'.indexed'), idx_get_version());
- @rmdir($lock);
- }
+ idx_addPage($id);
return 0;
}