From ee0891d8ffd7e4a59c958b9546a3b8382e4e5991 Mon Sep 17 00:00:00 2001 From: Tom N Harris Date: Sun, 14 Nov 2010 14:18:51 -0500 Subject: Do not assume that index files will be backward compatible --- lib/exe/indexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php index 3fa81715b..4a6f74ba4 100644 --- a/lib/exe/indexer.php +++ b/lib/exe/indexer.php @@ -140,7 +140,7 @@ function runIndexer(){ // check if indexing needed $idxtag = metaFN($ID,'.indexed'); if(@file_exists($idxtag)){ - if(io_readFile($idxtag) >= INDEXER_VERSION){ + if(trim(io_readFile($idxtag)) == INDEXER_VERSION){ $last = @filemtime($idxtag); if($last > @filemtime(wikiFN($ID))){ print "runIndexer(): index for $ID up to date".NL; -- cgit v1.2.3 From 1c07b9e622d139fa815c955c89569f96342475fb Mon Sep 17 00:00:00 2001 From: Tom N Harris Date: Tue, 16 Nov 2010 18:09:53 -0500 Subject: Use external program to split pages into words An external tokenizer inserts extra spaces to mark words in the input text. The text is sent through STDIN and STDOUT file handles. A good choice for Chinese and Japanese is MeCab. http://sourceforge.net/projects/mecab/ With the command line 'mecab -O wakati' --- lib/plugins/config/lang/en/lang.php | 2 ++ lib/plugins/config/settings/config.metadata.php | 2 ++ 2 files changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php index a944d6bd7..85214bf98 100644 --- a/lib/plugins/config/lang/en/lang.php +++ b/lib/plugins/config/lang/en/lang.php @@ -141,6 +141,8 @@ $lang['renderer_xhtml'] = 'Renderer to use for main (xhtml) wiki output'; $lang['renderer__core'] = '%s (dokuwiki core)'; $lang['renderer__plugin'] = '%s (plugin)'; $lang['rememberme'] = 'Allow permanent login cookies (remember me)'; +$lang['external_tokenizer'] = 'Use an external program to split pages into words for indexing'; +$lang['tokenizer_cmd'] = 'Command line to start the external tokenizer'; $lang['rss_type'] = 'XML feed type'; $lang['rss_linkto'] = 'XML feed links to'; diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php index edba65262..331da5ab8 100644 --- a/lib/plugins/config/settings/config.metadata.php +++ b/lib/plugins/config/settings/config.metadata.php @@ -190,6 +190,8 @@ $meta['broken_iua'] = array('onoff'); $meta['xsendfile'] = array('multichoice','_choices' => array(0,1,2,3)); $meta['renderer_xhtml'] = array('renderer','_format' => 'xhtml','_choices' => array('xhtml')); $meta['readdircache'] = array('numeric'); +$meta['external_tokenizer'] = array('onoff'); +$meta['tokenizer_cmd'] = array('string'); $meta['_network'] = array('fieldset'); $meta['proxy____host'] = array('string','_pattern' => '#^(|[a-z0-9\-\.+]+)$#i'); -- cgit v1.2.3 From 7c2ef4e8d524fb9262c5a08831220f9fb2dc11fe Mon Sep 17 00:00:00 2001 From: Tom N Harris Date: Wed, 17 Nov 2010 17:02:31 -0500 Subject: Use a different indexer version when external tokenizer is enabled --- lib/exe/indexer.php | 7 ++----- lib/exe/xmlrpc.php | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php index 4a6f74ba4..55d860296 100644 --- a/lib/exe/indexer.php +++ b/lib/exe/indexer.php @@ -11,9 +11,6 @@ require_once(DOKU_INC.'inc/init.php'); session_write_close(); //close session if(!defined('NL')) define('NL',"\n"); -// Version tag used to force rebuild on upgrade -define('INDEXER_VERSION', 2); - // keep running after browser closes connection @ignore_user_abort(true); @@ -140,7 +137,7 @@ function runIndexer(){ // check if indexing needed $idxtag = metaFN($ID,'.indexed'); if(@file_exists($idxtag)){ - if(trim(io_readFile($idxtag)) == INDEXER_VERSION){ + if(trim(io_readFile($idxtag)) == idx_get_version()){ $last = @filemtime($idxtag); if($last > @filemtime(wikiFN($ID))){ print "runIndexer(): index for $ID up to date".NL; @@ -168,7 +165,7 @@ function runIndexer(){ idx_addPage($ID); // we're finished - save and free lock - io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION); + io_saveFile(metaFN($ID,'.indexed'), idx_get_version()); @rmdir($lock); print "runIndexer(): finished".NL; return true; diff --git a/lib/exe/xmlrpc.php b/lib/exe/xmlrpc.php index f06792361..410d4f6ba 100644 --- a/lib/exe/xmlrpc.php +++ b/lib/exe/xmlrpc.php @@ -1,7 +1,7 @@