diff options
author | Tom N Harris <tnharris@whoopdedo.org> | 2010-11-16 18:09:53 -0500 |
---|---|---|
committer | Tom N Harris <tnharris@whoopdedo.org> | 2010-11-16 18:09:53 -0500 |
commit | 1c07b9e622d139fa815c955c89569f96342475fb (patch) | |
tree | 08b1d84b5d1fa7c3b1b22c89a9be6efd3e543704 /lib/plugins | |
parent | 6c528220aaf62f4ba5890483797d6661352500bb (diff) | |
download | rpg-1c07b9e622d139fa815c955c89569f96342475fb.tar.gz rpg-1c07b9e622d139fa815c955c89569f96342475fb.tar.bz2 |
Use external program to split pages into words
An external tokenizer inserts extra spaces to mark words in the input text.
The text is sent to the external program on its STDIN, and the tokenized result is read back from its STDOUT.
A good choice for Chinese and Japanese is MeCab.
http://sourceforge.net/projects/mecab/
Run MeCab with the command line 'mecab -O wakati'.
Diffstat (limited to 'lib/plugins')
-rw-r--r-- | lib/plugins/config/lang/en/lang.php | 2 | ||||
-rw-r--r-- | lib/plugins/config/settings/config.metadata.php | 2 |
2 files changed, 4 insertions, 0 deletions
diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php index a944d6bd7..85214bf98 100644 --- a/lib/plugins/config/lang/en/lang.php +++ b/lib/plugins/config/lang/en/lang.php @@ -141,6 +141,8 @@ $lang['renderer_xhtml'] = 'Renderer to use for main (xhtml) wiki output'; $lang['renderer__core'] = '%s (dokuwiki core)'; $lang['renderer__plugin'] = '%s (plugin)'; $lang['rememberme'] = 'Allow permanent login cookies (remember me)'; +$lang['external_tokenizer'] = 'Use an external program to split pages into words for indexing'; +$lang['tokenizer_cmd'] = 'Command line to start the external tokenizer'; $lang['rss_type'] = 'XML feed type'; $lang['rss_linkto'] = 'XML feed links to'; diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php index edba65262..331da5ab8 100644 --- a/lib/plugins/config/settings/config.metadata.php +++ b/lib/plugins/config/settings/config.metadata.php @@ -190,6 +190,8 @@ $meta['broken_iua'] = array('onoff'); $meta['xsendfile'] = array('multichoice','_choices' => array(0,1,2,3)); $meta['renderer_xhtml'] = array('renderer','_format' => 'xhtml','_choices' => array('xhtml')); $meta['readdircache'] = array('numeric'); +$meta['external_tokenizer'] = array('onoff'); +$meta['tokenizer_cmd'] = array('string'); $meta['_network'] = array('fieldset'); $meta['proxy____host'] = array('string','_pattern' => '#^(|[a-z0-9\-\.+]+)$#i'); |