diff options
author | Andreas Gohr <andi@splitbrain.org> | 2005-08-14 20:10:35 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2005-08-14 20:10:35 +0200 |
commit | 7367b36877bca568d785e01be802652b6a719884 (patch) | |
tree | a1e27b5806ba835d43f69373cf83247953cd1ee1 /inc/indexer.php | |
parent | 48665d389b9bb386283c08172b24f3af26628bce (diff) | |
download | rpg-7367b36877bca568d785e01be802652b6a719884.tar.gz rpg-7367b36877bca568d785e01be802652b6a719884.tar.bz2 |
added stopword support to the indexer, added indexer webbug
darcs-hash:20050814181035-7ad00-ed5d879d29fcee7f925f806456675605b058966a.gz
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- | inc/indexer.php | 14 |
1 files changed, 12 insertions, 2 deletions
```diff
diff --git a/inc/indexer.php b/inc/indexer.php
index 173b7aa3c..45eca2d8b 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -24,6 +24,12 @@ function idx_getPageWords($page){

 global $conf;
 $word_idx = file($conf['cachedir'].'/word.idx');
+$swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
+if(@file_exists($swfile)){
+$stopwords = file($swfile);
+}else{
+$stopwords = array();
+}

 // split page into words
 $body = rawWiki($page);
@@ -60,13 +66,17 @@ function idx_getPageWords($page){

 // checking minimum word-size (excepting numbers)
 if(!is_numeric($word)) {
-if(strlen($word) < 3) { #FIXME add config option for max wordsize
+if(strlen($word) < 3) {
 $doit = false;
 continue;
 }
 }

-//FIXME add stopword check
+// stopword check
+if(is_int(array_search("$word\n",$stopwords))){
+$doit = false;
+continue;
+}

 // get word ID
 $wid = array_search("$word\n",$word_idx);
```