summary | refs | log | tree | commit | diff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author: Andreas Gohr <andi@splitbrain.org> 2005-08-14 20:10:35 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2005-08-14 20:10:35 +0200
commit: 7367b36877bca568d785e01be802652b6a719884 (patch)
tree: a1e27b5806ba835d43f69373cf83247953cd1ee1 /inc/indexer.php
parent: 48665d389b9bb386283c08172b24f3af26628bce (diff)
download: rpg-7367b36877bca568d785e01be802652b6a719884.tar.gz
rpg-7367b36877bca568d785e01be802652b6a719884.tar.bz2
added stopword support to the indexer, added indexer webbug
darcs-hash:20050814181035-7ad00-ed5d879d29fcee7f925f806456675605b058966a.gz
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 14
1 files changed, 12 insertions, 2 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 173b7aa3c..45eca2d8b 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -24,6 +24,12 @@
function idx_getPageWords($page){
global $conf;
$word_idx = file($conf['cachedir'].'/word.idx');
+ $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
+ if(@file_exists($swfile)){
+ $stopwords = file($swfile);
+ }else{
+ $stopwords = array();
+ }
// split page into words
$body = rawWiki($page);
@@ -60,13 +66,17 @@ function idx_getPageWords($page){
// checking minimum word-size (excepting numbers)
if(!is_numeric($word)) {
- if(strlen($word) < 3) { #FIXME add config option for max wordsize
+ if(strlen($word) < 3) {
$doit = false;
continue;
}
}
- //FIXME add stopword check
+ // stopword check
+ if(is_int(array_search("$word\n",$stopwords))){
+ $doit = false;
+ continue;
+ }
// get word ID
$wid = array_search("$word\n",$word_idx);