diff options
author | Andreas Gohr <andi@splitbrain.org> | 2005-08-07 22:33:22 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2005-08-07 22:33:22 +0200 |
commit | b4ce25e9a449e7a6a78476bf94bca31cbc4259ce (patch) | |
tree | 4c76c49de6039340312c7e7e87ac2b1e8e256726 /inc/indexer.php | |
parent | 1caeb00a5a0b9894a582514ef385b71cab195092 (diff) | |
download | rpg-b4ce25e9a449e7a6a78476bf94bca31cbc4259ce.tar.gz rpg-b4ce25e9a449e7a6a78476bf94bca31cbc4259ce.tar.bz2 |
a first step for search indexing - nothing to see yet
darcs-hash:20050807203322-7ad00-6db6733f8fcd861366856635ba3d205fd3bb54da.gz
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- | inc/indexer.php | 70 |
1 files changed, 70 insertions, 0 deletions
<?php
/**
 * DokuWiki search indexer functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
// NOTE(review): DOKU_CONF is not defined in this file; presumably the
// bootstrap code defines it before this file is loaded - confirm.
require_once(DOKU_CONF.'dokuwiki.php');
require_once(DOKU_INC.'inc/io.php');
require_once(DOKU_INC.'inc/utf8.php');
require_once(DOKU_INC.'inc/parserutils.php');

/**
 * Build a word => occurrence-count list for a single page
 *
 * The page text is fetched with rawWiki(), passed through
 * utf8_stripspecials() and utf8_strtolower(), trimmed and split on single
 * spaces. Empty tokens are dropped, as are non-numeric tokens shorter than
 * three bytes. Numeric tokens are kept regardless of their length.
 *
 * based upon class.search_indexer_phpcms.php::index_entry
 *
 * @param  string $id  page id, handed unchanged to rawWiki()
 * @return array       map of word => number of occurrences; empty array
 *                     when no token survives the filtering
 */
function idx_getPageWords($id){
    $body  = rawWiki($id);
    // presumably replaces special characters by spaces, the third argument
    // naming additional characters to treat as special - confirm in utf8.php
    $body  = utf8_stripspecials($body,' ','._\-:');
    $body  = utf8_strtolower($body);
    $body  = trim($body);
    $words = explode(' ',$body);

    // Only kept so the keys of the result appear in the same order as before.
    // Counting below does NOT rely on equal words being adjacent: sort() with
    // default flags compares numeric strings numerically and all other
    // strings bytewise, which is not a consistent ordering for mixed input.
    sort($words);

    $index = array(); //resulting index

    //compact wordlist FIXME check for stopwords
    foreach($words as $word){
        // consecutive spaces produce empty tokens
        if($word === '') continue;

        // checking minimum word-size (excepting numbers)
        // NOTE(review): strlen() counts bytes, so a short word made of
        // multibyte characters can pass this check - confirm intent.
        #FIXME add config option for min wordsize
        if(!is_numeric($word) && strlen($word) < 3) continue;

        //FIXME add stopword check

        // Count by exact key instead of comparing with the previous word.
        // A loose comparison treats numeric strings such as "1000" and "1e3"
        // as equal and would increment a key that was never initialised.
        if(isset($index[$word])){
            $index[$word]++;
        }else{
            $index[$word] = 1;
        }
    }

    return $index;
}

//Setup VIM: ex: et ts=4 enc=utf-8 :