summaryrefslogtreecommitdiff
path: root/inc/indexer.php
diff options
context:
space:
mode:
authorAndreas Gohr <andi@splitbrain.org>2005-08-07 22:33:22 +0200
committerAndreas Gohr <andi@splitbrain.org>2005-08-07 22:33:22 +0200
commitb4ce25e9a449e7a6a78476bf94bca31cbc4259ce (patch)
tree4c76c49de6039340312c7e7e87ac2b1e8e256726 /inc/indexer.php
parent1caeb00a5a0b9894a582514ef385b71cab195092 (diff)
downloadrpg-b4ce25e9a449e7a6a78476bf94bca31cbc4259ce.tar.gz
rpg-b4ce25e9a449e7a6a78476bf94bca31cbc4259ce.tar.bz2
a first step for search indexing - nothing to see yet
darcs-hash:20050807203322-7ad00-6db6733f8fcd861366856635ba3d205fd3bb54da.gz
Diffstat (limited to 'inc/indexer.php')
-rw-r--r--inc/indexer.php70
1 files changed, 70 insertions, 0 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
new file mode 100644
index 000000000..3fb710338
--- /dev/null
+++ b/inc/indexer.php
@@ -0,0 +1,70 @@
+<?php
+/**
+ * Functions to create the fulltext search index
+ *
+ * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+
+ if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
+ require_once(DOKU_CONF.'dokuwiki.php');
+ require_once(DOKU_INC.'inc/io.php');
+ require_once(DOKU_INC.'inc/utf8.php');
+ require_once(DOKU_INC.'inc/parserutils.php');
+
+/**
+ * Build a word => occurrence-count list for a single wiki page
+ *
+ * The raw page text is passed through utf8_stripspecials() and
+ * utf8_strtolower(), trimmed and split at single spaces. Every remaining
+ * token is counted. Non-numeric tokens shorter than 3 bytes are skipped;
+ * numeric tokens are kept regardless of their length.
+ *
+ * Based upon class.search_indexer_phpcms.php::index_entry
+ *
+ * @param  string $id  ID of the page to read (handed to rawWiki())
+ * @return array       word => number of occurrences, in string sort order
+ *                     of the words (PHP stores decimal integer words such
+ *                     as "42" under integer keys)
+ */
+function idx_getPageWords($id){
+    $body  = rawWiki($id);
+    $body  = utf8_stripspecials($body,' ','._\-:');
+    $body  = utf8_strtolower($body);
+    $body  = trim($body);
+    $words = explode(' ',$body);
+
+    // Sort as plain strings. The default flag compares numeric looking
+    // tokens by value ("1" equals "01") and gives no consistent order for
+    // mixed numeric/non-numeric tokens, so equal words could end up apart.
+    sort($words,SORT_STRING);
+
+    $index = array(); //resulting index
+
+    //compact wordlist FIXME check for stopwords
+
+    foreach($words as $word){
+        // consecutive spaces leave empty tokens behind
+        if(strlen($word) == 0) continue;
+
+        // word was accepted before: just increase the counter.
+        // Looking the word up in the index itself (instead of comparing it
+        // to the previous token) keeps the count right in any order and
+        // never increments a key that was not set.
+        if(isset($index[$word])){
+            $index[$word]++;
+            continue;
+        }
+
+        // checking minimum word-size (excepting numbers)
+        // strlen() counts bytes, so a multibyte character counts more than once
+        if(!is_numeric($word) && strlen($word) < 3) { #FIXME add config option for min wordsize
+            // rejected words never enter the index, so every repetition
+            // ends up here again and is skipped as well
+            continue;
+        }
+
+        //FIXME add stopword check
+
+        // add to index
+        $index[$word] = 1;
+    }
+
+    return $index;
+}
+
+
+
+//Setup VIM: ex: et ts=4 enc=utf-8 :