blob: 3fb710338326a06b197ed25fb7954b167cfbdb6a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
<?php
/**
* Common DokuWiki functions
*
* @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
* @author Andreas Gohr <andi@splitbrain.org>
*/
// Fall back to the parent of this file's directory as the DokuWiki base
// path when the including script has not defined DOKU_INC already.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
}

// NOTE(review): DOKU_CONF is not defined anywhere in the visible part of
// this file - it has to be provided by whoever includes it.
require_once DOKU_CONF . 'dokuwiki.php';

// Libraries living below the base directory.
require_once DOKU_INC . 'inc/io.php';
require_once DOKU_INC . 'inc/utf8.php';
require_once DOKU_INC . 'inc/parserutils.php';
/**
 * Build a word => occurrence-count index for a single wiki page
 *
 * The raw wiki text of the page is run through utf8_stripspecials() and
 * utf8_strtolower(), trimmed and split on single spaces. Empty tokens and
 * non-numeric words shorter than three bytes are dropped; every remaining
 * word is counted.
 *
 * based upon class.search_indexer_phpcms.php::index_entry
 *
 * @param  string $id  page id, handed to rawWiki() to fetch the text
 * @return array  word => number of occurrences, inserted in the order
 *                produced by sort(); words that are canonical decimal
 *                integers (e.g. "42") become integer keys because of
 *                PHP's array key casting
 */
function idx_getPageWords($id){
    $body  = rawWiki($id);
    $body  = utf8_stripspecials($body,' ','._\-:');
    $body  = utf8_strtolower($body);
    $body  = trim($body);
    $words = explode(' ',$body);
    sort($words);

    $index = array(); //resulting index

    //compact wordlist FIXME check for stopwords
    foreach($words as $word){
        // consecutive spaces in the text produce empty tokens
        if(strlen($word) == 0) continue;

        // checking minimum word-size (excepting numbers)
        // note: strlen() counts bytes, so multibyte characters weigh more
        // than one towards the limit
        if(!is_numeric($word) && strlen($word) < 3) continue; #FIXME add config option for min wordsize

        //FIXME add stopword check

        // Count by key lookup instead of comparing with the previous word:
        // sort()'s default comparison treats numeric strings such as "1"
        // and "01" as equal and is not transitive for mixed input, so
        // identical words are not guaranteed to be neighbours. Relying on
        // adjacency incremented keys that were never set and could reset
        // a count back to 1.
        if(isset($index[$word])){
            $index[$word]++;
        }else{
            $index[$word] = 1;
        }
    }
    return $index;
}
//Setup VIM: ex: et ts=4 enc=utf-8 :
|