summaryrefslogtreecommitdiff
path: root/inc
diff options
context:
space:
mode:
Diffstat (limited to 'inc')
-rw-r--r--inc/fulltext.php8
-rw-r--r--inc/indexer.php15
2 files changed, 20 insertions, 3 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 0d3acb609..448f72248 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -105,12 +105,14 @@ function ft_pageSearch($query,&$poswords){
*/
function ft_backlinks($id){
global $conf;
+ $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
+ $stopwords = @file_exists($swfile) ? file($swfile) : array();
+
$result = array();
// quick lookup of the pagename
$page = noNS($id);
- $sw = array(); // we don't use stopwords here
- $matches = idx_lookup(idx_tokenizer($page,$sw)); // pagename may contain specials (_ or .)
+ $matches = idx_lookup(idx_tokenizer($page,$stopwords)); // pagename may contain specials (_ or .)
$docs = array_keys(ft_resultCombine(array_values($matches)));
$docs = array_filter($docs,'isVisiblePage'); // discard hidden pages
if(!count($docs)) return $result;
@@ -119,7 +121,7 @@ function ft_backlinks($id){
// check metadata for matching links
foreach($docs as $match){
// metadata relation reference links are already resolved
- $links = p_get_metadata($match,"relation references");
+ $links = p_get_metadata($match,'relation references');
if (isset($links[$id])) $result[] = $match;
}
diff --git a/inc/indexer.php b/inc/indexer.php
index a2b7a0637..e6550c2e4 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -48,6 +48,21 @@ function idx_getPageWords($page){
$tokens = explode(' ', $body);
$tokens = array_count_values($tokens); // count the frequency of each token
+// ensure the deaccented or romanised page names of internal links are added to the token array
+// (this is necessary for the backlink function -- there may be a better way!)
+ if ($conf['deaccent']) {
+ $links = p_get_metadata($page,'relation references');
+
+ $tmp = join(' ',array_keys($links)); // make a single string
+ $tmp = strtr($tmp, ':', ' '); // replace namespace separator with a space
+ $link_tokens = array_unique(explode(' ', $tmp)); // break into tokens
+
+ foreach ($link_tokens as $link_token) {
+ if (isset($tokens[$link_token])) continue;
+ $tokens[$link_token] = 1;
+ }
+ }
+
$words = array();
foreach ($tokens as $word => $count) {
// simple filter to restrict use of utf8_stripspecials