summary | refs | log | tree | commit | diff
path: root/inc
diff options
context:
space:
mode:
author: Andreas Gohr <andi@splitbrain.org> 2005-09-04 00:02:29 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2005-09-04 00:02:29 +0200
commit: 506fa8936561993b7f70aa507d0c39a44a6ebab9 (patch)
tree: d5df1af7e8c1de99397e68e2e7ba39e63ab25c0c /inc
parent: b5a8175656b99ced69244b3ce593c00fb61ab825 (diff)
download: rpg-506fa8936561993b7f70aa507d0c39a44a6ebab9.tar.gz
          rpg-506fa8936561993b7f70aa507d0c39a44a6ebab9.tar.bz2
the search now uses the index
darcs-hash:20050903220229-7ad00-5d95f905eaeb3f6b867aa3ee43c2a8bccc533c00.gz
Diffstat (limited to 'inc')
-rw-r--r-- inc/fulltext.php 74
-rw-r--r-- inc/html.php 24
-rw-r--r-- inc/search.php 3
3 files changed, 82 insertions, 19 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 8549a67c1..6c4e148a2 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -14,15 +14,16 @@
* The fulltext search
*
* Returns a list of matching documents for the given query
+ *
*/
-function ft_pageSearch($query){
+function ft_pageSearch($query,&$poswords){
$q = ft_queryParser($query);
+ // use this for higlighting later:
+ $poswords = join(' ',$q['and']);
+
// lookup all words found in the query
$words = array_merge($q['and'],$q['not']);
- foreach($q['phrases'] as $phrase){
- $words = array_merge($words,$phrase['words']);
- }
if(!count($words)) return array();
$result = idx_lookup($words);
@@ -36,8 +37,7 @@ function ft_pageSearch($query){
$not = array_merge($not,array_keys($result[$w]));
}
-
- // combine and words
+ // combine and-words
if(count($q['and']) > 1){
$docs = ft_resultCombine($q['and']);
}else{
@@ -52,7 +52,6 @@ function ft_pageSearch($query){
if(!count($docs)) return array();
-
// handle phrases
if(count($q['phrases'])){
//build a regexp
@@ -63,7 +62,7 @@ function ft_pageSearch($query){
// check the source of all documents for the exact phrases
foreach(array_keys($docs) as $id){
$text = utf8_strtolower(rawWiki($id));
- if(!preg_match_all('/'.$regex.'/usi',$text)){
+ if(!preg_match('/'.$regex.'/usi',$text)){
unset($docs[$id]); // no hit - remove
}
}
@@ -78,6 +77,63 @@ function ft_pageSearch($query){
}
/**
+ * Quicksearch for pagenames
+ *
+ * By default it only matches the pagename and ignores the
+ * namespace. This can be changed with the second parameter
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+function ft_pageLookup($id,$pageonly=true){
+ global $conf;
+ $id = preg_quote($id,'/');
+ $pages = file($conf['cachedir'].'/page.idx');
+ $pages = array_values(preg_grep('/'.$id.'/',$pages));
+
+ $cnt = count($pages);
+ for($i=0; $i<$cnt; $i++){
+ if($pageonly){
+ if(!preg_match('/'.$id.'/',noNS($pages[$i]))){
+ unset($pages[$i]);
+ continue;
+ }
+ }
+ if(!@file_exists(wikiFN($pages[$i]))){
+ unset($pages[$i]);
+ continue;
+ }
+ }
+ sort($pages);
+ return $pages;
+}
+
+/**
+ * Creates a snippet extract
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+function ft_snippet($id,$poswords){
+ $poswords = preg_quote($poswords,'#');
+ $re = '('.str_replace(' ','|',$poswords).')';
+ $text = rawWiki($id);
+ //FIXME caseinsensitive matching doesn't work with UTF-8!?
+ preg_match_all('#(.{0,50})'.$re.'(.{0,50})#iu',$text,$matches,PREG_SET_ORDER);
+
+ $cnt = 0;
+ $snippet = '';
+ foreach($matches as $match){
+ $snippet .= '...'.htmlspecialchars($match[1]);
+ $snippet .= '<span class="search_hit">';
+ $snippet .= htmlspecialchars($match[2]);
+ $snippet .= '</span>';
+ $snippet .= htmlspecialchars($match[3]).'... ';
+ if($cnt++ == 2) break;
+ }
+
+ return $snippet;
+}
+
+/**
* Combine found documents and sum up their scores
*
* This function is used to combine searched words with a logical
@@ -144,4 +200,4 @@ function ft_queryParser($query){
return $q;
}
-
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/html.php b/inc/html.php
index dcd11feb1..b73eebf8c 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -295,6 +295,7 @@ function html_hilight($html,$query){
*/
function html_search(){
require_once(DOKU_INC.'inc/search.php');
+ require_once(DOKU_INC.'inc/fulltext.php');
global $conf;
global $QUERY;
global $ID;
@@ -312,14 +313,14 @@ function html_search(){
//do quick pagesearch
$data = array();
- search($data,$conf['datadir'],'search_pagename',array(query => cleanID($QUERY)));
+ $data = ft_pageLookup(cleanID($QUERY));
if(count($data)){
sort($data);
print '<div class="search_quickresult">';
print '<b>'.$lang[quickhits].':</b><br />';
- foreach($data as $row){
+ foreach($data as $id){
print '<div class="search_quickhits">';
- print html_wikilink(':'.$row['id'],$conf['useheading']?NULL:$row['id']);
+ print html_wikilink(':'.$id,$conf['useheading']?NULL:$id);
print '</div> ';
}
//clear float (see http://www.complexspiral.com/publications/containing-floats/)
@@ -329,16 +330,19 @@ function html_search(){
flush();
//do fulltext search
- $data = array();
- search($data,$conf['datadir'],'search_fulltext',array(query => utf8_strtolower($QUERY)));
+ $data = ft_pageSearch($QUERY,$poswords);
if(count($data)){
- usort($data,'sort_search_fulltext');
- foreach($data as $row){
+ $num = 1;
+ foreach($data as $id => $cnt){
print '<div class="search_result">';
- print html_wikilink(':'.$row['id'],$conf['useheading']?NULL:$row['id'],$row['poswords']);
- print ': <span class="search_cnt">'.$row['count'].' '.$lang['hits'].'</span><br />';
- print '<div class="search_snippet">'.$row['snippet'].'</div>';
+ print html_wikilink(':'.$id,$conf['useheading']?NULL:$id,$poswords);
+ print ': <span class="search_cnt">'.$cnt.' '.$lang['hits'].'</span><br />';
+ if($num < 15){ // create snippets for the first number of matches only #FIXME add to conf ?
+ print '<div class="search_snippet">'.ft_snippet($id,$poswords).'</div>';
+ }
print '</div>';
+ flush();
+ $num++;
}
}else{
print '<div class="nothing">'.$lang['nothingfound'].'</div>';
diff --git a/inc/search.php b/inc/search.php
index 3604db15e..ea20c4f3b 100644
--- a/inc/search.php
+++ b/inc/search.php
@@ -283,6 +283,7 @@ function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
* $opts['query'] is the search query
*
* @author Andreas Gohr <andi@splitbrain.org>
+ * @deprecated - fulltext indexer is used instead
*/
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
//we do nothing with directories
@@ -383,6 +384,8 @@ function search_reference(&$data,$base,$file,$type,$lvl,$opts){
*
* @author Andreas Gohr <andi@splitbrain.org>
* @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
+ *
+ * @deprecated - fulltext indexer is used instead
*/
function search_regex(&$data,$base,$file,$reg,$words){