extended search function

The DokuWiki search function uses 'OR' to combine multiple search words. This behaviour is unusual and not very helpful for narrowing the search results. This patch changes the behaviour to 'AND', so each additional search word reduces the number of search results. The search now uses regular-expression lookahead assertions. This has the big advantage that only one regular expression has to be processed for each file, and the behaviour can be changed easily. The functionality has also been extended: words with a preceding minus sign (-) will be excluded from, and words with a preceding plus sign (+) will be included in, the search results. If a preceding sign is missing, (+) is assumed. darcs-hash:20050603182746-7ef76-03b18a3707c91e8c5cbb99253b603d090b679c1a.gz
author: matthiasgrimm <matthiasgrimm@users.sourceforge.net> 2005-06-03 20:27:46 +0200
committer: matthiasgrimm <matthiasgrimm@users.sourceforge.net> 2005-06-03 20:27:46 +0200
commit: 5ef370d246543eb020ac46989b5ab8717e10a5b0 (patch)
tree: 0271e9a80fe7da885641f7765c493b5b1224e0d1
parent: c1e3b7d9b49d6a9f9a47a6a19439fa2d377f9833 (diff)
download: rpg-5ef370d246543eb020ac46989b5ab8717e10a5b0.tar.gz
rpg-5ef370d246543eb020ac46989b5ab8717e10a5b0.tar.bz2
2 files changed, 30 insertions, 10 deletions
diff --git a/inc/html.php b/inc/html.php
index b9f2e957b..411e230c7 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -307,7 +307,7 @@ function html_search(){
     usort($data,'sort_search_fulltext');
     foreach($data as $row){
       print '<div class="search_result">';
-      print html_wikilink(':'.$row['id'],$row['id'],$QUERY);
+      print html_wikilink(':'.$row['id'],$row['id'],$row['poswords']);
       print ': <span class="search_cnt">'.$row['count'].' '.$lang['hits'].'</span><br />';
       print '<div class="search_snippet">'.$row['snippet'].'</div>';
       print '</div>';
diff --git a/inc/search.php b/inc/search.php
index 853faef8a..654f1ab69 100644
--- a/inc/search.php
+++ b/inc/search.php
@@ -300,12 +300,31 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
   $lctext = utf8_strtolower($text);
 
   //create regexp from queries  
-  $qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#'));
-  $qpreg = '('.join('|',$qpreg).')';
-
+  $poswords = array();
+  $negwords = array();
+  $qpreg = preg_split('/\s+/',$opts['query']);
+  
+  foreach($qpreg as $word){
+    switch(substr($word,0,1)){
+      case '-':
+        array_push($negwords,preg_quote(substr($word,1),'#'));
+        break;
+      case '+':
+        array_push($poswords,preg_quote(substr($word,1),'#'));
+        break;
+      default:
+        array_push($poswords,preg_quote($word,'#'));
+        break;
+    }
+  }
+  
+  $req  = count($poswords) ? $reg .= '^(?=.*?'.join(')(?=.*?',$poswords).')' : '^';
+  $reg .= count($negwords) ? '((?!'.join('|',$negwords).').)*$' : '.*$';
+  $mark = '('.join('|',$poswords).')';
+  
   //do the fulltext search
   $matches = array();
-  if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){
+  if($cnt = preg_match_all('#'.$reg.'#usi',$lctext,$matches)){
     //this is not the best way for snippet generation but the fastest I could find
     //split query and only use the first token
     $q = preg_split('/\s+/',$opts['query'],2);
@@ -317,15 +336,16 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
     $snippet = '<span class="search_sep"> ... </span>'.
                htmlspecialchars(utf8_substr($text,$f,$l)).
                '<span class="search_sep"> ... </span>';
-    $snippet = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$snippet);
+    $snippet = preg_replace('#'.$mark.'#si','<span class="search_hit">\\1</span>',$snippet);
 
     $data[] = array(
-      'id'      => $id,
-      'count'   => $cnt,
-      'snippet' => $snippet,
+      'id'       => $id,
+      'count'    => preg_match_all('#'.$mark.'#usi',$lctext,$matches),
+      'poswords' => join(' ',$poswords),
+      'snippet'  => $snippet,
     );
   }
-
+  
   return true;
 }
author	matthiasgrimm <matthiasgrimm@users.sourceforge.net>	2005-06-03 20:27:46 +0200
committer	matthiasgrimm <matthiasgrimm@users.sourceforge.net>	2005-06-03 20:27:46 +0200
commit	5ef370d246543eb020ac46989b5ab8717e10a5b0 (patch)
tree	0271e9a80fe7da885641f7765c493b5b1224e0d1
parent	c1e3b7d9b49d6a9f9a47a6a19439fa2d377f9833 (diff)
download	rpg-5ef370d246543eb020ac46989b5ab8717e10a5b0.tar.gz rpg-5ef370d246543eb020ac46989b5ab8717e10a5b0.tar.bz2