From 5ef370d246543eb020ac46989b5ab8717e10a5b0 Mon Sep 17 00:00:00 2001
From: matthiasgrimm <matthiasgrimm@users.sourceforge.net>
Date: Fri, 3 Jun 2005 20:27:46 +0200
Subject: extended search function

The DokuWiki search function uses 'OR' to combine
multiple search words. This behaviour is unusual and
not very helpful for narrowing the search results.
This patch changes the behaviour to 'AND', so multiple
search words will reduce the number of search results.

It uses lookahead assertions now. This has the big advantage
that only one regular expression has to be processed
for each file and the behaviour can be changed easily.

The functionality has been extended: Words with a
preceding minus sign (-) will be excluded from the
search results, and words with a preceding plus sign (+)
will be included. If a preceding sign is missing,
(+) is assumed.

darcs-hash:20050603182746-7ef76-03b18a3707c91e8c5cbb99253b603d090b679c1a.gz
---
 inc/html.php   |  2 +-
 inc/search.php | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 30 insertions(+), 10 deletions(-)

(limited to 'inc')
diff --git a/inc/html.php b/inc/html.php
index b9f2e957b..411e230c7 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -307,7 +307,7 @@ function html_search(){
     usort($data,'sort_search_fulltext');
     foreach($data as $row){
       print '<div class="search_result">';
-      print html_wikilink(':'.$row['id'],$row['id'],$QUERY);
+      print html_wikilink(':'.$row['id'],$row['id'],$row['poswords']);
       print ': <span class="search_cnt">'.$row['count'].' '.$lang['hits'].'</span><br />';
       print '<div class="search_snippet">'.$row['snippet'].'</div>';
       print '</div>';
diff --git a/inc/search.php b/inc/search.php
index 853faef8a..654f1ab69 100644
--- a/inc/search.php
+++ b/inc/search.php
@@ -300,12 +300,31 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
   $lctext = utf8_strtolower($text);
 
   //create regexp from queries  
-  $qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#'));
-  $qpreg = '('.join('|',$qpreg).')';
-
+  $poswords = array();
+  $negwords = array();
+  $qpreg = preg_split('/\s+/',$opts['query']);
+  
+  foreach($qpreg as $word){
+    switch(substr($word,0,1)){
+      case '-':
+        array_push($negwords,preg_quote(substr($word,1),'#'));
+        break;
+      case '+':
+        array_push($poswords,preg_quote(substr($word,1),'#'));
+        break;
+      default:
+        array_push($poswords,preg_quote($word,'#'));
+        break;
+    }
+  }
+  
+  $req  = count($poswords) ? $reg .= '^(?=.*?'.join(')(?=.*?',$poswords).')' : '^';
+  $reg .= count($negwords) ? '((?!'.join('|',$negwords).').)*$' : '.*$';
+  $mark = '('.join('|',$poswords).')';
+  
   //do the fulltext search
   $matches = array();
-  if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){
+  if($cnt = preg_match_all('#'.$reg.'#usi',$lctext,$matches)){
     //this is not the best way for snippet generation but the fastest I could find
     //split query and only use the first token
     $q = preg_split('/\s+/',$opts['query'],2);
@@ -317,15 +336,16 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
     $snippet = '<span class="search_sep"> ... </span>'.
                htmlspecialchars(utf8_substr($text,$f,$l)).
                '<span class="search_sep"> ... </span>';
-    $snippet = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$snippet);
+    $snippet = preg_replace('#'.$mark.'#si','<span class="search_hit">\\1</span>',$snippet);
 
     $data[] = array(
-      'id'      => $id,
-      'count'   => $cnt,
-      'snippet' => $snippet,
+      'id'       => $id,
+      'count'    => preg_match_all('#'.$mark.'#usi',$lctext,$matches),
+      'poswords' => join(' ',$poswords),
+      'snippet'  => $snippet,
     );
   }
-
+  
   return true;
 }
 
-- 
cgit v1.2.3