From 5ef370d246543eb020ac46989b5ab8717e10a5b0 Mon Sep 17 00:00:00 2001 From: matthiasgrimm Date: Fri, 3 Jun 2005 20:27:46 +0200 Subject: extended search function The DokuWiki search function uses 'OR' to combine multiple search words. This behaviour is unusual and not very helpful for narrowing the search results. This patch changes the behaviour to 'AND'. Multiple search words will reduce the count of search results. It now uses lookahead assertions. This has the big advantage that only one regular expression has to be processed for each file and the behaviour can be changed easily. The functionality has been extended: Words with a preceding minus sign (-) will be excluded from, and words with a preceding plus sign (+) will be included in, the search results. If a preceding sign is missing, (+) is assumed. darcs-hash:20050603182746-7ef76-03b18a3707c91e8c5cbb99253b603d090b679c1a.gz --- inc/html.php | 2 +- inc/search.php | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 10 deletions(-) (limited to 'inc') diff --git a/inc/html.php b/inc/html.php index b9f2e957b..411e230c7 100644 --- a/inc/html.php +++ b/inc/html.php @@ -307,7 +307,7 @@ function html_search(){ usort($data,'sort_search_fulltext'); foreach($data as $row){ print '
'; - print html_wikilink(':'.$row['id'],$row['id'],$QUERY); + print html_wikilink(':'.$row['id'],$row['id'],$row['poswords']); print ': '.$row['count'].' '.$lang['hits'].'
'; print '
'.$row['snippet'].'
'; print '
'; diff --git a/inc/search.php b/inc/search.php index 853faef8a..654f1ab69 100644 --- a/inc/search.php +++ b/inc/search.php @@ -300,12 +300,31 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){ $lctext = utf8_strtolower($text); //create regexp from queries - $qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#')); - $qpreg = '('.join('|',$qpreg).')'; - + $poswords = array(); + $negwords = array(); + $qpreg = preg_split('/\s+/',$opts['query']); + + foreach($qpreg as $word){ + switch(substr($word,0,1)){ + case '-': + array_push($negwords,preg_quote(substr($word,1),'#')); + break; + case '+': + array_push($poswords,preg_quote(substr($word,1),'#')); + break; + default: + array_push($poswords,preg_quote($word,'#')); + break; + } + } + + $req = count($poswords) ? $reg .= '^(?=.*?'.join(')(?=.*?',$poswords).')' : '^'; + $reg .= count($negwords) ? '((?!'.join('|',$negwords).').)*$' : '.*$'; + $mark = '('.join('|',$poswords).')'; + //do the fulltext search $matches = array(); - if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){ + if($cnt = preg_match_all('#'.$reg.'#usi',$lctext,$matches)){ //this is not the best way for snippet generation but the fastest I could find //split query and only use the first token $q = preg_split('/\s+/',$opts['query'],2); @@ -317,15 +336,16 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){ $snippet = ' ... '. htmlspecialchars(utf8_substr($text,$f,$l)). ' ... '; - $snippet = preg_replace('#'.$qpreg.'#si','\\1',$snippet); + $snippet = preg_replace('#'.$mark.'#si','\\1',$snippet); $data[] = array( - 'id' => $id, - 'count' => $cnt, - 'snippet' => $snippet, + 'id' => $id, + 'count' => preg_match_all('#'.$mark.'#usi',$lctext,$matches), + 'poswords' => join(' ',$poswords), + 'snippet' => $snippet, ); } - + return true; } -- cgit v1.2.3