diff options
author | matthiasgrimm <matthiasgrimm@users.sourceforge.net> | 2005-06-03 20:27:46 +0200 |
---|---|---|
committer | matthiasgrimm <matthiasgrimm@users.sourceforge.net> | 2005-06-03 20:27:46 +0200 |
commit | 5ef370d246543eb020ac46989b5ab8717e10a5b0 (patch) | |
tree | 0271e9a80fe7da885641f7765c493b5b1224e0d1 | |
parent | c1e3b7d9b49d6a9f9a47a6a19439fa2d377f9833 (diff) | |
download | rpg-5ef370d246543eb020ac46989b5ab8717e10a5b0.tar.gz rpg-5ef370d246543eb020ac46989b5ab8717e10a5b0.tar.bz2 |
extended search function
The DokuWiki search function uses 'OR' to combine
multiple search words. This behaviour is unusual and
not very helpful to narrow the search results.
This patch changed the behaviour to 'AND'. Multiple
search words will reduce the count of search results.
It now uses regular-expression lookahead assertions. This has
the big advantage that only one regular expression has to be
processed for each file and the behaviour can be changed easily.
The functionality has been extended: words with a
preceding minus sign (-) will be excluded from the search
results, and words with a preceding plus sign (+) will be
included in them. If a preceding sign is missing,
(+) is assumed.
darcs-hash:20050603182746-7ef76-03b18a3707c91e8c5cbb99253b603d090b679c1a.gz
-rw-r--r-- | inc/html.php | 2 | ||||
-rw-r--r-- | inc/search.php | 38 |
2 files changed, 30 insertions, 10 deletions
diff --git a/inc/html.php b/inc/html.php index b9f2e957b..411e230c7 100644 --- a/inc/html.php +++ b/inc/html.php @@ -307,7 +307,7 @@ function html_search(){ usort($data,'sort_search_fulltext'); foreach($data as $row){ print '<div class="search_result">'; - print html_wikilink(':'.$row['id'],$row['id'],$QUERY); + print html_wikilink(':'.$row['id'],$row['id'],$row['poswords']); print ': <span class="search_cnt">'.$row['count'].' '.$lang['hits'].'</span><br />'; print '<div class="search_snippet">'.$row['snippet'].'</div>'; print '</div>'; diff --git a/inc/search.php b/inc/search.php index 853faef8a..654f1ab69 100644 --- a/inc/search.php +++ b/inc/search.php @@ -300,12 +300,31 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){ $lctext = utf8_strtolower($text); //create regexp from queries - $qpreg = preg_split('/\s+/',preg_quote($opts['query'],'#')); - $qpreg = '('.join('|',$qpreg).')'; - + $poswords = array(); + $negwords = array(); + $qpreg = preg_split('/\s+/',$opts['query']); + + foreach($qpreg as $word){ + switch(substr($word,0,1)){ + case '-': + array_push($negwords,preg_quote(substr($word,1),'#')); + break; + case '+': + array_push($poswords,preg_quote(substr($word,1),'#')); + break; + default: + array_push($poswords,preg_quote($word,'#')); + break; + } + } + + $req = count($poswords) ? $reg .= '^(?=.*?'.join(')(?=.*?',$poswords).')' : '^'; + $reg .= count($negwords) ? '((?!'.join('|',$negwords).').)*$' : '.*$'; + $mark = '('.join('|',$poswords).')'; + //do the fulltext search $matches = array(); - if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){ + if($cnt = preg_match_all('#'.$reg.'#usi',$lctext,$matches)){ //this is not the best way for snippet generation but the fastest I could find //split query and only use the first token $q = preg_split('/\s+/',$opts['query'],2); @@ -317,15 +336,16 @@ function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){ $snippet = '<span class="search_sep"> ... </span>'. 
htmlspecialchars(utf8_substr($text,$f,$l)). '<span class="search_sep"> ... </span>'; - $snippet = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$snippet); + $snippet = preg_replace('#'.$mark.'#si','<span class="search_hit">\\1</span>',$snippet); $data[] = array( - 'id' => $id, - 'count' => $cnt, - 'snippet' => $snippet, + 'id' => $id, + 'count' => preg_match_all('#'.$mark.'#usi',$lctext,$matches), + 'poswords' => join(' ',$poswords), + 'snippet' => $snippet, ); } - + return true; } |