summaryrefslogtreecommitdiff
path: root/inc/fulltext.php
diff options
context:
space:
mode:
author: Andreas Gohr <andi@splitbrain.org> 2010-08-29 14:22:01 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2010-08-29 14:22:01 +0200
commit: 2c053ed58376c6709596ab48fc40dceb90d4e89d (patch)
tree: c8d0f78c2f47f373473419396d3c0855ec671eca /inc/fulltext.php
parent: cb4a07568e84d853fbcd9d5eca37f572fa10786f (diff)
parent: 5479a8c3341247ca228026819f20f3ab5c34a80f (diff)
download: rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.gz
rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.bz2
Merge branch 'master' into stable
Conflicts: conf/msg lib/plugins/acl/ajax.php
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r--inc/fulltext.php249
1 files changed, 131 insertions, 118 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index c8236e1d4..e90205e9c 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -7,8 +7,11 @@
*/
if(!defined('DOKU_INC')) die('meh.');
-require_once(DOKU_INC.'inc/indexer.php');
+/**
+ * create snippets for the first few results only
+ */
+if(!defined('FT_SNIPPET_NUMBER')) define('FT_SNIPPET_NUMBER',15);
/**
* The fulltext search
@@ -20,10 +23,10 @@ require_once(DOKU_INC.'inc/indexer.php');
*/
function ft_pageSearch($query,&$highlight){
- $data['query'] = $query;
- $data['highlight'] =& $highlight;
+ $data['query'] = $query;
+ $data['highlight'] =& $highlight;
- return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch');
+ return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch');
}
/**
@@ -135,7 +138,6 @@ function ft_backlinks($id){
$docs = array_keys(ft_resultCombine(array_values($matches)));
$docs = array_filter($docs,'isVisiblePage'); // discard hidden pages
if(!count($docs)) return $result;
- require_once(DOKU_INC.'inc/parserutils.php');
// check metadata for matching links
foreach($docs as $match){
@@ -189,7 +191,7 @@ function ft_mediause($id,$max){
foreach($matches[1] as $img){
$img = trim($img);
if(preg_match('/^https?:\/\//i',$img)) continue; // skip external images
- list($img) = explode('?',$img); // remove any parameters
+ list($img) = explode('?',$img); // remove any parameters
resolve_mediaid($ns,$img,$exists); // resolve the possibly relative img
if($img == $id){ // we have a match
@@ -211,53 +213,64 @@ function ft_mediause($id,$max){
* Quicksearch for pagenames
*
* By default it only matches the pagename and ignores the
- * namespace. This can be changed with the second parameter
+ * namespace. This can be changed with the second parameter.
+ * The third parameter allows searching in titles as well.
*
- * refactored into ft_pageLookup(), _ft_pageLookup() and trigger_event()
+ * The function always returns titles as well
*
+ * @triggers SEARCH_QUERY_PAGELOOKUP
* @author Andreas Gohr <andi@splitbrain.org>
+ * @author Adrian Lang <lang@cosmocode.de>
*/
-function ft_pageLookup($id,$pageonly=true){
- $data = array('id' => $id, 'pageonly' => $pageonly);
- return trigger_event('SEARCH_QUERY_PAGELOOKUP',$data,'_ft_pageLookup');
+function ft_pageLookup($id, $in_ns=false, $in_title=false){
+ $data = compact('id', 'in_ns', 'in_title');
+ $data['has_titles'] = true; // for plugin backward compatibility check
+ return trigger_event('SEARCH_QUERY_PAGELOOKUP', $data, '_ft_pageLookup');
}
function _ft_pageLookup(&$data){
- // split out original parameterrs
+ // split out original parameters
$id = $data['id'];
- $pageonly = $data['pageonly'];
+ if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) {
+ $ns = cleanID($matches[1]) . ':';
+ $id = str_replace($matches[0], '', $id);
+ }
- global $conf;
- $id = preg_quote($id,'/');
- $pages = file($conf['indexdir'].'/page.idx');
- if($id) $pages = array_values(preg_grep('/'.$id.'/',$pages));
-
- $cnt = count($pages);
- for($i=0; $i<$cnt; $i++){
- if($pageonly){
- if(!preg_match('/'.$id.'/',noNS($pages[$i]))){
- unset($pages[$i]);
- continue;
+ $in_ns = $data['in_ns'];
+ $in_title = $data['in_title'];
+
+ $pages = array_map('rtrim', idx_getIndex('page', ''));
+ $titles = array_map('rtrim', idx_getIndex('title', ''));
+ $pages = array_combine($pages, $titles);
+
+ $cleaned = cleanID($id);
+ if ($id !== '' && $cleaned !== '') {
+ foreach ($pages as $p_id => $p_title) {
+ if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) === false) &&
+ (!$in_title || (stripos($p_title, $id) === false)) ) {
+ unset($pages[$p_id]);
}
}
- if(!page_exists($pages[$i])){
- unset($pages[$i]);
- continue;
+ }
+ if (isset($ns)) {
+ foreach (array_keys($pages) as $p_id) {
+ if (strpos($p_id, $ns) !== 0) {
+ unset($pages[$p_id]);
+ }
}
}
- $pages = array_filter($pages,'isVisiblePage'); // discard hidden pages
- if(!count($pages)) return array();
-
+ // discard hidden pages
+ // discard nonexistent pages
// check ACL permissions
foreach(array_keys($pages) as $idx){
- if(auth_quickaclcheck(trim($pages[$idx])) < AUTH_READ){
+ if(!isVisiblePage($idx) || !page_exists($idx) ||
+ auth_quickaclcheck($idx) < AUTH_READ) {
unset($pages[$idx]);
}
}
- $pages = array_map('trim',$pages);
- usort($pages,'ft_pagesorter');
+ uasort($pages,'ft_pagesorter');
return $pages;
}
@@ -286,11 +299,11 @@ function ft_pagesorter($a, $b){
function ft_snippet($id,$highlight){
$text = rawWiki($id);
$evdata = array(
- 'id' => $id,
- 'text' => &$text,
- 'highlight' => &$highlight,
- 'snippet' => '',
- );
+ 'id' => $id,
+ 'text' => &$text,
+ 'highlight' => &$highlight,
+ 'snippet' => '',
+ );
$evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata);
if ($evt->advise_before()) {
@@ -305,60 +318,60 @@ function ft_snippet($id,$highlight){
$re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
for ($cnt=4; $cnt--;) {
- if (0) {
- } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
- } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
- } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
- } else {
- break;
- }
-
- list($str,$idx) = $match[0];
-
- // convert $idx (a byte offset) into a utf8 character offset
- $utf8_idx = utf8_strlen(substr($text,0,$idx));
- $utf8_len = utf8_strlen($str);
-
- // establish context, 100 bytes surrounding the match string
- // first look to see if we can go 100 either side,
- // then drop to 50 adding any excess if the other side can't go to 50,
- $pre = min($utf8_idx-$utf8_offset,100);
- $post = min($len-$utf8_idx-$utf8_len,100);
-
- if ($pre>50 && $post>50) {
- $pre = $post = 50;
- } else if ($pre>50) {
- $pre = min($pre,100-$post);
- } else if ($post>50) {
- $post = min($post, 100-$pre);
- } else {
- // both are less than 50, means the context is the whole string
- // make it so and break out of this loop - there is no need for the
- // complex snippet calculations
- $snippets = array($text);
- break;
- }
-
- // establish context start and end points, try to append to previous
- // context if possible
- $start = $utf8_idx - $pre;
- $append = ($start < $end) ? $end : false; // still the end of the previous context snippet
- $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
-
- if ($append) {
- $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
- } else {
- $snippets[] = utf8_substr($text,$start,$end-$start);
- }
-
- // set $offset for next match attempt
- // substract strlen to avoid splitting a potential search success,
- // this is an approximation as the search pattern may match strings
- // of varying length and it will fail if the context snippet
- // boundary breaks a matching string longer than the current match
- $utf8_offset = $utf8_idx + $post;
- $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
- $offset = utf8_correctIdx($text,$offset);
+ if (0) {
+ } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+ } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+ } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+ } else {
+ break;
+ }
+
+ list($str,$idx) = $match[0];
+
+ // convert $idx (a byte offset) into a utf8 character offset
+ $utf8_idx = utf8_strlen(substr($text,0,$idx));
+ $utf8_len = utf8_strlen($str);
+
+ // establish context, 100 characters surrounding the match string
+ // first look to see if we can go 100 either side,
+ // then drop to 50 adding any excess if the other side can't go to 50,
+ $pre = min($utf8_idx-$utf8_offset,100);
+ $post = min($len-$utf8_idx-$utf8_len,100);
+
+ if ($pre>50 && $post>50) {
+ $pre = $post = 50;
+ } else if ($pre>50) {
+ $pre = min($pre,100-$post);
+ } else if ($post>50) {
+ $post = min($post, 100-$pre);
+ } else {
+ // both are less than 50, means the context is the whole string
+ // make it so and break out of this loop - there is no need for the
+ // complex snippet calculations
+ $snippets = array($text);
+ break;
+ }
+
+ // establish context start and end points, try to append to previous
+ // context if possible
+ $start = $utf8_idx - $pre;
+ $append = ($start < $end) ? $end : false; // still the end of the previous context snippet
+ $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
+
+ if ($append) {
+ $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
+ } else {
+ $snippets[] = utf8_substr($text,$start,$end-$start);
+ }
+
+ // set $offset for next match attempt
+ // subtract strlen to avoid splitting a potential search success,
+ // this is an approximation as the search pattern may match strings
+ // of varying length and it will fail if the context snippet
+ // boundary breaks a matching string longer than the current match
+ $utf8_offset = $utf8_idx + $post;
+ $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
+ $offset = utf8_correctIdx($text,$offset);
}
$m = "\1";
@@ -391,16 +404,16 @@ function ft_resultCombine($args){
$result = array();
if ($array_count > 1) {
- foreach ($args[0] as $key => $value) {
- $result[$key] = $value;
- for ($i = 1; $i !== $array_count; $i++) {
- if (!isset($args[$i][$key])) {
- unset($result[$key]);
- break;
+ foreach ($args[0] as $key => $value) {
+ $result[$key] = $value;
+ for ($i = 1; $i !== $array_count; $i++) {
+ if (!isset($args[$i][$key])) {
+ unset($result[$key]);
+ break;
+ }
+ $result[$key] += $args[$i][$key];
}
- $result[$key] += $args[$i][$key];
}
- }
}
return $result;
}
@@ -651,30 +664,30 @@ function ft_queryParser($query){
switch (substr($token, 0, 3)) {
case 'N+:':
- $q['ns'][] = $body; // for backward compatibility
- break;
+ $q['ns'][] = $body; // for backward compatibility
+ break;
case 'N-:':
- $q['notns'][] = $body; // for backward compatibility
- break;
+ $q['notns'][] = $body; // for backward compatibility
+ break;
case 'W_:':
- $q['words'][] = $body;
- break;
+ $q['words'][] = $body;
+ break;
case 'W-:':
- $q['words'][] = $body;
- $q['not'][] = $body; // for backward compatibility
- break;
+ $q['words'][] = $body;
+ $q['not'][] = $body; // for backward compatibility
+ break;
case 'W+:':
- $q['words'][] = $body;
- $q['highlight'][] = str_replace('*', '', $body);
- $q['and'][] = $body; // for backward compatibility
- break;
+ $q['words'][] = $body;
+ $q['highlight'][] = str_replace('*', '', $body);
+ $q['and'][] = $body; // for backward compatibility
+ break;
case 'P-:':
- $q['phrases'][] = $body;
- break;
+ $q['phrases'][] = $body;
+ break;
case 'P+:':
- $q['phrases'][] = $body;
- $q['highlight'][] = str_replace('*', '', $body);
- break;
+ $q['phrases'][] = $body;
+ $q['highlight'][] = str_replace('*', '', $body);
+ break;
}
}
foreach (array('words', 'phrases', 'highlight', 'ns', 'notns', 'and', 'not') as $key) {