diff options
author | Andreas Gohr <andi@splitbrain.org> | 2010-08-29 14:22:01 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2010-08-29 14:22:01 +0200 |
commit | 2c053ed58376c6709596ab48fc40dceb90d4e89d (patch) | |
tree | c8d0f78c2f47f373473419396d3c0855ec671eca /inc/fulltext.php | |
parent | cb4a07568e84d853fbcd9d5eca37f572fa10786f (diff) | |
parent | 5479a8c3341247ca228026819f20f3ab5c34a80f (diff) | |
download | rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.gz rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.bz2 |
Merge branch 'master' into stable
Conflicts:
conf/msg
lib/plugins/acl/ajax.php
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r-- | inc/fulltext.php | 249 |
1 files changed, 131 insertions, 118 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php index c8236e1d4..e90205e9c 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -7,8 +7,11 @@ */ if(!defined('DOKU_INC')) die('meh.'); -require_once(DOKU_INC.'inc/indexer.php'); +/** + * create snippets for the first few results only + */ +if(!defined('FT_SNIPPET_NUMBER')) define('FT_SNIPPET_NUMBER',15); /** * The fulltext search @@ -20,10 +23,10 @@ require_once(DOKU_INC.'inc/indexer.php'); */ function ft_pageSearch($query,&$highlight){ - $data['query'] = $query; - $data['highlight'] =& $highlight; + $data['query'] = $query; + $data['highlight'] =& $highlight; - return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch'); + return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch'); } /** @@ -135,7 +138,6 @@ function ft_backlinks($id){ $docs = array_keys(ft_resultCombine(array_values($matches))); $docs = array_filter($docs,'isVisiblePage'); // discard hidden pages if(!count($docs)) return $result; - require_once(DOKU_INC.'inc/parserutils.php'); // check metadata for matching links foreach($docs as $match){ @@ -189,7 +191,7 @@ function ft_mediause($id,$max){ foreach($matches[1] as $img){ $img = trim($img); if(preg_match('/^https?:\/\//i',$img)) continue; // skip external images - list($img) = explode('?',$img); // remove any parameters + list($img) = explode('?',$img); // remove any parameters resolve_mediaid($ns,$img,$exists); // resolve the possibly relative img if($img == $id){ // we have a match @@ -211,53 +213,64 @@ function ft_mediause($id,$max){ * Quicksearch for pagenames * * By default it only matches the pagename and ignores the - * namespace. This can be changed with the second parameter + * namespace. This can be changed with the second parameter. + * The third parameter allows to search in titles as well. * - * refactored into ft_pageLookup(), _ft_pageLookup() and trigger_event() + * The function always returns titles as well * + * @triggers SEARCH_QUERY_PAGELOOKUP * @author Andreas Gohr <andi@splitbrain.org> + * @author Adrian Lang <lang@cosmocode.de> */ -function ft_pageLookup($id,$pageonly=true){ - $data = array('id' => $id, 'pageonly' => $pageonly); - return trigger_event('SEARCH_QUERY_PAGELOOKUP',$data,'_ft_pageLookup'); +function ft_pageLookup($id, $in_ns=false, $in_title=false){ + $data = compact('id', 'in_ns', 'in_title'); + $data['has_titles'] = true; // for plugin backward compatibility check + return trigger_event('SEARCH_QUERY_PAGELOOKUP', $data, '_ft_pageLookup'); } function _ft_pageLookup(&$data){ - // split out original parameterrs + // split out original parameters $id = $data['id']; - $pageonly = $data['pageonly']; + if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) { + $ns = cleanID($matches[1]) . ':'; + $id = str_replace($matches[0], '', $id); + } - global $conf; - $id = preg_quote($id,'/'); - $pages = file($conf['indexdir'].'/page.idx'); - if($id) $pages = array_values(preg_grep('/'.$id.'/',$pages)); - - $cnt = count($pages); - for($i=0; $i<$cnt; $i++){ - if($pageonly){ - if(!preg_match('/'.$id.'/',noNS($pages[$i]))){ - unset($pages[$i]); - continue; + $in_ns = $data['in_ns']; + $in_title = $data['in_title']; + + $pages = array_map('rtrim', idx_getIndex('page', '')); + $titles = array_map('rtrim', idx_getIndex('title', '')); + $pages = array_combine($pages, $titles); + + $cleaned = cleanID($id); + if ($id !== '' && $cleaned !== '') { + foreach ($pages as $p_id => $p_title) { + if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) === false) && + (!$in_title || (stripos($p_title, $id) === false)) ) { + unset($pages[$p_id]); } } - if(!page_exists($pages[$i])){ - unset($pages[$i]); - continue; + } + if (isset($ns)) { + foreach (array_keys($pages) as $p_id) { + if (strpos($p_id, $ns) !== 0) { + unset($pages[$p_id]); + } } } - $pages = array_filter($pages,'isVisiblePage'); // discard hidden pages - if(!count($pages)) return array(); - + // discard hidden pages + // discard nonexistent pages // check ACL permissions foreach(array_keys($pages) as $idx){ - if(auth_quickaclcheck(trim($pages[$idx])) < AUTH_READ){ + if(!isVisiblePage($idx) || !page_exists($idx) || + auth_quickaclcheck($idx) < AUTH_READ) { unset($pages[$idx]); } } - $pages = array_map('trim',$pages); - usort($pages,'ft_pagesorter'); + uasort($pages,'ft_pagesorter'); return $pages; } @@ -286,11 +299,11 @@ function ft_pagesorter($a, $b){ function ft_snippet($id,$highlight){ $text = rawWiki($id); $evdata = array( - 'id' => $id, - 'text' => &$text, - 'highlight' => &$highlight, - 'snippet' => '', - ); + 'id' => $id, + 'text' => &$text, + 'highlight' => &$highlight, + 'snippet' => '', + ); $evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata); if ($evt->advise_before()) { @@ -305,60 +318,60 @@ function ft_snippet($id,$highlight){ $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1"; for ($cnt=4; $cnt--;) { - if (0) { - } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { - } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { - } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { - } else { - break; - } - - list($str,$idx) = $match[0]; - - // convert $idx (a byte offset) into a utf8 character offset - $utf8_idx = utf8_strlen(substr($text,0,$idx)); - $utf8_len = utf8_strlen($str); - - // establish context, 100 bytes surrounding the match string - // first look to see if we can go 100 either side, - // then drop to 50 adding any excess if the other side can't go to 50, - $pre = min($utf8_idx-$utf8_offset,100); - $post = min($len-$utf8_idx-$utf8_len,100); - - if ($pre>50 && $post>50) { - $pre = $post = 50; - } else if ($pre>50) { - $pre = min($pre,100-$post); - } else if ($post>50) { - $post = min($post, 100-$pre); - } else { - // both are less than 50, means the context is the whole string - // make it so and break out of this loop - there is no need for the - // complex snippet calculations - $snippets = array($text); - break; - } - - // establish context start and end points, try to append to previous - // context if possible - $start = $utf8_idx - $pre; - $append = ($start < $end) ? $end : false; // still the end of the previous context snippet - $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context - - if ($append) { - $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append); - } else { - $snippets[] = utf8_substr($text,$start,$end-$start); - } - - // set $offset for next match attempt - // substract strlen to avoid splitting a potential search success, - // this is an approximation as the search pattern may match strings - // of varying length and it will fail if the context snippet - // boundary breaks a matching string longer than the current match - $utf8_offset = $utf8_idx + $post; - $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post)); - $offset = utf8_correctIdx($text,$offset); + if (0) { + } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { + } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { + } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { + } else { + break; + } + + list($str,$idx) = $match[0]; + + // convert $idx (a byte offset) into a utf8 character offset + $utf8_idx = utf8_strlen(substr($text,0,$idx)); + $utf8_len = utf8_strlen($str); + + // establish context, 100 bytes surrounding the match string + // first look to see if we can go 100 either side, + // then drop to 50 adding any excess if the other side can't go to 50, + $pre = min($utf8_idx-$utf8_offset,100); + $post = min($len-$utf8_idx-$utf8_len,100); + + if ($pre>50 && $post>50) { + $pre = $post = 50; + } else if ($pre>50) { + $pre = min($pre,100-$post); + } else if ($post>50) { + $post = min($post, 100-$pre); + } else { + // both are less than 50, means the context is the whole string + // make it so and break out of this loop - there is no need for the + // complex snippet calculations + $snippets = array($text); + break; + } + + // establish context start and end points, try to append to previous + // context if possible + $start = $utf8_idx - $pre; + $append = ($start < $end) ? $end : false; // still the end of the previous context snippet + $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context + + if ($append) { + $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append); + } else { + $snippets[] = utf8_substr($text,$start,$end-$start); + } + + // set $offset for next match attempt + // substract strlen to avoid splitting a potential search success, + // this is an approximation as the search pattern may match strings + // of varying length and it will fail if the context snippet + // boundary breaks a matching string longer than the current match + $utf8_offset = $utf8_idx + $post; + $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post)); + $offset = utf8_correctIdx($text,$offset); } $m = "\1"; @@ -391,16 +404,16 @@ function ft_resultCombine($args){ $result = array(); if ($array_count > 1) { - foreach ($args[0] as $key => $value) { - $result[$key] = $value; - for ($i = 1; $i !== $array_count; $i++) { - if (!isset($args[$i][$key])) { - unset($result[$key]); - break; + foreach ($args[0] as $key => $value) { + $result[$key] = $value; + for ($i = 1; $i !== $array_count; $i++) { + if (!isset($args[$i][$key])) { + unset($result[$key]); + break; + } + $result[$key] += $args[$i][$key]; } - $result[$key] += $args[$i][$key]; } - } } return $result; } @@ -651,30 +664,30 @@ function ft_queryParser($query){ switch (substr($token, 0, 3)) { case 'N+:': - $q['ns'][] = $body; // for backward compatibility - break; + $q['ns'][] = $body; // for backward compatibility + break; case 'N-:': - $q['notns'][] = $body; // for backward compatibility - break; + $q['notns'][] = $body; // for backward compatibility + break; case 'W_:': - $q['words'][] = $body; - break; + $q['words'][] = $body; + break; case 'W-:': - $q['words'][] = $body; - $q['not'][] = $body; // for backward compatibility - break; + $q['words'][] = $body; + $q['not'][] = $body; // for backward compatibility + break; case 'W+:': - $q['words'][] = $body; - $q['highlight'][] = str_replace('*', '', $body); - $q['and'][] = $body; // for backward compatibility - break; + $q['words'][] = $body; + $q['highlight'][] = str_replace('*', '', $body); + $q['and'][] = $body; // for backward compatibility + break; case 'P-:': - $q['phrases'][] = $body; - break; + $q['phrases'][] = $body; + break; case 'P+:': - $q['phrases'][] = $body; - $q['highlight'][] = str_replace('*', '', $body); - break; + $q['phrases'][] = $body; + $q['highlight'][] = str_replace('*', '', $body); + break; } } foreach (array('words', 'phrases', 'highlight', 'ns', 'notns', 'and', 'not') as $key) { |