Merge branch 'master' into stable

Conflicts: conf/msg lib/plugins/acl/ajax.php
author: Andreas Gohr <andi@splitbrain.org> 2010-08-29 14:22:01 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2010-08-29 14:22:01 +0200
commit: 2c053ed58376c6709596ab48fc40dceb90d4e89d (patch)
tree: c8d0f78c2f47f373473419396d3c0855ec671eca /inc/fulltext.php
parent: cb4a07568e84d853fbcd9d5eca37f572fa10786f (diff)
parent: 5479a8c3341247ca228026819f20f3ab5c34a80f (diff)
download: rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.gz rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.bz2
1 files changed, 131 insertions, 118 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index c8236e1d4..e90205e9c 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -7,8 +7,11 @@
  */
 
 if(!defined('DOKU_INC')) die('meh.');
-require_once(DOKU_INC.'inc/indexer.php');
 
+/**
+ * create snippets for the first few results only
+ */
+if(!defined('FT_SNIPPET_NUMBER')) define('FT_SNIPPET_NUMBER',15);
 
 /**
  * The fulltext search
@@ -20,10 +23,10 @@ require_once(DOKU_INC.'inc/indexer.php');
  */
 function ft_pageSearch($query,&$highlight){
 
-  $data['query'] = $query;
-  $data['highlight'] =& $highlight;
+    $data['query'] = $query;
+    $data['highlight'] =& $highlight;
 
-  return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch');
+    return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch');
 }
 
 /**
@@ -135,7 +138,6 @@ function ft_backlinks($id){
     $docs    = array_keys(ft_resultCombine(array_values($matches)));
     $docs    = array_filter($docs,'isVisiblePage'); // discard hidden pages
     if(!count($docs)) return $result;
-    require_once(DOKU_INC.'inc/parserutils.php');
 
     // check metadata for matching links
     foreach($docs as $match){
@@ -189,7 +191,7 @@ function ft_mediause($id,$max){
         foreach($matches[1] as $img){
             $img = trim($img);
             if(preg_match('/^https?:\/\//i',$img)) continue; // skip external images
-            list($img) = explode('?',$img);                  // remove any parameters
+                list($img) = explode('?',$img);                  // remove any parameters
             resolve_mediaid($ns,$img,$exists);               // resolve the possibly relative img
 
             if($img == $id){                                 // we have a match
@@ -211,53 +213,64 @@ function ft_mediause($id,$max){
  * Quicksearch for pagenames
  *
  * By default it only matches the pagename and ignores the
- * namespace. This can be changed with the second parameter
+ * namespace. This can be changed with the second parameter.
+ * The third parameter allows searching in titles as well.
  *
- * refactored into ft_pageLookup(), _ft_pageLookup() and trigger_event()
+ * The function always returns titles as well
  *
+ * @triggers SEARCH_QUERY_PAGELOOKUP
  * @author Andreas Gohr <andi@splitbrain.org>
+ * @author Adrian Lang <lang@cosmocode.de>
  */
-function ft_pageLookup($id,$pageonly=true){
-    $data = array('id' => $id, 'pageonly' => $pageonly);
-    return trigger_event('SEARCH_QUERY_PAGELOOKUP',$data,'_ft_pageLookup');
+function ft_pageLookup($id, $in_ns=false, $in_title=false){
+    $data = compact('id', 'in_ns', 'in_title');
+    $data['has_titles'] = true; // for plugin backward compatibility check
+    return trigger_event('SEARCH_QUERY_PAGELOOKUP', $data, '_ft_pageLookup');
 }
 
 function _ft_pageLookup(&$data){
-    // split out original parameterrs
+    // split out original parameters
     $id = $data['id'];
-    $pageonly = $data['pageonly'];
+    if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) {
+        $ns = cleanID($matches[1]) . ':';
+        $id = str_replace($matches[0], '', $id);
+    }
 
-    global $conf;
-    $id    = preg_quote($id,'/');
-    $pages = file($conf['indexdir'].'/page.idx');
-    if($id) $pages = array_values(preg_grep('/'.$id.'/',$pages));
-
-    $cnt = count($pages);
-    for($i=0; $i<$cnt; $i++){
-        if($pageonly){
-            if(!preg_match('/'.$id.'/',noNS($pages[$i]))){
-                unset($pages[$i]);
-                continue;
+    $in_ns    = $data['in_ns'];
+    $in_title = $data['in_title'];
+
+    $pages  = array_map('rtrim', idx_getIndex('page', ''));
+    $titles = array_map('rtrim', idx_getIndex('title', ''));
+    $pages = array_combine($pages, $titles);
+
+    $cleaned = cleanID($id);
+    if ($id !== '' && $cleaned !== '') {
+        foreach ($pages as $p_id => $p_title) {
+            if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) === false) &&
+                (!$in_title || (stripos($p_title, $id) === false)) ) {
+                unset($pages[$p_id]);
             }
         }
-        if(!page_exists($pages[$i])){
-            unset($pages[$i]);
-            continue;
+    }
+    if (isset($ns)) {
+        foreach (array_keys($pages) as $p_id) {
+            if (strpos($p_id, $ns) !== 0) {
+                unset($pages[$p_id]);
+            }
         }
     }
 
-    $pages = array_filter($pages,'isVisiblePage'); // discard hidden pages
-    if(!count($pages)) return array();
-
+    // discard hidden pages
+    // discard nonexistent pages
     // check ACL permissions
     foreach(array_keys($pages) as $idx){
-        if(auth_quickaclcheck(trim($pages[$idx])) < AUTH_READ){
+        if(!isVisiblePage($idx) || !page_exists($idx) ||
+           auth_quickaclcheck($idx) < AUTH_READ) {
             unset($pages[$idx]);
         }
     }
 
-    $pages = array_map('trim',$pages);
-    usort($pages,'ft_pagesorter');
+    uasort($pages,'ft_pagesorter');
     return $pages;
 }
 
@@ -286,11 +299,11 @@ function ft_pagesorter($a, $b){
 function ft_snippet($id,$highlight){
     $text = rawWiki($id);
     $evdata = array(
-                'id'        => $id,
-                'text'      => &$text,
-                'highlight' => &$highlight,
-                'snippet'   => '',
-              );
+            'id'        => $id,
+            'text'      => &$text,
+            'highlight' => &$highlight,
+            'snippet'   => '',
+            );
 
     $evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata);
     if ($evt->advise_before()) {
@@ -305,60 +318,60 @@ function ft_snippet($id,$highlight){
         $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
 
         for ($cnt=4; $cnt--;) {
-          if (0) {
-          } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
-          } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
-          } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
-          } else {
-            break;
-          }
-
-          list($str,$idx) = $match[0];
-
-          // convert $idx (a byte offset) into a utf8 character offset
-          $utf8_idx = utf8_strlen(substr($text,0,$idx));
-          $utf8_len = utf8_strlen($str);
-
-          // establish context, 100 bytes surrounding the match string
-          // first look to see if we can go 100 either side,
-          // then drop to 50 adding any excess if the other side can't go to 50,
-          $pre = min($utf8_idx-$utf8_offset,100);
-          $post = min($len-$utf8_idx-$utf8_len,100);
-
-          if ($pre>50 && $post>50) {
-            $pre = $post = 50;
-          } else if ($pre>50) {
-            $pre = min($pre,100-$post);
-          } else if ($post>50) {
-            $post = min($post, 100-$pre);
-          } else {
-            // both are less than 50, means the context is the whole string
-            // make it so and break out of this loop - there is no need for the
-            // complex snippet calculations
-            $snippets = array($text);
-            break;
-          }
-
-          // establish context start and end points, try to append to previous
-          // context if possible
-          $start = $utf8_idx - $pre;
-          $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
-          $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context
-
-          if ($append) {
-            $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
-          } else {
-            $snippets[] = utf8_substr($text,$start,$end-$start);
-          }
-
-          // set $offset for next match attempt
-          //   substract strlen to avoid splitting a potential search success,
-          //   this is an approximation as the search pattern may match strings
-          //   of varying length and it will fail if the context snippet
-          //   boundary breaks a matching string longer than the current match
-          $utf8_offset = $utf8_idx + $post;
-          $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
-          $offset = utf8_correctIdx($text,$offset);
+            if (0) {
+            } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+            } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+            } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+            } else {
+                break;
+            }
+
+            list($str,$idx) = $match[0];
+
+            // convert $idx (a byte offset) into a utf8 character offset
+            $utf8_idx = utf8_strlen(substr($text,0,$idx));
+            $utf8_len = utf8_strlen($str);
+
+            // establish context, 100 characters surrounding the match string
+            // first look to see if we can go 100 either side,
+            // then drop to 50 adding any excess if the other side can't go to 50,
+            $pre = min($utf8_idx-$utf8_offset,100);
+            $post = min($len-$utf8_idx-$utf8_len,100);
+
+            if ($pre>50 && $post>50) {
+                $pre = $post = 50;
+            } else if ($pre>50) {
+                $pre = min($pre,100-$post);
+            } else if ($post>50) {
+                $post = min($post, 100-$pre);
+            } else {
+                // both are less than 50, means the context is the whole string
+                // make it so and break out of this loop - there is no need for the
+                // complex snippet calculations
+                $snippets = array($text);
+                break;
+            }
+
+            // establish context start and end points, try to append to previous
+            // context if possible
+            $start = $utf8_idx - $pre;
+            $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
+            $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context
+
+            if ($append) {
+                $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
+            } else {
+                $snippets[] = utf8_substr($text,$start,$end-$start);
+            }
+
+            // set $offset for next match attempt
+            //   subtract strlen to avoid splitting a potential search success,
+            //   this is an approximation as the search pattern may match strings
+            //   of varying length and it will fail if the context snippet
+            //   boundary breaks a matching string longer than the current match
+            $utf8_offset = $utf8_idx + $post;
+            $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
+            $offset = utf8_correctIdx($text,$offset);
         }
 
         $m = "\1";
@@ -391,16 +404,16 @@ function ft_resultCombine($args){
 
     $result = array();
     if ($array_count > 1) {
-      foreach ($args[0] as $key => $value) {
-        $result[$key] = $value;
-        for ($i = 1; $i !== $array_count; $i++) {
-            if (!isset($args[$i][$key])) {
-                unset($result[$key]);
-                break;
+        foreach ($args[0] as $key => $value) {
+            $result[$key] = $value;
+            for ($i = 1; $i !== $array_count; $i++) {
+                if (!isset($args[$i][$key])) {
+                    unset($result[$key]);
+                    break;
+                }
+                $result[$key] += $args[$i][$key];
             }
-            $result[$key] += $args[$i][$key];
         }
-      }
     }
     return $result;
 }
@@ -651,30 +664,30 @@ function ft_queryParser($query){
 
         switch (substr($token, 0, 3)) {
             case 'N+:':
-                $q['ns'][]        = $body; // for backward compatibility
-                break;
+                     $q['ns'][]        = $body; // for backward compatibility
+                     break;
             case 'N-:':
-                $q['notns'][]     = $body; // for backward compatibility
-                break;
+                     $q['notns'][]     = $body; // for backward compatibility
+                     break;
             case 'W_:':
-                $q['words'][]     = $body;
-                break;
+                     $q['words'][]     = $body;
+                     break;
             case 'W-:':
-                $q['words'][]     = $body;
-                $q['not'][]       = $body; // for backward compatibility
-                break;
+                     $q['words'][]     = $body;
+                     $q['not'][]       = $body; // for backward compatibility
+                     break;
             case 'W+:':
-                $q['words'][]     = $body;
-                $q['highlight'][] = str_replace('*', '', $body);
-                $q['and'][]       = $body; // for backward compatibility
-                break;
+                     $q['words'][]     = $body;
+                     $q['highlight'][] = str_replace('*', '', $body);
+                     $q['and'][]       = $body; // for backward compatibility
+                     break;
             case 'P-:':
-                $q['phrases'][]   = $body;
-                break;
+                     $q['phrases'][]   = $body;
+                     break;
             case 'P+:':
-                $q['phrases'][]   = $body;
-                $q['highlight'][] = str_replace('*', '', $body);
-                break;
+                     $q['phrases'][]   = $body;
+                     $q['highlight'][] = str_replace('*', '', $body);
+                     break;
         }
     }
     foreach (array('words', 'phrases', 'highlight', 'ns', 'notns', 'and', 'not') as $key) {
author	Andreas Gohr <andi@splitbrain.org>	2010-08-29 14:22:01 +0200
committer	Andreas Gohr <andi@splitbrain.org>	2010-08-29 14:22:01 +0200
commit	2c053ed58376c6709596ab48fc40dceb90d4e89d (patch)
tree	c8d0f78c2f47f373473419396d3c0855ec671eca /inc/fulltext.php
parent	cb4a07568e84d853fbcd9d5eca37f572fa10786f (diff)
parent	5479a8c3341247ca228026819f20f3ab5c34a80f (diff)
download	rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.gz rpg-2c053ed58376c6709596ab48fc40dceb90d4e89d.tar.bz2