From 49eb6e38061d744f4a35b78082dce49fa35f79c8 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Fri, 15 Jan 2010 19:50:13 +0100 Subject: some more coding standard compliance updates --- inc/fulltext.php | 178 +++++++++++++++++++++++++++---------------------------- 1 file changed, 89 insertions(+), 89 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index c8236e1d4..94c68d675 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -20,10 +20,10 @@ require_once(DOKU_INC.'inc/indexer.php'); */ function ft_pageSearch($query,&$highlight){ - $data['query'] = $query; - $data['highlight'] =& $highlight; + $data['query'] = $query; + $data['highlight'] =& $highlight; - return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch'); + return trigger_event('SEARCH_QUERY_FULLPAGE', $data, '_ft_pageSearch'); } /** @@ -189,7 +189,7 @@ function ft_mediause($id,$max){ foreach($matches[1] as $img){ $img = trim($img); if(preg_match('/^https?:\/\//i',$img)) continue; // skip external images - list($img) = explode('?',$img); // remove any parameters + list($img) = explode('?',$img); // remove any parameters resolve_mediaid($ns,$img,$exists); // resolve the possibly relative img if($img == $id){ // we have a match @@ -286,11 +286,11 @@ function ft_pagesorter($a, $b){ function ft_snippet($id,$highlight){ $text = rawWiki($id); $evdata = array( - 'id' => $id, - 'text' => &$text, - 'highlight' => &$highlight, - 'snippet' => '', - ); + 'id' => $id, + 'text' => &$text, + 'highlight' => &$highlight, + 'snippet' => '', + ); $evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata); if ($evt->advise_before()) { @@ -305,60 +305,60 @@ function ft_snippet($id,$highlight){ $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1"; for ($cnt=4; $cnt--;) { - if (0) { - } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { - } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { - } else if 
(preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { - } else { - break; - } - - list($str,$idx) = $match[0]; - - // convert $idx (a byte offset) into a utf8 character offset - $utf8_idx = utf8_strlen(substr($text,0,$idx)); - $utf8_len = utf8_strlen($str); - - // establish context, 100 bytes surrounding the match string - // first look to see if we can go 100 either side, - // then drop to 50 adding any excess if the other side can't go to 50, - $pre = min($utf8_idx-$utf8_offset,100); - $post = min($len-$utf8_idx-$utf8_len,100); - - if ($pre>50 && $post>50) { - $pre = $post = 50; - } else if ($pre>50) { - $pre = min($pre,100-$post); - } else if ($post>50) { - $post = min($post, 100-$pre); - } else { - // both are less than 50, means the context is the whole string - // make it so and break out of this loop - there is no need for the - // complex snippet calculations - $snippets = array($text); - break; - } - - // establish context start and end points, try to append to previous - // context if possible - $start = $utf8_idx - $pre; - $append = ($start < $end) ? 
$end : false; // still the end of the previous context snippet - $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context - - if ($append) { - $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append); - } else { - $snippets[] = utf8_substr($text,$start,$end-$start); - } - - // set $offset for next match attempt - // substract strlen to avoid splitting a potential search success, - // this is an approximation as the search pattern may match strings - // of varying length and it will fail if the context snippet - // boundary breaks a matching string longer than the current match - $utf8_offset = $utf8_idx + $post; - $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post)); - $offset = utf8_correctIdx($text,$offset); + if (0) { + } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { + } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { + } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) { + } else { + break; + } + + list($str,$idx) = $match[0]; + + // convert $idx (a byte offset) into a utf8 character offset + $utf8_idx = utf8_strlen(substr($text,0,$idx)); + $utf8_len = utf8_strlen($str); + + // establish context, 100 bytes surrounding the match string + // first look to see if we can go 100 either side, + // then drop to 50 adding any excess if the other side can't go to 50, + $pre = min($utf8_idx-$utf8_offset,100); + $post = min($len-$utf8_idx-$utf8_len,100); + + if ($pre>50 && $post>50) { + $pre = $post = 50; + } else if ($pre>50) { + $pre = min($pre,100-$post); + } else if ($post>50) { + $post = min($post, 100-$pre); + } else { + // both are less than 50, means the context is the whole string + // make it so and break out of this loop - there is no need for the + // complex snippet calculations + $snippets = array($text); + break; + } + + // establish context start and end points, try to append to previous + // context if possible + 
$start = $utf8_idx - $pre; + $append = ($start < $end) ? $end : false; // still the end of the previous context snippet + $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context + + if ($append) { + $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append); + } else { + $snippets[] = utf8_substr($text,$start,$end-$start); + } + + // set $offset for next match attempt + // substract strlen to avoid splitting a potential search success, + // this is an approximation as the search pattern may match strings + // of varying length and it will fail if the context snippet + // boundary breaks a matching string longer than the current match + $utf8_offset = $utf8_idx + $post; + $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post)); + $offset = utf8_correctIdx($text,$offset); } $m = "\1"; @@ -391,16 +391,16 @@ function ft_resultCombine($args){ $result = array(); if ($array_count > 1) { - foreach ($args[0] as $key => $value) { - $result[$key] = $value; - for ($i = 1; $i !== $array_count; $i++) { - if (!isset($args[$i][$key])) { - unset($result[$key]); - break; + foreach ($args[0] as $key => $value) { + $result[$key] = $value; + for ($i = 1; $i !== $array_count; $i++) { + if (!isset($args[$i][$key])) { + unset($result[$key]); + break; + } + $result[$key] += $args[$i][$key]; } - $result[$key] += $args[$i][$key]; } - } } return $result; } @@ -651,30 +651,30 @@ function ft_queryParser($query){ switch (substr($token, 0, 3)) { case 'N+:': - $q['ns'][] = $body; // for backward compatibility - break; + $q['ns'][] = $body; // for backward compatibility + break; case 'N-:': - $q['notns'][] = $body; // for backward compatibility - break; + $q['notns'][] = $body; // for backward compatibility + break; case 'W_:': - $q['words'][] = $body; - break; + $q['words'][] = $body; + break; case 'W-:': - $q['words'][] = $body; - $q['not'][] = $body; // for backward compatibility - break; + $q['words'][] = $body; + $q['not'][] = $body; // for backward 
compatibility + break; case 'W+:': - $q['words'][] = $body; - $q['highlight'][] = str_replace('*', '', $body); - $q['and'][] = $body; // for backward compatibility - break; + $q['words'][] = $body; + $q['highlight'][] = str_replace('*', '', $body); + $q['and'][] = $body; // for backward compatibility + break; case 'P-:': - $q['phrases'][] = $body; - break; + $q['phrases'][] = $body; + break; case 'P+:': - $q['phrases'][] = $body; - $q['highlight'][] = str_replace('*', '', $body); - break; + $q['phrases'][] = $body; + $q['highlight'][] = str_replace('*', '', $body); + break; } } foreach (array('words', 'phrases', 'highlight', 'ns', 'notns', 'and', 'not') as $key) { -- cgit v1.2.3 From 16905344219a6293705b71cd526fad3ba07b04eb Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 31 Jan 2010 19:02:14 +0100 Subject: first attempt to centralize all include loading Classes are loaded through PHP5's class autoloader, all other includes are just loaded by default. This skips a lot of require_once calls. Parser and Plugin stuff isn't handled by the class loader yet. 
--- inc/fulltext.php | 2 -- 1 file changed, 2 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index 94c68d675..58d17422a 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -7,8 +7,6 @@ */ if(!defined('DOKU_INC')) die('meh.'); -require_once(DOKU_INC.'inc/indexer.php'); - /** * The fulltext search -- cgit v1.2.3 From 0af14a6e25ba35e88d96762bc73325838868e3fe Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Mon, 1 Feb 2010 15:38:41 +0100 Subject: removed more unneeded require_once() calls --- inc/fulltext.php | 1 - 1 file changed, 1 deletion(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index 58d17422a..76dd01d1f 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -133,7 +133,6 @@ function ft_backlinks($id){ $docs = array_keys(ft_resultCombine(array_values($matches))); $docs = array_filter($docs,'isVisiblePage'); // discard hidden pages if(!count($docs)) return $result; - require_once(DOKU_INC.'inc/parserutils.php'); // check metadata for matching links foreach($docs as $match){ -- cgit v1.2.3 From bd0293e7406f93a8897c471193b69413b78b6236 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 28 Feb 2010 12:31:37 +0100 Subject: moved number of search result snippets to a define --- inc/fulltext.php | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index 94c68d675..4f4dc1bfd 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -9,6 +9,10 @@ if(!defined('DOKU_INC')) die('meh.'); require_once(DOKU_INC.'inc/indexer.php'); +/** + * create snippets for the first few results only + */ +if(!defined('FT_SNIPPET_NUMBER')) define('FT_SNIPPET_NUMBER',15); /** * The fulltext search -- cgit v1.2.3 From 80423ab626c72923f347e2196ce660957dcc216f Mon Sep 17 00:00:00 2001 From: Adrian Lang Date: Wed, 16 Jun 2010 14:41:59 +0200 Subject: Perform quick search in title as well --- inc/fulltext.php | 55 
++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index 142862529..c4d8a7ea7 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -213,54 +213,55 @@ function ft_mediause($id,$max){ * Quicksearch for pagenames * * By default it only matches the pagename and ignores the - * namespace. This can be changed with the second parameter + * namespace. This can be changed with the second parameter. + * The third parameter allows to search in titles as well. + * If the function should search in titles as well, the return array + * has the ids as key and the titles as value. * * refactored into ft_pageLookup(), _ft_pageLookup() and trigger_event() * * @author Andreas Gohr */ -function ft_pageLookup($id,$pageonly=true){ - $data = array('id' => $id, 'pageonly' => $pageonly); - return trigger_event('SEARCH_QUERY_PAGELOOKUP',$data,'_ft_pageLookup'); +function ft_pageLookup($id, $not_in_ns=true, $not_in_title=true){ + $data = compact('id', 'not_in_ns', 'not_in_title'); + return trigger_event('SEARCH_QUERY_PAGELOOKUP', $data, '_ft_pageLookup'); } function _ft_pageLookup(&$data){ - // split out original parameterrs + // split out original parameters $id = $data['id']; - $pageonly = $data['pageonly']; + $in_ns = !$data['not_in_ns']; + $in_title = !$data['not_in_title']; global $conf; - $id = preg_quote($id,'/'); - $pages = file($conf['indexdir'].'/page.idx'); - if($id) $pages = array_values(preg_grep('/'.$id.'/',$pages)); - - $cnt = count($pages); - for($i=0; $i<$cnt; $i++){ - if($pageonly){ - if(!preg_match('/'.$id.'/',noNS($pages[$i]))){ - unset($pages[$i]); - continue; + $pages = array_map('rtrim', file($conf['indexdir'].'/page.idx')); + $titles = array_map('rtrim', file($conf['indexdir'].'/title.idx')); + $pages = array_combine($pages, $titles); + + if($id !== '' && cleanID($id) !== '') { + $cleaned = cleanID($id); + $matched_pages = array(); 
+ foreach ($pages as $p_id => $p_title) { + if ((strpos($in_ns ? $p_id : noNS($p_id), $cleaned) !== false) || + ($in_title && stripos($p_title, $id) !== false)) { + $matched_pages[$p_id] = $p_title; } } - if(!page_exists($pages[$i])){ - unset($pages[$i]); - continue; - } + $pages = $matched_pages; } - $pages = array_filter($pages,'isVisiblePage'); // discard hidden pages - if(!count($pages)) return array(); - + // discard hidden pages + // discard nonexistent pages // check ACL permissions foreach(array_keys($pages) as $idx){ - if(auth_quickaclcheck(trim($pages[$idx])) < AUTH_READ){ + if(!isVisiblePage($idx) || !page_exists($idx) || + auth_quickaclcheck($idx) < AUTH_READ) { unset($pages[$idx]); } } - $pages = array_map('trim',$pages); - usort($pages,'ft_pagesorter'); - return $pages; + uasort($pages,'ft_pagesorter'); + return $in_title ? $pages : array_keys($pages); } /** -- cgit v1.2.3 From 5ca4a8a19425e432cff13aaef5eae897e153f320 Mon Sep 17 00:00:00 2001 From: Adrian Lang Date: Wed, 16 Jun 2010 15:50:06 +0200 Subject: Find start pages if namespace matches --- inc/fulltext.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index c4d8a7ea7..a68770277 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -242,7 +242,7 @@ function _ft_pageLookup(&$data){ $cleaned = cleanID($id); $matched_pages = array(); foreach ($pages as $p_id => $p_title) { - if ((strpos($in_ns ? $p_id : noNS($p_id), $cleaned) !== false) || + if ((strpos($in_ns ? 
$p_id : noNSorNS($p_id), $cleaned) !== false) || ($in_title && stripos($p_title, $id) !== false)) { $matched_pages[$p_id] = $p_title; } -- cgit v1.2.3 From a0070b52bbd24f6972b819fa8ff4bdbfe81b5bbc Mon Sep 17 00:00:00 2001 From: Adrian Lang Date: Wed, 16 Jun 2010 16:15:28 +0200 Subject: Add title index to the indexer files, improve indexer calls --- inc/fulltext.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index a68770277..b6aac5c91 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -233,9 +233,8 @@ function _ft_pageLookup(&$data){ $in_ns = !$data['not_in_ns']; $in_title = !$data['not_in_title']; - global $conf; - $pages = array_map('rtrim', file($conf['indexdir'].'/page.idx')); - $titles = array_map('rtrim', file($conf['indexdir'].'/title.idx')); + $pages = array_map('rtrim', idx_getIndex('page', '')); + $titles = array_map('rtrim', idx_getIndex('title', '')); $pages = array_combine($pages, $titles); if($id !== '' && cleanID($id) !== '') { -- cgit v1.2.3 From b0f6db0c1350beb85dcff044dc2770f404a1b540 Mon Sep 17 00:00:00 2001 From: Adrian Lang Date: Wed, 23 Jun 2010 14:24:11 +0200 Subject: Support namespace selection in quicksearch --- inc/fulltext.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index b6aac5c91..950e7f7d3 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -229,7 +229,13 @@ function ft_pageLookup($id, $not_in_ns=true, $not_in_title=true){ function _ft_pageLookup(&$data){ // split out original parameters + $id = $data['id']; + if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) { + $ns = cleanID($matches[1]) . 
':'; + $id = str_replace($matches[0], '', $id); + } + $in_ns = !$data['not_in_ns']; $in_title = !$data['not_in_title']; @@ -241,8 +247,9 @@ function _ft_pageLookup(&$data){ $cleaned = cleanID($id); $matched_pages = array(); foreach ($pages as $p_id => $p_title) { - if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) !== false) || - ($in_title && stripos($p_title, $id) !== false)) { + if (((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) !== false) || + ($in_title && stripos($p_title, $id) !== false)) && + (!isset($ns) || strpos($p_id, $ns) === 0)) { $matched_pages[$p_id] = $p_title; } } -- cgit v1.2.3 From 8d22f1e96be5aa2c65ecb6ee934debbfe0f8f4cc Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sat, 26 Jun 2010 13:38:10 +0200 Subject: Changes to the ft_pageLookup and related event FS#1978 This patch changes the ft_pageLookup function to always return the title of pages with the result. This makes it easier to work with the array, as it no longer changes between numeric and key indexes depending on useheading. This also means that action plugins subscribed to SEARCH_QUERY_PAGELOOKUP need to be adjusted. The event contains a new data field called 'has_titles' which plugins can use to check for backwards compatibility. --- inc/fulltext.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index 950e7f7d3..1c9981812 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -215,29 +215,29 @@ function ft_mediause($id,$max){ * By default it only matches the pagename and ignores the * namespace. This can be changed with the second parameter. * The third parameter allows to search in titles as well. - * If the function should search in titles as well, the return array - * has the ids as key and the titles as value. 
* - * refactored into ft_pageLookup(), _ft_pageLookup() and trigger_event() + * The function always returns titles as well * + * @triggers SEARCH_QUERY_PAGELOOKUP * @author Andreas Gohr + * @author Adrian Lang */ -function ft_pageLookup($id, $not_in_ns=true, $not_in_title=true){ - $data = compact('id', 'not_in_ns', 'not_in_title'); +function ft_pageLookup($id, $in_ns=false, $in_title=false){ + $data = compact('id', 'in_ns', 'in_title'); + $data['has_titles'] = true; // for plugin backward compatibility check return trigger_event('SEARCH_QUERY_PAGELOOKUP', $data, '_ft_pageLookup'); } function _ft_pageLookup(&$data){ // split out original parameters - $id = $data['id']; if (preg_match('/(?:^| )@(\w+)/', $id, $matches)) { $ns = cleanID($matches[1]) . ':'; $id = str_replace($matches[0], '', $id); } - $in_ns = !$data['not_in_ns']; - $in_title = !$data['not_in_title']; + $in_ns = $data['in_ns']; + $in_title = $data['in_title']; $pages = array_map('rtrim', idx_getIndex('page', '')); $titles = array_map('rtrim', idx_getIndex('title', '')); @@ -267,7 +267,7 @@ function _ft_pageLookup(&$data){ } uasort($pages,'ft_pagesorter'); - return $in_title ? 
$pages : array_keys($pages); + return $pages; } /** -- cgit v1.2.3 From d0bdf7659fce98c2922b151766d51d5c7e8814d6 Mon Sep 17 00:00:00 2001 From: Adrian Lang Date: Fri, 20 Aug 2010 18:53:48 +0200 Subject: Use namespace filter in quicksearch with empty search term --- inc/fulltext.php | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index 1c9981812..cac2de4a4 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -243,17 +243,21 @@ function _ft_pageLookup(&$data){ $titles = array_map('rtrim', idx_getIndex('title', '')); $pages = array_combine($pages, $titles); - if($id !== '' && cleanID($id) !== '') { + if ($id !== '' && cleanID($id) !== '') { $cleaned = cleanID($id); - $matched_pages = array(); foreach ($pages as $p_id => $p_title) { - if (((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) !== false) || - ($in_title && stripos($p_title, $id) !== false)) && - (!isset($ns) || strpos($p_id, $ns) === 0)) { - $matched_pages[$p_id] = $p_title; + if ((strpos($in_ns ? 
$p_id : noNSorNS($p_id), $cleaned) === false) && + ($in_title && stripos($p_title, $id) === false)) { + unset($pages[$p_id]); + } + } + } + if (isset($ns)) { + foreach (array_keys($pages) as $p_id) { + if (strpos($p_id, $ns) !== 0) { + unset($pages[$p_id]); } } - $pages = $matched_pages; } // discard hidden pages -- cgit v1.2.3 From 5479a8c3341247ca228026819f20f3ab5c34a80f Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 29 Aug 2010 14:13:47 +0200 Subject: fixed page lookup when useheading is disabled this was broken by d0bdf7659fce98c2922b151766d51d5c7e8814d6 --- inc/fulltext.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'inc/fulltext.php') diff --git a/inc/fulltext.php b/inc/fulltext.php index cac2de4a4..e90205e9c 100644 --- a/inc/fulltext.php +++ b/inc/fulltext.php @@ -243,11 +243,11 @@ function _ft_pageLookup(&$data){ $titles = array_map('rtrim', idx_getIndex('title', '')); $pages = array_combine($pages, $titles); - if ($id !== '' && cleanID($id) !== '') { - $cleaned = cleanID($id); + $cleaned = cleanID($id); + if ($id !== '' && $cleaned !== '') { foreach ($pages as $p_id => $p_title) { if ((strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) === false) && - ($in_title && stripos($p_title, $id) === false)) { + (!$in_title || (stripos($p_title, $id) === false)) ) { unset($pages[$p_id]); } } -- cgit v1.2.3