summary | refs | log | tree | commit | diff
path: root/inc/fulltext.php
diff options
context:
space:
mode:
author: Andreas Gohr <gohr@cosmocode.de> 2009-10-26 10:23:59 +0100
committer: Andreas Gohr <gohr@cosmocode.de> 2009-10-26 10:23:59 +0100
commit: 60e91a171860bce870e3b3e9109d1313ed6bc071 (patch)
tree: 43350ec2e0635c3ef9b82db0e3158d9598ff7a7b /inc/fulltext.php
parent: c155c65d54ff309f8cc97e92c3742dac49178e32 (diff)
download: rpg-60e91a171860bce870e3b3e9109d1313ed6bc071.tar.gz
download: rpg-60e91a171860bce870e3b3e9109d1313ed6bc071.tar.bz2
added FULLTEXT_SNIPPET_CREATE event
Ignore-this: a0ebcdd129f4256e4be029e7fdf7ca45 darcs-hash:20091026092359-6e07b-4c41896825e091a3c8fbbeadc3bc7764d0735bf6.gz
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r-- inc/fulltext.php 153
1 files changed, 84 insertions, 69 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 06834f5ae..afb15528e 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -278,81 +278,96 @@ function ft_pagesorter($a, $b){
* Creates a snippet extract
*
* @author Andreas Gohr <andi@splitbrain.org>
+ * @triggers FULLTEXT_SNIPPET_CREATE
*/
function ft_snippet($id,$highlight){
- $text = rawWiki($id);
- $match = array();
- $snippets = array();
- $utf8_offset = $offset = $end = 0;
- $len = utf8_strlen($text);
-
- // build a regexp from the phrases to highlight
- $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')';
- $re2 = "$re1.{0,75}(?!\\1)$re1";
- $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
-
- for ($cnt=4; $cnt--;) {
- if (0) {
- } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
- } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
- } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
- } else {
- break;
- }
-
- list($str,$idx) = $match[0];
-
- // convert $idx (a byte offset) into a utf8 character offset
- $utf8_idx = utf8_strlen(substr($text,0,$idx));
- $utf8_len = utf8_strlen($str);
-
- // establish context, 100 bytes surrounding the match string
- // first look to see if we can go 100 either side,
- // then drop to 50 adding any excess if the other side can't go to 50,
- $pre = min($utf8_idx-$utf8_offset,100);
- $post = min($len-$utf8_idx-$utf8_len,100);
-
- if ($pre>50 && $post>50) {
- $pre = $post = 50;
- } else if ($pre>50) {
- $pre = min($pre,100-$post);
- } else if ($post>50) {
- $post = min($post, 100-$pre);
- } else {
- // both are less than 50, means the context is the whole string
- // make it so and break out of this loop - there is no need for the
- // complex snippet calculations
- $snippets = array($text);
- break;
- }
-
- // establish context start and end points, try to append to previous
- // context if possible
- $start = $utf8_idx - $pre;
- $append = ($start < $end) ? $end : false; // still the end of the previous context snippet
- $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
+ $text = rawWiki($id);
+ $evdata = array(
+ 'id' => $id,
+ 'text' => &$text,
+ 'highlight' => &$highlight,
+ 'snippet' => '',
+ );
+
+ $evt = new Doku_Event('FULLTEXT_SNIPPET_CREATE',$evdata);
+ if ($evt->advise_before()) {
+ $match = array();
+ $snippets = array();
+ $utf8_offset = $offset = $end = 0;
+ $len = utf8_strlen($text);
+
+ // build a regexp from the phrases to highlight
+ $re1 = '('.join('|',array_map('preg_quote_cb',array_filter((array) $highlight))).')';
+ $re2 = "$re1.{0,75}(?!\\1)$re1";
+ $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
+
+ for ($cnt=4; $cnt--;) {
+ if (0) {
+ } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+ } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+ } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
+ } else {
+ break;
+ }
+
+ list($str,$idx) = $match[0];
+
+ // convert $idx (a byte offset) into a utf8 character offset
+ $utf8_idx = utf8_strlen(substr($text,0,$idx));
+ $utf8_len = utf8_strlen($str);
+
+ // establish context, 100 bytes surrounding the match string
+ // first look to see if we can go 100 either side,
+ // then drop to 50 adding any excess if the other side can't go to 50,
+ $pre = min($utf8_idx-$utf8_offset,100);
+ $post = min($len-$utf8_idx-$utf8_len,100);
+
+ if ($pre>50 && $post>50) {
+ $pre = $post = 50;
+ } else if ($pre>50) {
+ $pre = min($pre,100-$post);
+ } else if ($post>50) {
+ $post = min($post, 100-$pre);
+ } else {
+ // both are less than 50, means the context is the whole string
+ // make it so and break out of this loop - there is no need for the
+ // complex snippet calculations
+ $snippets = array($text);
+ break;
+ }
+
+ // establish context start and end points, try to append to previous
+ // context if possible
+ $start = $utf8_idx - $pre;
+ $append = ($start < $end) ? $end : false; // still the end of the previous context snippet
+ $end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
+
+ if ($append) {
+ $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
+ } else {
+ $snippets[] = utf8_substr($text,$start,$end-$start);
+ }
+
+ // set $offset for next match attempt
+ // substract strlen to avoid splitting a potential search success,
+ // this is an approximation as the search pattern may match strings
+ // of varying length and it will fail if the context snippet
+ // boundary breaks a matching string longer than the current match
+ $utf8_offset = $utf8_idx + $post;
+ $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
+ $offset = utf8_correctIdx($text,$offset);
+ }
- if ($append) {
- $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
- } else {
- $snippets[] = utf8_substr($text,$start,$end-$start);
- }
+ $m = "\1";
+ $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
+ $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));
- // set $offset for next match attempt
- // substract strlen to avoid splitting a potential search success,
- // this is an approximation as the search pattern may match strings
- // of varying length and it will fail if the context snippet
- // boundary breaks a matching string longer than the current match
- $utf8_offset = $utf8_idx + $post;
- $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
- $offset = utf8_correctIdx($text,$offset);
+ $evdata['snippet'] = $snippet;
}
+ $evt->advise_after();
+ unset($evt);
- $m = "\1";
- $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
- $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));
-
- return $snippet;
+ return $evdata['snippet'];
}
/**