summaryrefslogtreecommitdiff
path: root/modules/search/search.module
diff options
context:
space:
mode:
Diffstat (limited to 'modules/search/search.module')
-rw-r--r--modules/search/search.module99
1 files changed, 95 insertions, 4 deletions
diff --git a/modules/search/search.module b/modules/search/search.module
index 027643485..51db8695d 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -1142,12 +1142,13 @@ function search_excerpt($keys, $text) {
array_walk($keys, '_search_excerpt_replace');
$workkeys = $keys;
- // Extract a fragment per keyword for at most 4 keywords.
+ // Extract fragments around keywords.
// First we collect ranges of text around each keyword, starting/ending
- // at spaces.
+ // at spaces, trying to get to 256 characters.
// If the sum of all fragments is too short, we look for second occurrences.
$ranges = array();
$included = array();
+ $foundkeys = array();
$length = 0;
while ($length < 256 && count($workkeys)) {
foreach ($workkeys as $k => $key) {
@@ -1164,10 +1165,26 @@ function search_excerpt($keys, $text) {
if (!isset($included[$key])) {
$included[$key] = 0;
}
- // Locate a keyword (position $p), then locate a space in front (position
- // $q) and behind it (position $s)
+ // Locate a keyword (position $p, always >0 because $text starts with a
+ // space). First try bare keyword, but if that doesn't work, try to find a
+ // derived form from search_simplify().
+ $p = 0;
if (preg_match('/' . $boundary . $key . $boundary . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
$p = $match[0][1];
+ }
+ else {
+ $info = search_simplify_excerpt_match($key, $text, $included[$key], $boundary);
+ if ($info['where']) {
+ $p = $info['where'];
+ if ($info['keyword']) {
+ $foundkeys[] = $info['keyword'];
+ }
+ }
+ }
+ // Now locate a space in front (position $q) and behind it (position $s),
+ // leaving about 60 characters extra before and after for context.
+ // Note that a space was added to the front and end of $text above.
+ if ($p) {
if (($q = strpos(' ' . $text, ' ', max(0, $p - 61))) !== FALSE) {
$end = substr($text . ' ', $p, 80);
if (($s = strrpos($end, ' ')) !== FALSE) {
@@ -1233,6 +1250,10 @@ function search_excerpt($keys, $text) {
$text = (isset($newranges[0]) ? '' : $dots[0]) . implode($dots[1], $out) . $dots[2];
$text = check_plain($text);
+ // Slash-escape quotes in keys found in a derived form and merge with original keys.
+ array_walk($foundkeys, '_search_excerpt_replace');
+ $keys = array_merge($keys, $foundkeys);
+
// Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
$text = preg_replace('/' . $boundary . '(' . implode('|', $keys) . ')' . $boundary . '/iu', '<strong>\0</strong>', $text);
return $text;
@@ -1250,6 +1271,76 @@ function _search_excerpt_replace(&$text) {
}
/**
+ * Find words in the original text that matched via search_simplify().
+ *
+ * This is called in search_excerpt() if an exact match is not found in the
+ * text, so that we can find the derived form that matches.
+ *
+ * @param $key
+ * The keyword to find.
+ * @param $text
+ * The text to search for the keyword.
+ * @param $offset
+ * Offset position in $text to start searching at.
+ * @param $boundary
+ * Text to include in a regular expression that will match a word boundary.
+ *
+ * @return
+ * FALSE if no match is found. If a match is found, return an associative
+ * array with element 'where' giving the position of the match, and element
+ * 'keyword' giving the actual word found in the text at that position.
+ */
+function search_simplify_excerpt_match($key, $text, $offset, $boundary) {
+  $pos = NULL;
+  // Initialized up front so the return statement can never read an undefined
+  // variable, whatever path the loops below take.
+  $keyfound = '';
+  // Cast so the strict string comparisons below are well defined.
+  $simplified_key = (string) search_simplify($key);
+  $simplified_text = (string) search_simplify($text);
+
+  // Return immediately if the simplified key or text is empty. An empty key
+  // would reduce the pattern below to two adjacent boundaries, and on PHP 8
+  // strpos() with an empty needle reports a match at position 0, so the very
+  // first window would be accepted as a (bogus) match.
+  if ($simplified_key === '' || $simplified_text === '') {
+    return FALSE;
+  }
+
+  // Check if we have a match after simplification in the text.
+  // NOTE(review): $offset is a position in $text but is applied here to
+  // $simplified_text; if simplification changes the length of the text these
+  // positions may not line up. Confirm against search_simplify().
+  if (!preg_match('/' . $boundary . $simplified_key . $boundary . '/iu', $simplified_text, $match, PREG_OFFSET_CAPTURE, $offset)) {
+    return FALSE;
+  }
+
+  // If we get here, we have a match. Now find the exact location of the match
+  // and the original text that matched. Start by splitting up the text by all
+  // potential starting points of the matching text and iterating through them.
+  // Each element is array(piece, byte offset of the piece in $text); pieces
+  // that are empty or whitespace only are dropped by the filter callback.
+  $split = array_filter(preg_split('/' . $boundary . '/iu', $text, -1, PREG_SPLIT_OFFSET_CAPTURE), '_search_excerpt_match_filter');
+  foreach ($split as $value) {
+    // Skip starting points before the offset.
+    if ($value[1] < $offset) {
+      continue;
+    }
+
+    // Check a window of 80 characters after the starting point for a match,
+    // based on the size of the excerpt window. The window is cut with
+    // substr(), i.e. it is measured in bytes, like the offsets above.
+    $window = substr($text, $value[1], 80);
+    $simplified_window = (string) search_simplify($window);
+    if (strpos($simplified_window, $simplified_key) === 0) {
+      // We have a match in this window. Store the position of the match.
+      $pos = $value[1];
+      // Iterate through the text in the window until we find the full original
+      // matching text: grow a prefix one byte at a time until its simplified
+      // form equals the simplified key. If no prefix ever matches, $keyfound
+      // is left holding the whole window.
+      $length = strlen($window);
+      for ($i = 1; $i <= $length; $i++) {
+        $keyfound = substr($text, $value[1], $i);
+        // Strict comparison: with == two numeric-looking strings such as
+        // '1e3' and '1000' compare equal and would end the search too early.
+        if ($simplified_key === (string) search_simplify($keyfound)) {
+          break;
+        }
+      }
+      // Only the first matching starting point is reported.
+      break;
+    }
+  }
+
+  // A position of 0 is treated as "not found", the same as NULL.
+  return $pos ? array('where' => $pos, 'keyword' => $keyfound) : FALSE;
+}
+
+/**
+ * Helper function for array_filter() in search_simplify_excerpt_match().
+ *
+ * @param $var
+ *   One element of the array being filtered. Only index 0 is read; in the
+ *   preg_split() result filtered by search_simplify_excerpt_match() that is
+ *   the text of the piece (index 1 being its offset).
+ *
+ * @return
+ *   The length of the piece after trim(). This is 0 for pieces that are empty
+ *   or contain only whitespace, so array_filter() discards those pieces.
+ */
+function _search_excerpt_match_filter($var) {
+ return strlen(trim($var[0]));
+}
+
+/**
* Implements hook_forms().
*/
function search_forms() {