summaryrefslogtreecommitdiff
path: root/modules/search
diff options
context:
space:
mode:
authorDries Buytaert <dries@buytaert.net>2010-08-10 01:11:36 +0000
committerDries Buytaert <dries@buytaert.net>2010-08-10 01:11:36 +0000
commitbdb91dbe95efc586a2e9fb3aa5beae80fc656956 (patch)
tree234df97df99a45ba054bffd2e27897cba51bf7bb /modules/search
parenta2d78da0c2b2a991aea9e0752714d129e8a9366d (diff)
downloadbrdo-bdb91dbe95efc586a2e9fb3aa5beae80fc656956.tar.gz
brdo-bdb91dbe95efc586a2e9fb3aa5beae80fc656956.tar.bz2
- Patch #269911 by jhodgdon, Freso, kiamlaluno, mradcliffe: search result trimming should not fall inside HTML entities/tags.
Diffstat (limited to 'modules/search')
-rw-r--r--modules/search/search.module29
-rw-r--r--modules/search/search.test48
2 files changed, 69 insertions, 8 deletions
diff --git a/modules/search/search.module b/modules/search/search.module
index e37ba95c4..82d6a1b12 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -1090,8 +1090,11 @@ function search_excerpt($keys, $text) {
preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
$keys = array_merge($matches[2], $matches[3]);
- // Prepare text
- $text = ' ' . strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) . ' ';
+ // Prepare text by stripping HTML tags and decoding HTML entities.
+ $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
+ $text = decode_entities($text);
+
+ // Slash-escape quotes in the search keyword string.
array_walk($keys, '_search_excerpt_replace');
$workkeys = $keys;
@@ -1121,9 +1124,12 @@ function search_excerpt($keys, $text) {
// $q) and behind it (position $s)
if (preg_match('/' . $boundary . $key . $boundary . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
$p = $match[0][1];
- if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
- $end = substr($text, $p, 80);
+ if (($q = strpos(' ' . $text, ' ', max(0, $p - 61))) !== FALSE) {
+ $end = substr($text . ' ', $p, 80);
if (($s = strrpos($end, ' ')) !== FALSE) {
+ // Account for the added spaces.
+ $q = max($q - 1, 0);
+ $s = min($s, drupal_strlen($end) - 1);
$ranges[$q] = $p + $s;
$length += $p + $s - $q;
$included[$key] = $p + 1;
@@ -1142,9 +1148,11 @@ function search_excerpt($keys, $text) {
}
}
- // If we didn't find anything, return the beginning.
if (count($ranges) == 0) {
- return truncate_utf8($text, 256, TRUE, TRUE);
+ // We didn't find any keyword matches, so just return the first part of the
+ // text. We also need to re-encode any HTML special characters that we
+ // entity-decoded above.
+ return check_plain(truncate_utf8($text, 256, TRUE, TRUE));
}
// Sort the text ranges by starting position.
@@ -1174,7 +1182,12 @@ function search_excerpt($keys, $text) {
foreach ($newranges as $from => $to) {
$out[] = substr($text, $from, $to - $from);
}
- $text = (isset($newranges[0]) ? '' : '... ') . implode(' ... ', $out) . ' ...';
+
+ // Let translators have the ... separator text as one chunk.
+ $dots = explode('!excerpt', t('... !excerpt ... !excerpt ...'));
+
+ $text = (isset($newranges[0]) ? '' : $dots[0]) . implode($dots[1], $out) . $dots[2];
+ $text = check_plain($text);
// Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
$text = preg_replace('/' . $boundary . '(' . implode('|', $keys) . ')' . $boundary . '/iu', '<strong>\0</strong>', $text);
@@ -1186,7 +1199,7 @@ function search_excerpt($keys, $text) {
*/
/**
- * Helper function for array_walk in search_except.
+ * Helper function for array_walk() in search_excerpt().
*/
function _search_excerpt_replace(&$text) {
$text = preg_quote($text, '/');
diff --git a/modules/search/search.test b/modules/search/search.test
index 9079326a1..c43f8d7d2 100644
--- a/modules/search/search.test
+++ b/modules/search/search.test
@@ -1138,6 +1138,54 @@ class SearchConfigSettingsForm extends DrupalWebTestCase {
}
/**
+ * Tests the search_excerpt() function.
+ */
+class SearchExcerptTestCase extends DrupalUnitTestCase {
+  public static function getInfo() {
+    return array(
+      'name' => 'Search excerpt extraction',
+      'description' => 'Tests that the search_excerpt() function works.',
+      'group' => 'Search',
+    );
+  }
+
+  function setUp() {
+    parent::setUp('search');
+  }
+
+  /**
+   * Tests search_excerpt() with several simulated search keywords.
+   *
+   * Uses marked-up sample text ("The quick brown fox & jumps over
+   * the lazy dog") and checks three situations: a keyword that is
+   * absent (short and long text), a keyword that is present and
+   * must be highlighted, and accented-letter entities that must
+   * come back decoded instead of as "&...;" sequences.
+   */
+  function testSearchExcerpt() {
+    // Make some text with entities and tags.
+    $text = 'The <strong>quick</strong> <a href="#">brown</a> fox &amp; jumps <h2>over</h2> the lazy dog';
+    // Note: search_excerpt() pads stripped tags with extra spaces, which do
+    // not matter for HTML output. Collapse them once before comparing.
+    $expected = 'The quick brown fox &amp; jumps over the lazy dog';
+    $result = preg_replace('| +|', ' ', search_excerpt('nothing', $text));
+    $this->assertEqual($result, $expected, 'Entire string is returned when keyword is not found in short string');
+
+    $result = preg_replace('| +|', ' ', search_excerpt('fox', $text));
+    $this->assertEqual($result, 'The quick brown <strong>fox</strong> &amp; jumps over the lazy dog ...', 'Found keyword is highlighted');
+
+    $longtext = str_repeat($text . ' ', 10);
+    $result = preg_replace('| +|', ' ', search_excerpt('nothing', $longtext));
+    $this->assertTrue(strpos($result, $expected) === 0, 'When keyword is not found in long string, return value starts as expected');
+
+    $entities = str_repeat('k&eacute;sz&iacute;t&eacute;se ', 20);
+    $result = preg_replace('| +|', ' ', search_excerpt('nothing', $entities));
+    $this->assertTrue(strpos($result, '&') === FALSE, 'Entities are not present in excerpt');
+    $this->assertTrue(strpos($result, 'í') > 0, 'Entities are converted in excerpt');
+  }
+}
+
+/**
* Test the CJK tokenizer.
*/
class SearchTokenizerTestCase extends DrupalWebTestCase {