summary refs log tree commit diff
path: root/modules/search
diff options
context:
space:
mode:
author Dries Buytaert <dries@buytaert.net> 2010-07-22 16:16:42 +0000
committer Dries Buytaert <dries@buytaert.net> 2010-07-22 16:16:42 +0000
commit d79dff03ac0a40457ec5d380f73ecd0bf1ea3e35 (patch)
tree fb10b3cccb6ecc2166404e6049e18c44bb9cd288 /modules/search
parent 19c7193c4c3b54efc053d8fee98cb5d00fd79c18 (diff)
download brdo-d79dff03ac0a40457ec5d380f73ecd0bf1ea3e35.tar.gz
brdo-d79dff03ac0a40457ec5d380f73ecd0bf1ea3e35.tar.bz2
- Patch #124980 by jhodgdon: Indexer is removing ... and -- instead of replacing with a space.
Diffstat (limited to 'modules/search')
-rw-r--r-- modules/search/search.module 9
-rw-r--r-- modules/search/search.test 25
2 files changed, 27 insertions, 7 deletions
diff --git a/modules/search/search.module b/modules/search/search.module
index 334e8c14c..2ab3a4210 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -372,10 +372,13 @@ function search_simplify($text) {
// Readable regexp: ([number]+)[punctuation]+(?=[number])
$text = preg_replace('/([' . PREG_CLASS_NUMBERS . ']+)[' . PREG_CLASS_PUNCTUATION . ']+(?=[' . PREG_CLASS_NUMBERS . '])/u', '\1', $text);
+ // Multiple dot and dash groups are word boundaries and replaced with space.
+ // No need to use the unicode modifier here because 0-127 ASCII characters
+ // can't match higher UTF-8 characters, as the leftmost bit of those is 1.
+ $text = preg_replace('/[.-]{2,}/', ' ', $text);
+
// The dot, underscore and dash are simply removed. This allows meaningful
- // search behavior with acronyms and URLs. No need to use the unicode modifer
- // here because 0-127 ASCII characters can't match higher UTF-8 characters as
- // the leftmost bit of those are 1.
+ // search behavior with acronyms and URLs. See unicode note directly above.
$text = preg_replace('/[._-]+/', '', $text);
// With the exception of the rules above, we consider all punctuation,
diff --git a/modules/search/search.test b/modules/search/search.test
index ae93e76f8..b6a72d0bd 100644
--- a/modules/search/search.test
+++ b/modules/search/search.test
@@ -935,22 +935,22 @@ class SearchCommentCountToggleTestCase extends DrupalWebTestCase {
$this->drupalPost('', $edit, t('Search'));
$this->assertNoText(t('0 comments'), t('Empty comment count does not display for nodes with comment status set to Hidden'));
$this->assertNoText(t('1 comment'), t('Non-empty comment count does not display for nodes with comment status set to Hidden'));
- }
+ }
}
/**
- * Test search_simplify() on every Unicode character.
+ * Test search_simplify() on every Unicode character, and some other cases.
*/
class SearchSimplifyTestCase extends DrupalWebTestCase {
public static function getInfo() {
return array(
'name' => 'Search simplify',
- 'description' => 'Check that simplification works as intended.',
+ 'description' => 'Check that the search_simplify() function works as intended.',
'group' => 'Search',
);
}
- function testSearchSimplify() {
+ function testSearchSimplifyUnicode() {
$input = file_get_contents(DRUPAL_ROOT . '/modules/search/tests/UnicodeTest.txt');
$strings = explode(chr(10), $input);
foreach ($strings as $key => $string) {
@@ -969,6 +969,23 @@ class SearchSimplifyTestCase extends DrupalWebTestCase {
// Diff really does not like files starting with \0 so test it separately.
$this->assertIdentical(' ', search_simplify($string), t('Search simplify works for ASCII control characters.'));
}
+
+ /**
+ * Tests that search_simplify() does the right thing with punctuation.
+ */
+ function testSearchSimplifyPunctuation() {
+ $cases = array(
+ array('20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'),
+ array('great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'),
+ array('very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'),
+ array('regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'),
+ );
+
+ foreach ($cases as $case) {
+ $out = trim(search_simplify($case[0]));
+ $this->assertEqual($out, $case[1], $case[2]);
+ }
+ }
}
/**