summaryrefslogtreecommitdiff
path: root/modules/search
diff options
context:
space:
mode:
authorAngie Byron <webchick@24967.no-reply.drupal.org>2010-11-22 07:32:12 +0000
committerAngie Byron <webchick@24967.no-reply.drupal.org>2010-11-22 07:32:12 +0000
commit70bd9d5252e4a900e1f621ee7e3efd09ccb68cca (patch)
treec8f516bcba6d849baa528ffd2c7c461f4800fdbb /modules/search
parent5b3c6ab59548a31fac71a3e408085c9204ac1438 (diff)
downloadbrdo-70bd9d5252e4a900e1f621ee7e3efd09ccb68cca.tar.gz
brdo-70bd9d5252e4a900e1f621ee7e3efd09ccb68cca.tar.bz2
#74673 by jhodgdon, mcarbone: Search does not correctly handle large numbers (> maxint) so you cannot search by UPC code.
Diffstat (limited to 'modules/search')
-rw-r--r--modules/search/search.extender.inc5
-rw-r--r--modules/search/search.module27
-rw-r--r--modules/search/search.test241
3 files changed, 243 insertions, 30 deletions
diff --git a/modules/search/search.extender.inc b/modules/search/search.extender.inc
index 5347bb52a..1b001f07c 100644
--- a/modules/search/search.extender.inc
+++ b/modules/search/search.extender.inc
@@ -192,7 +192,9 @@ class SearchQuery extends SelectQueryExtender {
$phrase = TRUE;
$this->simple = FALSE;
}
- // Simplify keyword according to indexing rules and external preprocessors.
+ // Simplify keyword according to indexing rules and external
+ // preprocessors. Use same process as during search indexing, so it
+ // will match search index.
$words = search_simplify($match[2]);
// Re-explode in case simplification added more words, except when
// matching a phrase.
@@ -294,7 +296,6 @@ class SearchQuery extends SelectQueryExtender {
foreach ($split as $s) {
$num = is_numeric($s);
if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
- $s = $num ? ((int)ltrim($s, '-0')) : $s;
if (!isset($this->words[$s])) {
$this->words[$s] = $s;
$num_new_scores++;
diff --git a/modules/search/search.module b/modules/search/search.module
index 3489fd74f..d6e6350ae 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -399,6 +399,14 @@ function search_update_totals() {
/**
* Simplifies a string according to indexing rules.
+ *
+ * @param $text
+ * Text to simplify.
+ *
+ * @return
+ * Simplified text.
+ *
+ * @see hook_search_preprocess()
*/
function search_simplify($text) {
// Decode entities to UTF-8
@@ -436,6 +444,11 @@ function search_simplify($text) {
// marks, spacers, etc, to be a word boundary.
$text = preg_replace('/[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . ']+/u', ' ', $text);
+ // Truncate everything to 50 characters.
+ $words = explode(' ', $text);
+ array_walk($words, '_search_index_truncate');
+ $text = implode(' ', $words);
+
return $text;
}
@@ -486,7 +499,7 @@ function search_expand_cjk($matches) {
}
/**
- * Splits a string into tokens for indexing.
+ * Simplifies and splits a string into tokens for indexing.
*/
function search_index_split($text) {
$last = &drupal_static(__FUNCTION__);
@@ -498,7 +511,6 @@ function search_index_split($text) {
// Process words
$text = search_simplify($text);
$words = explode(' ', $text);
- array_walk($words, '_search_index_truncate');
// Save last keyword result
$last = $text;
@@ -511,6 +523,9 @@ function search_index_split($text) {
* Helper function for array_walk in search_index_split.
*/
function _search_index_truncate(&$text) {
+ if (is_numeric($text)) {
+ $text = ltrim($text, '0');
+ }
$text = truncate_utf8($text, 50);
}
@@ -644,14 +659,8 @@ function search_index($sid, $module, $text) {
foreach ($words as $word) {
// Add word to accumulator
$accum .= $word . ' ';
- $num = is_numeric($word);
// Check wordlength
- if ($num || drupal_strlen($word) >= $minimum_word_size) {
- // Normalize numbers
- if ($num) {
- $word = (int)ltrim($word, '-0');
- }
-
+ if (is_numeric($word) || drupal_strlen($word) >= $minimum_word_size) {
// Links score mainly for the target.
if ($link) {
if (!isset($results[$linknid])) {
diff --git a/modules/search/search.test b/modules/search/search.test
index c7a18ef46..35f363a67 100644
--- a/modules/search/search.test
+++ b/modules/search/search.test
@@ -526,7 +526,7 @@ class SearchRankingTestCase extends DrupalWebTestCase {
// See testRankings() above - build a node that will rank high for sticky.
$settings = array(
- 'type' => 'page',
+ 'type' => 'page',
'title' => 'Drupal rocks',
'body' => array(LANGUAGE_NONE => array(array('value' => "Drupal's search rocks"))),
'sticky' => 1,
@@ -929,9 +929,9 @@ class SearchExpressionInsertExtractTestCase extends DrupalUnitTestCase {
/**
* Tests that comment count display toggles properly on comment status of node
- *
+ *
* Issue 537278
- *
+ *
* - Nodes with comment status set to Open should always how comment counts
* - Nodes with comment status set to Closed should show comment counts
* only when there are comments
@@ -940,7 +940,7 @@ class SearchExpressionInsertExtractTestCase extends DrupalUnitTestCase {
class SearchCommentCountToggleTestCase extends DrupalWebTestCase {
protected $searching_user;
protected $searchable_nodes;
-
+
public static function getInfo() {
return array(
'name' => 'Comment count toggle',
@@ -957,23 +957,23 @@ class SearchCommentCountToggleTestCase extends DrupalWebTestCase {
// Create initial nodes.
$node_params = array('type' => 'article', 'body' => array(LANGUAGE_NONE => array(array('value' => 'SearchCommentToggleTestCase'))));
-
+
$this->searchable_nodes['1 comment'] = $this->drupalCreateNode($node_params);
$this->searchable_nodes['0 comments'] = $this->drupalCreateNode($node_params);
-
+
// Login with sufficient privileges.
$this->drupalLogin($this->searching_user);
-
+
// Create a comment array
$edit_comment = array();
$edit_comment['subject'] = $this->randomName();
$edit_comment['comment_body[' . LANGUAGE_NONE . '][0][value]'] = $this->randomName();
$filtered_html_format_id = db_query_range('SELECT format FROM {filter_format} WHERE name = :name', 0, 1, array(':name' => 'Filtered HTML'))->fetchField();
$edit_comment['comment_body[' . LANGUAGE_NONE . '][0][format]'] = $filtered_html_format_id;
-
+
// Post comment to the test node with comment
$this->drupalPost('comment/reply/' . $this->searchable_nodes['1 comment']->nid, $edit_comment, t('Save'));
-
+
// First update the index. This does the initial processing.
node_update_index();
@@ -996,13 +996,13 @@ class SearchCommentCountToggleTestCase extends DrupalWebTestCase {
$this->drupalPost('', $edit, t('Search'));
$this->assertText(t('0 comments'), t('Empty comment count displays for nodes with comment status set to Open'));
$this->assertText(t('1 comment'), t('Non-empty comment count displays for nodes with comment status set to Open'));
-
+
// Test comment count display for nodes with comment status set to Closed
$this->searchable_nodes['0 comments']->comment = COMMENT_NODE_CLOSED;
node_save($this->searchable_nodes['0 comments']);
$this->searchable_nodes['1 comment']->comment = COMMENT_NODE_CLOSED;
node_save($this->searchable_nodes['1 comment']);
-
+
$this->drupalPost('', $edit, t('Search'));
$this->assertNoText(t('0 comments'), t('Empty comment count does not display for nodes with comment status set to Closed'));
$this->assertText(t('1 comment'), t('Non-empty comment count displays for nodes with comment status set to Closed'));
@@ -1011,8 +1011,8 @@ class SearchCommentCountToggleTestCase extends DrupalWebTestCase {
$this->searchable_nodes['0 comments']->comment = COMMENT_NODE_HIDDEN;
node_save($this->searchable_nodes['0 comments']);
$this->searchable_nodes['1 comment']->comment = COMMENT_NODE_HIDDEN;
- node_save($this->searchable_nodes['1 comment']);
-
+ node_save($this->searchable_nodes['1 comment']);
+
$this->drupalPost('', $edit, t('Search'));
$this->assertNoText(t('0 comments'), t('Empty comment count does not display for nodes with comment status set to Hidden'));
$this->assertNoText(t('1 comment'), t('Non-empty comment count does not display for nodes with comment status set to Hidden'));
@@ -1031,23 +1031,55 @@ class SearchSimplifyTestCase extends DrupalWebTestCase {
);
}
+ /**
+ * Tests that all Unicode characters simplify correctly.
+ */
function testSearchSimplifyUnicode() {
+ // This test uses a file that was constructed so that the even lines are
+ // boundary characters, and the odd lines are valid word characters. (It
+ // was generated as a sequence of all the Unicode characters, and then the
+ // boundary chararacters (punctuation, spaces, etc.) were split off into
+ // their own lines). So the even-numbered lines should simplify to nothing,
+ // and the odd-numbered lines we need to split into shorter chunks and
+ // verify that simplification doesn't lose any characters.
$input = file_get_contents(DRUPAL_ROOT . '/modules/search/tests/UnicodeTest.txt');
- $strings = explode(chr(10), $input);
- foreach ($strings as $key => $string) {
- $simplified = search_simplify($string);
- if ($key % 2) {
+ $basestrings = explode(chr(10), $input);
+ $strings = array();
+ foreach ($basestrings as $key => $string) {
+ if ($key %2) {
+ // Even line - should simplify down to a space.
+ $simplified = search_simplify($string);
$this->assertIdentical($simplified, ' ', "Line $key is excluded from the index");
}
else {
- $this->assertTrue(drupal_strlen($simplified) >= drupal_strlen($string), "Nothing is removed on line $key.");
+ // Odd line, should be word characters.
+ // Split this into 30-character chunks, so we don't run into limits
+ // of truncation in search_simplify().
+ $start = 0;
+ while ($start < drupal_strlen($string)) {
+ $newstr = drupal_substr($string, $start, 30);
+ // Special case: leading zeros are removed from numeric strings,
+ // and there's one string in this file that is numbers starting with
+ // zero, so prepend a 1 on that string.
+ if (preg_match('/^[0-9]+$/', $newstr)) {
+ $newstr = '1' . $newstr;
+ }
+ $strings[] = $newstr;
+ $start += 30;
+ }
}
}
+ foreach ($strings as $key => $string) {
+ $simplified = search_simplify($string);
+ $this->assertTrue(drupal_strlen($simplified) >= drupal_strlen($string), "Nothing is removed from string $key.");
+ }
+
+ // Test the low-numbered ASCII control characters separately. They are not
+ // in the text file because they are problematic for diff, especially \0.
$string = '';
for ($i = 0; $i < 32; $i++) {
$string .= chr($i);
}
- // Diff really does not like files starting with \0 so test it separately.
$this->assertIdentical(' ', search_simplify($string), t('Search simplify works for ASCII control characters.'));
}
@@ -1122,6 +1154,177 @@ class SearchKeywordsConditions extends DrupalWebTestCase {
}
/**
+ * Tests that numbers can be searched.
+ */
+class SearchNumbersTestCase extends DrupalWebTestCase {
+ protected $test_user;
+ protected $numbers;
+ protected $nodes;
+
+ public static function getInfo() {
+ return array(
+ 'name' => 'Search numbers',
+ 'description' => 'Check that numbers can be searched',
+ 'group' => 'Search',
+ );
+ }
+
+ function setUp() {
+ parent::setUp('search');
+
+ $this->test_user = $this->drupalCreateUser(array('search content', 'access content', 'administer nodes', 'access site reports'));
+ $this->drupalLogin($this->test_user);
+
+ // Create content with various numbers in it.
+ // Note: 50 characters is the current limit of the search index's word
+ // field.
+ $this->numbers = array(
+ 'ISBN' => '978-0446365383',
+ 'UPC' => '036000 291452',
+ 'EAN bar code' => '5901234123457',
+ 'negative' => '-123456.7890',
+ 'quoted negative' => '"-123456.7890"',
+ 'leading zero' => '0777777777',
+ 'tiny' => '111',
+ 'small' => '22222222222222',
+ 'medium' => '333333333333333333333333333',
+ 'large' => '444444444444444444444444444444444444444',
+ 'gigantic' => '5555555555555555555555555555555555555555555555555',
+ 'over fifty characters' => '666666666666666666666666666666666666666666666666666666666666',
+ //'date', '01/02/2009',
+ 'commas', '987,654,321',
+ );
+
+ foreach ($this->numbers as $doc => $num) {
+ $info = array(
+ 'body' => array(LANGUAGE_NONE => array(array('value' => $num))),
+ 'type' => 'page',
+ 'language' => LANGUAGE_NONE,
+ 'title' => $doc . ' number',
+ );
+ $this->nodes[$doc] = $this->drupalCreateNode($info);
+ }
+
+ // Run cron to ensure the content is indexed.
+ $this->cronRun();
+ $this->drupalGet('admin/reports/dblog');
+ $this->assertText(t('Cron run completed'), 'Log shows cron run completed');
+ }
+
+ /**
+ * Tests that all the numbers can be searched.
+ */
+ function testNumberSearching() {
+ $types = array_keys($this->numbers);
+
+ foreach ($types as $type) {
+ $number = $this->numbers[$type];
+ // If the number is negative, remove the - sign, because - indicates
+ // "not keyword" when searching.
+ $number = ltrim($number, '-');
+ $node = $this->nodes[$type];
+
+ // Verify that the node title does not appear on the search page
+ // with a dummy search.
+ $this->drupalPost('search/node',
+ array('keys' => 'foo'),
+ t('Search'));
+ $this->assertNoText($node->title, $type . ': node title not shown in dummy search');
+
+ // Verify that the node title does appear as a link on the search page
+ // when searching for the number.
+ $this->drupalPost('search/node',
+ array('keys' => $number),
+ t('Search'));
+ $this->assertText($node->title, $type . ': node title shown (search found the node) in search for number ' . $number);
+ }
+ }
+}
+
+/**
+ * Tests that numbers can be searched, with more complex matching.
+ */
+class SearchNumberMatchingTestCase extends DrupalWebTestCase {
+ protected $test_user;
+ protected $numbers;
+ protected $nodes;
+
+ public static function getInfo() {
+ return array(
+ 'name' => 'Search number matching',
+ 'description' => 'Check that numbers can be searched with more complex matching',
+ 'group' => 'Search',
+ );
+ }
+
+ function setUp() {
+ parent::setUp('search');
+
+ $this->test_user = $this->drupalCreateUser(array('search content', 'access content', 'administer nodes', 'access site reports'));
+ $this->drupalLogin($this->test_user);
+
+ // Define a group of numbers that should all match each other --
+ // numbers with internal punctuation should match each other, as well
+ // as numbers with and without leading zeros and leading/trailing
+ // . and -.
+ $this->numbers = array(
+ '123456789',
+ //'12/34/56789',
+ '12.3456789',
+ '12-34-56789',
+ '123,456,789',
+ '-123456789',
+ '0123456789',
+ );
+
+ foreach ($this->numbers as $num) {
+ $info = array(
+ 'body' => array(LANGUAGE_NONE => array(array('value' => $num))),
+ 'type' => 'page',
+ 'language' => LANGUAGE_NONE,
+ );
+ $this->nodes[] = $this->drupalCreateNode($info);
+ }
+
+ // Run cron to ensure the content is indexed.
+ $this->cronRun();
+ $this->drupalGet('admin/reports/dblog');
+ $this->assertText(t('Cron run completed'), 'Log shows cron run completed');
+ }
+
+ /**
+ * Tests that all the numbers can be searched.
+ */
+ function testNumberSearching() {
+ for ($i = 0; $i < count($this->numbers); $i++) {
+ $node = $this->nodes[$i];
+
+ // Verify that the node title does not appear on the search page
+ // with a dummy search.
+ $this->drupalPost('search/node',
+ array('keys' => 'foo'),
+ t('Search'));
+ $this->assertNoText($node->title, $i . ': node title not shown in dummy search');
+
+ // Now verify that we can find node i by searching for any of the
+ // numbers.
+ for ($j = 0; $j < count($this->numbers); $j++) {
+ $number = $this->numbers[$j];
+ // If the number is negative, remove the - sign, because - indicates
+ // "not keyword" when searching.
+ $number = ltrim($number, '-');
+
+ $this->drupalPost('search/node',
+ array('keys' => $number),
+ t('Search'));
+ $this->assertText($node->title, $i . ': node title shown (search found the node) in search for number ' . $number);
+ }
+ }
+
+ }
+}
+
+/**
* Test config page.
*/
class SearchConfigSettingsForm extends DrupalWebTestCase {