summaryrefslogtreecommitdiff
path: root/modules/search.module
blob: baf0809be1b393d323c755aa0379cdae5c977574 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
<?php
// $Id$

/**
 * Implementation of hook_help().
 *
 * Hands back the translated help text that belongs to a help section.
 *
 * @param $section
 *   The help section being requested. Defaults to the module's own help
 *   page, 'admin/help#search'.
 *
 * @return
 *   The translated text for 'admin/help#search', 'admin/modules#description'
 *   and 'admin/settings/search'; nothing for any other section.
 */
function search_help($section = 'admin/help#search') {
  if ($section == 'admin/help#search') {
    // The guidelines quote the configured minimum word length.
    $replacements = array('%number' => variable_get('minimum_word_size', 2));
    return t("
      <strong>Search guidelines</strong>
      <p>The search page allows you to search the web site's content.  You can specify multiple words, and they will all be searched for.  You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on.  Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.</p>
      <strong>Words excluded from the search</strong>
      <p>Words that frequently occur, typically called 'noise words', are ignored.  Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc.  Words shorter than %number letters are also ignored.</p>", $replacements);
  }

  if ($section == 'admin/modules#description') {
    return t('Enables site-wide keyword searching.');
  }

  if ($section == 'admin/settings/search') {
    return t('The search engine works by keeping an index of "interesting" words. To make sure we only get "interesting" words you need to set the following.');
  }
}

/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names this module declares.
 */
function search_perm() {
  $permissions = array();
  $permissions[] = 'search content';
  $permissions[] = 'administer search';
  return $permissions;
}

/**
 * Implementation of hook_link().
 *
 * @param $type
 *   The kind of link list being assembled; only 'page' is handled here.
 *
 * @return
 *   An array holding a single link to the search page when $type is 'page'
 *   and the current user may search content; an empty array otherwise.
 */
function search_link($type) {
  // Nothing to offer outside page links or without search permission.
  if ($type != 'page' || !user_access('search content')) {
    return array();
  }

  $text = t('search');
  $attributes = array('title' => t('Search for older content.'));

  return array(l($text, 'search', $attributes));
}

/**
 * Implementation of hook_menu().
 *
 * Registers the search page, the search help page and the configuration
 * tab.
 *
 * NOTE(review): the configuration tab is guarded by 'administer site
 * configuration', while search_perm() declares an 'administer search'
 * permission that is not checked here -- confirm which one is intended.
 *
 * @return
 *   An array of menu item definitions.
 */
function search_menu() {
  $search_page = array(
    'path' => 'search',
    'title' => t('search'),
    'callback' => 'search_view',
    'access' => user_access('search content'),
    'type' => MENU_SUGGESTED_ITEM,
  );

  $help_page = array(
    'path' => 'search/help',
    'title' => t('search help'),
    'callback' => 'search_help_page',
    'access' => user_access('search content'),
    'type' => MENU_SUGGESTED_ITEM,
  );

  $configure_tab = array(
    'path' => 'search/configure',
    'title' => t('configure'),
    'callback' => 'search_configure',
    'access' => user_access('administer site configuration'),
    'type' => MENU_LOCAL_TASK,
  );

  return array($search_page, $help_page, $configure_tab);
}

/**
 * Menu callback; displays the search module settings page.
 *
 * When the request carries POST data it is handed to system_settings_save()
 * first; the form is then built from the current variable values and
 * printed as a themed page.
 */
function search_configure() {
  // Saving happens before the form is built; presumably this lets the
  // variable_get() calls below pick up the values that were just submitted.
  if (!empty($_POST)) {
    system_settings_save();
  }

  // Indexing settings:
  $indexing = array();
  $indexing[] = form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 2), 10, 10, t('The number of characters a word has to be to be indexed.  Words shorter than this will not be searchable.'));
  $indexing[] = form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 0), 10, 10, t('The number of characters a word has to be to be searched for.'));
  $indexing[] = form_textarea(t('Noise words'), 'noisewords', variable_get('noisewords', ''), 70, 10, t('These words will not be indexed. Enter a comma separated list; linebreaks and whitespace do not matter.  Example: and, or, not, a, to, I, it, ...'));
  $form = form_group(t('Indexing settings'), implode('', $indexing));

  // Visual settings:
  $title = t('Help text position');
  $selected = variable_get('help_pos', 1);
  $positions = array(
    '1' => t('Above search output'),
    '2' => t('Below search output'),
    '3' => t('Link from above search output'),
    '4' => t('Link from below search output'),
  );
  $description = t('Where to show the help text for users on the search page.');
  $radios = form_radios($title, 'help_pos', $selected, $positions, $description);
  $form .= form_group(t('Viewing options'), $radios);

  print theme('page', system_settings_form($form));
}

/**
 * Implementation of hook_cron().
 *
 * Asks every module for its hook_update_index() description and passes
 * each non-empty answer to update_index(), bringing the search index up to
 * date.
 */
function search_cron() {
  $modules = module_list();

  foreach ($modules as $name) {
    $index_info = module_invoke($name, 'update_index');

    // Modules that return nothing have no content to index.
    if (!$index_info) {
      continue;
    }

    update_index($index_info);
  }
}

/**
 * Perform a search on a word or words.
 *
 * This function is called by each node that supports the indexed search.
 *
 * The keys are lower-cased, '*' wildcards are turned into SQL '%' wildcards
 * and the keys are split on spaces. Empty words (from repeated spaces) and
 * words shorter than the 'remove_short' setting are dropped; every
 * remaining word is looked up separately and only items that matched all
 * of the remaining words are returned.
 *
 * @param $search_array
 *   An array as returned from hook_search(). The format of this array is
 *   array('keys' => ..., 'type' => ..., 'select' => ...). See the hook_search()
 *   documentation for an explanation of the array values. The 'select'
 *   query must contain the placeholder '%' (including the single quotes),
 *   which is replaced by each escaped search word in turn.
 *
 * @return
 *   An array of search results, of which each element is an array with the
 *   keys "count", "title", "link", "user" (name), "date", and "keywords".
 *   The array is indexed from 0 and is empty when nothing matched.
 */
function do_search($search_array) {
  $keys = strtolower($search_array['keys']);
  $type = $search_array['type'];
  $select = $search_array['select'];

  // Replace wildcards with MySQL wildcards.
  $keys = str_replace('*', '%', $keys);

  // Decide up front which words take part in the search. Skipped words
  // must not be required later on, otherwise a query containing a single
  // short word (or a double space) could never match anything.
  $minimum_length = variable_get('remove_short', 0);
  $words = array();
  foreach (explode(' ', $keys) as $word) {
    if ($word !== '' && strlen($word) >= $minimum_length) {
      $words[] = $word;
    }
  }

  // $results collects one row per item, $reduction remembers which words
  // were found for which item.
  $results = array();
  $reduction = array();

  foreach ($words as $word) {
    // Put the next search word into the query and do the query.
    $query = str_replace("'%'", "'". check_query($word) ."'", $select);
    $result = db_query($query);

    while ($row = db_fetch_array($result)) {
      $lno = $row['lno'];
      $reduction[$lno][$word] = true;

      if (!isset($results[$lno])) {
        // First hit for this "lno". 'count' has to stay the first element:
        // the sort below compares rows element by element.
        $results[$lno] = array(
          'count' => $row['count'],
          'lno' => $lno,
          'nid' => $row['nid'],
          'title' => $row['title'],
          'created' => $row['created'],
          'uid' => $row['uid'],
          'name' => $row['name'],
        );
      }
      else {
        // Existing "lno". Increase the count of matches against this
        // "lno" by the number of times this word appears in the text.
        $results[$lno]['count'] = $results[$lno]['count'] + $row['count'];
      }
    }
  }

  // Keep only the items in which every searched word was found.
  $matches = array();
  foreach ($results as $lno => $values) {
    $pass = true;
    foreach ($words as $word) {
      if (empty($reduction[$lno][$word])) {
        $pass = false;
        break;
      }
    }
    if ($pass) {
      $matches[$lno] = $values;
    }
  }

  if (!$matches) {
    return array();
  }

  // array_multisort() compares the rows as whole arrays. Since 'count' is
  // the first element of every row this orders the items by descending
  // count, with the remaining fields acting as tie-breakers.
  array_multisort($matches, SORT_DESC);

  // Now, output the results.
  $keywords = implode('|', $words);
  $find = array();
  foreach ($matches as $value) {
    $lno = $value['lno'];
    switch ($type) {
      case 'node':
        $find[] = array('count' => $value['count'], 'title' => $value['title'], 'link' => url("node/$lno"), 'user' => $value['name'], 'date' => $value['created'], 'keywords' => $keywords);
        break;
      case 'comment':
        // Within an 'admin' URL link to the comment's edit page, elsewhere
        // to the comment's anchor on its node page.
        $nid = $value['nid'];
        $link = strstr(request_uri(), 'admin') ? url("admin/comment/edit/$lno") : url("node/$nid", NULL, "comment-$lno");
        $find[] = array('count' => $value['count'], 'title' => $value['title'], 'link' => $link, 'user' => $value['name'], 'date' => $value['created'], 'keywords' => $keywords);
        break;
    }
  }

  return $find;
}

/**
 * Update the search_index table.
 *
 * Runs the module's select query and, for every row returned, replaces the
 * index entries of that item with a fresh word count. Afterwards the
 * variable named by 'last_update' is set to the current time.
 *
 * @param $search_array
 *   An array as returned from hook_update_index(). The keys used here are:
 *   - 'last_update': name of the variable that stores the time of the last
 *     index run for this module.
 *   - 'node_type': the type stored with each index entry.
 *   - 'select': the query selecting the items to (re)index. Each row must
 *     provide 'lno', 'text1' and 'text2'.
 *
 * @return
 *   Always TRUE.
 */
function update_index($search_array) {
  $node_type = $search_array['node_type'];
  $select = $search_array['select'];
  $minimum_word_size = variable_get('minimum_word_size', 2);

  // Build a lookup table of noise words once for all rows. The indexed
  // text is lower-cased, so the noise words have to be lower-cased as well
  // to ever match.
  $noise = array();
  foreach (explode(',', variable_get('noisewords', '')) as $word) {
    $word = strtolower(trim($word));
    if ($word !== '') {
      $noise[$word] = true;
    }
  }

  $result = db_query($select);

  // Look through the nodes we just selected.
  while ($node = db_fetch_array($result)) {
    // Trash any existing entries in the search index for this node,
    // in case it is a modified node.
    db_query("DELETE FROM {search_index} WHERE lno = %d AND type = '%s'", $node['lno'], $node_type);

    // Build the word list (teaser not included, as it would give a
    // false count of the number of hits).
    $newwords = _search_index_words($node['text1'] .' '. $node['text2'], $noise, $minimum_word_size);

    // Walk through the weighted words array, inserting them into
    // the search index.
    foreach ($newwords as $word => $count) {
      db_query("INSERT INTO {search_index} VALUES('%s', %d, '%s', %d)", $word, $node['lno'], $node_type, $count);
    }
  }

  // Update the last time this process was run.
  variable_set($search_array['last_update'], time());

  return true;
}

/**
 * Helper for update_index(): count the indexable words of a piece of text.
 *
 * @param $text
 *   The raw text of an item, possibly containing markup.
 * @param $noise
 *   An array whose keys are the lower-cased words that must not be indexed.
 * @param $minimum_word_size
 *   Words with fewer characters than this are not indexed.
 *
 * @return
 *   An array mapping each indexable word to the number of times it occurs.
 */
function _search_index_words($text, $noise, $minimum_word_size) {
  // Strip heaps of stuff out of it.
  $text = preg_replace("'<[\/\!]*?[^<>]*?>'si", '', $text);

  // Remove punctuation/special characters.
  $text = preg_replace("'(\xBB|\xAB|!|\xA1|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", '', $text);

  // Strip out (now mangled) http and tags.
  $text = preg_replace("'http\w+'", '', $text);
  $text = preg_replace("'www\w+'", '', $text);

  // Remove all newlines of any type.
  $text = preg_replace("'([\r\n]|[\r]|[\n])'", ' ', $text);

  // Lower case the whole thing.
  $text = strtolower($text);

  // Split on whitespace and give a "weight" to each word based on the
  // number of times it appears. Noise words are filtered per word rather
  // than by a regular expression over the text, so that consecutive noise
  // words and noise words containing special characters are handled too.
  $counts = array();
  foreach (preg_split("'[\s]+'", $text, -1, PREG_SPLIT_NO_EMPTY) as $word) {
    if (strlen($word) < $minimum_word_size || isset($noise[$word])) {
      continue;
    }
    $counts[$word] = isset($counts[$word]) ? $counts[$word] + 1 : 1;
  }

  return $counts;
}


/**
 * Reset the "last update" marker of every module that feeds the index.
 *
 * Each module is asked for its hook_update_index() description; for every
 * non-empty answer the variable named by its 'last_update' entry is set
 * to 1.
 */
function search_invalidate() {
  $modules = module_list();

  foreach ($modules as $name) {
    $index_info = module_invoke($name, 'update_index');

    // Modules without index information have no marker to reset.
    if (!$index_info) {
      continue;
    }

    variable_set($index_info['last_update'], 1);
  }
}

/**
 * Save the values entered by the administrator for the search module.
 *
 * Stores 'minimum_word_size', 'noisewords', 'help_pos' and 'remove_short'
 * as variables. All newlines, carriage returns, tabs and spaces are
 * removed from the noise word list before it is stored.
 *
 * @param $edit
 *   An array of fields as set up by calling form_textfield(),
 *   form_textarea(), etc.
 */
function search_save($edit) {
  variable_set('minimum_word_size', $edit['minimum_word_size']);

  // Drop every whitespace character so only the comma separated words
  // remain.
  $whitespace = array("\n", "\r", "\t", ' ');
  $noisewords = str_replace($whitespace, '', $edit['noisewords']);
  variable_set('noisewords', $noisewords);

  foreach (array('help_pos', 'remove_short') as $name) {
    variable_set($name, $edit[$name]);
  }
}

/**
 * Menu callback; presents the search form and/or search results.
 *
 * The search keys are taken from the 'keys' request parameter, with the
 * query string taking precedence over posted data. Users without the
 * 'search content' permission get the access denied page. Otherwise the
 * search form is shown together with the help text (or a link to it, as
 * configured by 'help_pos') and, when keys were given, the search results.
 */
function search_view() {
  if (!user_access('search content')) {
    drupal_access_denied();
    return;
  }

  if (isset($_GET['keys'])) {
    $keys = $_GET['keys'];
  }
  else {
    $keys = isset($_POST['keys']) ? $_POST['keys'] : NULL;
  }
  // Request parameters can also arrive as arrays (keys[]=...); those can
  // not be searched for and are treated as if no keys were given.
  if (!is_string($keys)) {
    $keys = NULL;
  }

  // Construct the search form.
  $output = search_form(NULL, $keys, TRUE);

  // Display form and search results.
  $help_link = l(t('search help'), 'search/help');
  switch (variable_get('help_pos', 1)) {
    case '1':
      $output = search_help(). $output .'<br />';
      break;
    case '2':
      $output .= search_help() .'<br />';
      break;
    case '3':
      $output = $help_link. '<br />'. $output .'<br />';
      break;
    case '4':
      $output .= '<br />'. $help_link .'<br />';
      break;
  }

  // Only perform search if there is non-whitespace search term. The
  // comparison is against the empty string on purpose: a plain truth test
  // would also reject the search term "0".
  if ($keys !== NULL && trim($keys) !== '') {
    // Log the search keys:
    watchdog('search', t('search: "%keys"', array('%keys' => $keys)), l('view results', 'search', NULL, 'keys='. urlencode($keys)));

    // Collect the search results:
    $results = search_data($keys);

    if ($results) {
      $output .= theme('box', t('Search Results'), $results);
    }
    else {
      $output .= theme('box', t('Search Results'), t('Your search yielded no results.'));
    }
  }

  print theme('page', $output, t('Search'));
}

/**
 * Menu callback; prints the search engine help page.
 *
 * Wraps the text returned by search_help() in the page theme and prints it.
 */
function search_help_page() {
  $help = search_help();
  print theme('page', $help);
}

?>