'. t('Approximately %percentage of the site has been indexed.', array('%percentage' => $percentage));
  $output .= form_group('Indexing status', $status);

  print theme('page', system_settings_form($output));
}

/**
 * Marks a word as dirty (or retrieves the list of dirty words). This is used
 * during indexing (cron). Words which are dirty have outdated total counts in
 * the search_total table, and need to be recounted.
 *
 * @param $word
 *   (optional) The word to mark as dirty. When omitted (null), nothing is
 *   marked and the list collected so far is returned instead.
 *
 * @return
 *   When called without a word: an array whose keys are the dirty words.
 *   When called with a word: nothing.
 */
function search_dirty($word = null) {
  // The list lives for the duration of the request only.
  static $dirty = array();

  if ($word !== null) {
    // Keyed by word so that repeated marks collapse into one entry.
    $dirty[$word] = true;
  }
  else {
    return $dirty;
  }
}

/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules and cleans up dirty words (see
 * search_dirty).
 */
function search_cron() {
  /* Update word index */
  foreach (module_list() as $module) {
    module_invoke($module, 'update_index');
  }

  /* Update word counts for new/changed words */
  foreach (search_dirty() as $word => $dummy) {
    // Replace the stored total with a fresh sum over the index.
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $word);
    $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
    db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
  }
}

/**
 * Splits a string into component words according to indexing rules.
 *
 * The result of the most recent call is remembered, so calling this twice in
 * a row with the same input does the work only once.
 *
 * @param $text
 *   The text to split. Entities are decoded and the text is handed to
 *   search_preprocess() before splitting.
 *
 * @return
 *   An array of words, obtained by splitting the simplified text on single
 *   spaces. The array may contain empty strings.
 */
function search_keywords_split($text) {
  static $last = null;
  static $lastsplit = null;

  // Strict comparison on purpose: with a loose one, an empty string compares
  // equal to the initial null and the very first call would return null
  // instead of an array.
  if ($last !== null && $last === $text) {
    return $lastsplit;
  }

  // Remember the unprocessed input; this is what the next call is compared
  // against.
  $input = $text;

  // Decode entities to UTF-8
  $text = decode_entities($text);

  // Call an external processor for word handling.
  search_preprocess($text);

  // To improve searching for numerical data such as dates, IP addresses
  // or version numbers, we consider a group of numerical characters
  // separated only by punctuation characters to be one piece.
  // This also means that searching for e.g. '20/03/1984' also returns
  // results with '20-03-1984' in them.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);

  // The dot, underscore and dash are simply removed. This allows meaningful
  // search behaviour with acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // With the exception of the rules above, we consider all punctuation,
  // marks, spacers, etc, to be a word boundary.
  $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);

  // Process words
  $words = explode(' ', $text);

  // Save last keyword result
  $last = $input;
  $lastsplit = $words;

  return $words;
}

/**
 * Invokes hook_search_preprocess() in modules.
 *
 * @param $text
 *   The text to process, passed by reference. It is replaced in turn by the
 *   return value of each implementing module.
 */
function search_preprocess(&$text) {
  static $modules = null;

  // Cache list of modules which implement this hook. This function gets called
  // a lot during reindexing.
  if (!is_array($modules)) {
    $modules = array();
    foreach (module_list() as $module) {
      if (module_hook($module, 'search_preprocess')) {
        $modules[] = $module;
      }
    }
  }

  // Process $text
  foreach ($modules as $module) {
    $text = module_invoke($module, 'search_preprocess', $text);
  }
}

/**
 * Update the search index for a particular item.
 *
 * Words are scored according to the HTML tags they appear in. Words inside a
 * link that points to a node on this site are credited to the linked node
 * rather than to the item being indexed.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 *
 * @param $type
 *   A string defining this type of item (e.g. 'node')
 *
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 */
function search_index($sid, $type, $text) {
  global $base_url;

  $minimum_word_size = variable_get('minimum_word_size', 3);

  // The pattern is delimited by '!', so quote that character as well.
  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url, '!') .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 21, 'h2' => 18, 'h3' => 15, 'h4' => 12, 'h5' => 9, 'h6' => 6, 'u' => 5, 'b' => 5, 'strong' => 5, 'em' => 5, 'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = false; // Odd/even counter. Tag or no tag.
  $link = false; // State variable for link analyser
  $linknid = 0; // Node id the current link points to (valid while $link is true)
  $score = 1; // Starting score per word

  // $results[0] collects the words of the item itself; $results[<nid>]
  // collects words found inside links to node <nid>.
  $results = array(0 => array());

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = strtolower($tagname);

      if (substr($tagname, 0, 1) == '/') {
        // Closing tag: take its multiplier off again.
        $closed = substr($tagname, 1);
        if (isset($tags[$closed])) {
          $score -= $tags[$closed];
        }
        if ($score < 1) { // possible due to bad HTML
          $score = 1;
        }
        if ($tagname == '/a') {
          $link = false;
        }
      }
      else {
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              // Group 1 is the 'node'/'book' prefix; the numeric id is group 2.
              $linknid = (int) $match[2];
              if ($linknid > 0) {
                $link = true;
              }
            }
          }
        }
        if (isset($tags[$tagname])) {
          $score += $tags[$tagname];
        }
      }
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        $words = search_keywords_split($value);
        foreach ($words as $word) {
          // Check wordlength
          if (string_length($word) >= $minimum_word_size) {
            $word = strtolower($word);

            // Credit the word to the linked node when inside a node link,
            // otherwise to the item itself.
            $target = $link ? $linknid : 0;
            if (!isset($results[$target][$word])) {
              $results[$target][$word] = 0;
            }
            $results[$target][$word] += $score;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Remove the item's previous entries, including the link entries it
  // contributed to other nodes; otherwise those would pile up on every
  // reindex.
  db_query("DELETE FROM {search_index} WHERE (sid = %d AND type = '%s') OR (fromsid = %d AND fromtype = '%s')", $sid, $type, $sid, $type);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Now insert links to nodes
  foreach ($results as $nid => $words) {
    foreach ($words as $word => $score) {
      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
      search_dirty($word);
    }
  }
}

/**
 * Perform a search on a word or words.
 *
 * This function is called by each module that supports the indexed search.
 *
 * The end result is an SQL select on the search_index table. As a guide for
 * writing the optional extra SQL fragments (see below), use this query:
 *
 *   SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score
 *   FROM {search_index} i
 *   $join INNER JOIN {search_total} t ON i.word = t.word
 *   WHERE $where AND (i.word = '...' OR ...)
 *   GROUP BY i.type, i.sid
 *   ORDER BY score DESC
 *
 * @param $keys
 *   A search string as entered by the user. A '*' acts as a wildcard.
 *
 * @param $type
 *   A string identifying the calling module. It is not referenced by the
 *   query built in this function.
 *
 * @param $join
 *   (optional) A string to be inserted into the JOIN part of the SQL query.
 *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 *
 * @param $where
 *   (optional) A string to be inserted into the WHERE part of the SQL query.
 *   For example "(n.status > 0)".
 *
 * @return
 *   An array of SIDs for the search results.
 */
function do_search($keys, $type, $join = '', $where = '1') {
  // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
  // through the keyword extractor.
  $keys = str_replace('*', '�', $keys);

  // Split into words
  $keys = search_keywords_split($keys);

  // Lowercase
  foreach ($keys as $k => $v) {
    $keys[$k] = strtolower($v);
  }

  $words = array();
  $arguments = array();
  $shortest = variable_get('remove_short', 3);

  // Build WHERE clause
  foreach ($keys as $word) {
    if (string_length($word) < $shortest) {
      continue;
    }
    if (strpos($word, '�') !== false) {
      // Turn the wildcard placeholder into the SQL wildcard.
      $words[] = "i.word LIKE '%s'";
      $arguments[] = str_replace('�', '%', $word);
    }
    else {
      $words[] = "i.word = '%s'";
      $arguments[] = $word;
    }
  }
  if (count($words) == 0) {
    return array();
  }
  $where .= ' AND ('. implode(' OR ', $words) .')';

  // Get result count (for pager)
  $count = db_result(db_query("SELECT COUNT(DISTINCT i.sid, i.type) FROM {search_index} i $join WHERE $where", $arguments));
  if ($count == 0) {
    return array();
  }
  // The pager gets a constant query, so the count is not computed twice.
  $count_query = "SELECT $count";

  // Do pager query
  $query = "SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $where GROUP BY i.type, i.sid ORDER BY score DESC";
  $arguments = array_merge(array($query, 15, 0, $count_query), $arguments);
  $result = call_user_func_array('pager_query', $arguments);

  $results = array();
  while ($item = db_fetch_object($result)) {
    $results[] = $item->sid;
  }

  return $results;
}

/**
 * Menu callback; presents the search form and/or search results.
 */
function search_view() {
  // Keywords and type come from the query string, or else from the posted
  // form. The type falls back to 'node'.
  if (isset($_GET['keys'])) {
    $keys = $_GET['keys'];
  }
  else {
    $keys = isset($_POST['edit']['keys']) ? $_POST['edit']['keys'] : '';
  }
  if (isset($_GET['type'])) {
    $type = $_GET['type'];
  }
  else {
    $type = !empty($_POST['edit']['type']) ? $_POST['edit']['type'] : 'node';
  }

  if (user_access('search content')) {
    // Stays empty when no search is performed.
    $results = '';

    // Only perform search if there is non-whitespace search term:
    if (trim($keys)) {
      // Log the search keys:
      // NOTE(review): $keys is user input and reaches t()/watchdog() as-is
      // here; confirm it is escaped where the log is displayed.
      watchdog('search', t('Search: %keys (%type).', array('%keys' => "$keys", '%type' => $type)), l(t('results'), 'search', NULL, 'keys='. urlencode($keys) . '&type='. urlencode($type)));

      // Collect the search results:
      $results = search_data($keys, $type);

      if ($results) {
        $results = theme('box', t('Search results'), $results);
      }
      else {
        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
      }
    }
    else if (isset($_POST['edit'])) {
      form_set_error('keys', t('Please enter some keywords.'));
    }

    // Construct the search form.
    // Note, we do this last because of the form_set_error() above.
    $output = search_form(NULL, $keys, $type, TRUE);
    $output .= $results;

    print theme('page', $output, t('Search'));
  }
  else {
    drupal_access_denied();
  }
}

/**
 * Menu callback; prints the search engine help page.
 */
function search_help_page() {
  print theme('page', search_help());
}

/**
 * @defgroup search Search interface
 * @{
 * The Drupal search interface manages a global search mechanism.
 *
 * Modules may plug into this system to provide searches of different types of
 * data. Most of the system is handled by search.module, so this must be enabled
 * for all of the search features to work.
 */

/**
 * Render a search form.
 *
 * This form must be usable not only within "http://example.com/search", but also
 * as a simple search box (without "Restrict search to", help text, etc.), in the
 * theme's header, and so forth. This means we must provide options to
 * conditionally render certain parts of this form.
 *
 * @param $action
 *   Form action. Defaults to "search".
 * @param $keys
 *   The search string entered by the user, containing keywords for the search.
 * @param $options
 *   Whether to render the optional form fields and text ("Restrict search
 *   to", help text, etc.).
 * @return
 *   An HTML string containing the search form.
 */
function search_form($action = '', $keys = '', $type = null, $options = FALSE) {
  $edit = $_POST['edit'];

  if (!$action) {
    $action = url('search');
  }

  $output = '
'. $item['snippet'] . '
' : '') . '' . implode(' - ', $info) .'