Search guidelines

The search page allows you to search the web site's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.

Words excluded from the search

Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also ignored.

", array('%number' => variable_get('minimum_word_size', 2)));
    case 'admin/modules#description':
      return t('Enables site-wide keyword searching.');
    case 'admin/settings/search':
      return t('The search engine works by keeping an index of "interesting" words. To make sure we only get "interesting" words you need to set the following.');
  }
}

/**
 * Implementation of hook_perm().
 *
 * @return
 *   The two permission strings 'search content' and 'administer search'.
 */
function search_perm() {
  return array('search content', 'administer search');
}

/**
 * Implementation of hook_link().
 *
 * @param $type
 *   The kind of link list being built; only 'page' is handled here.
 *
 * @return
 *   An array holding a single "search" link when $type is 'page' and
 *   user_access('search content') passes, otherwise an empty array.
 */
function search_link($type) {
  $links = array();

  if ($type == 'page' && user_access('search content')) {
    $links[] = l(t('search'), 'search', array('title' => t('Search for older content.')));
  }

  return $links;
}

/**
 * Implementation of hook_menu().
 *
 * @return
 *   An array of four menu items: the search page, the search help page, the
 *   default local task for the search page, and the configuration tab.
 */
function search_menu() {
  $items = array();

  $items[] = array('path' => 'search', 'title' => t('search'),
    'callback' => 'search_view',
    'access' => user_access('search content'),
    'type' => MENU_SUGGESTED_ITEM);
  $items[] = array('path' => 'search/help', 'title' => t('search help'),
    'callback' => 'search_help_page',
    'access' => user_access('search content'),
    'type' => MENU_SUGGESTED_ITEM);
  $items[] = array('path' => 'search/search', 'title' => t('search'),
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10);
  // NOTE(review): search_perm() declares 'administer search', but this item is
  // guarded by 'administer site configuration'. Confirm which one is intended.
  $items[] = array('path' => 'search/configure', 'title' => t('configure'),
    'callback' => 'search_configure',
    'access' => user_access('administer site configuration'),
    'type' => MENU_LOCAL_TASK);

  return $items;
}

/**
 * Menu callback; displays the search module settings page.
 *
 * Calls system_settings_save() when the request carries POST data, then
 * prints the settings form wrapped in the page theme.
 */
function search_configure() {
  if ($_POST) {
    system_settings_save();
  }

  // Indexing settings:
  // NOTE(review): the first description below contains a raw line break inside
  // the string; it looks like stripped markup. Confirm against the upstream
  // file before changing it, since it is user-facing text.
  $group = form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 2), 10, 10, t('The number of characters a word has to be to be indexed. 
Words shorter than this will not be searchable.'));
  $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 0), 10, 10, t('The number of characters a word has to be to be searched for.'));
  $group .= form_textarea(t('Noise words'), 'noisewords', variable_get('noisewords', ''), 70, 10, t('These words will not be indexed. Enter a comma separated list; linebreaks and whitespace do not matter. Example: and, or, not, a, to, I, it, ...'));
  $output = form_group(t('Indexing settings'), $group);

  // Visual settings:
  $group = form_radios(t('Help text position'), 'help_pos', variable_get('help_pos', 1), array('1' => t('Above search output'), '2' => t('Below search output'), '3' => t('Link from above search output'), '4' => t('Link from below search output')), t('Where to show the help text for users on the search page.'));
  $output .= form_group(t('Viewing options'), $group);

  print theme('page', system_settings_form($output));
}

/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules and uses the results to make
 * the search index current.
 */
function search_cron() {
  foreach (module_list() as $module) {
    $module_array = module_invoke($module, 'update_index');
    // Modules that return nothing from the hook have nothing to index.
    if ($module_array) {
      update_index($module_array);
    }
  }
}

/**
 * Perform a search on a word or words.
 *
 * This function is called by each node that supports the indexed search.
 *
 * @param $search_array
 *   An array as returned from hook_search(). The format of this array is
 *   array('keys' => ..., 'type' => ..., 'select' => ...). See the hook_search()
 *   documentation for an explanation of the array values.
 *
 * @return
 *   An array of search results, of which each element is an array with the
 *   keys "count", "title", "link", "user" (name), "date", and "keywords".
*/
function do_search($search_array) {
  $keys = strtolower($search_array['keys']);
  $type = $search_array['type'];
  $select = $search_array['select'];

  // Replace wildcards with MySQL wildcards.
  $keys = str_replace('*', '%', $keys);

  // Split the words entered into an array. This unfiltered list is only used
  // for the "keywords" element of each result, exactly as before.
  $words = explode(' ', $keys);

  // Work out which words are actually searched for: empty strings (produced
  // by repeated spaces) and words shorter than the configured minimum are
  // skipped. Only these words may be required by the reduction step below;
  // requiring a word that was never queried would reject every result.
  $minimum_length = variable_get('remove_short', 0);
  $search_words = array();
  foreach ($words as $word) {
    if ($word !== '' && strlen($word) >= $minimum_length) {
      $search_words[] = $word;
    }
  }

  $results = array();
  $reduction = array();
  foreach ($search_words as $word) {
    // Put the next search word into the query and do the query.
    $query = str_replace("'%'", "'". check_query($word) ."'", $select);
    $result = db_query($query);

    // Create an in memory array of the results.
    while ($row = db_fetch_array($result)) {
      $lno = $row['lno'];

      // Remember which words matched this "lno".
      $reduction[$lno][$word] = true;

      if (!isset($results[$lno])) {
        // First hit for this "lno". 'count' stays the first key on purpose:
        // the sort below compares the result arrays element by element.
        $results[$lno] = array(
          'count' => $row['count'],
          'lno' => $lno,
          'nid' => $row['nid'],
          'title' => $row['title'],
          'created' => $row['created'],
          'uid' => $row['uid'],
          'name' => $row['name'],
        );
      }
      else {
        // Different word, but existing "lno". Increase the count of
        // matches against this "lno" by the number of times this
        // word appears in the text.
        $results[$lno]['count'] += $row['count'];
      }
    }
  }

  // Keep only the entries that matched every searched word.
  $matches = array();
  foreach ($results as $lno => $values) {
    $pass = true;
    foreach ($search_words as $word) {
      if (!isset($reduction[$lno][$word])) {
        $pass = false;
        break;
      }
    }
    if ($pass) {
      $matches[$lno] = $values;
    }
  }

  // Always hand back an array, even when nothing matched.
  $find = array();
  if (!$matches) {
    return $find;
  }

  // Black magic here to sort the results.
  array_multisort($matches, SORT_DESC);

  // Now, output the results.
  $keywords = implode('|', $words);
  foreach ($matches as $value) {
    $lno = $value['lno'];
    $nid = $value['nid'];

    switch ($type) {
      case 'node':
        $find[] = array('count' => $value['count'], 'title' => $value['title'], 'link' => url("node/$lno"), 'user' => $value['name'], 'date' => $value['created'], 'keywords' => $keywords);
        break;

      case 'comment':
        // Requests whose URI contains 'admin' link to the comment edit page,
        // all others to the comment anchor on its node.
        $find[] = array('count' => $value['count'], 'title' => $value['title'], 'link' => (strstr(request_uri(), 'admin') ? url("admin/comment/edit/$lno") : url("node/$nid", NULL, "comment-$lno")), 'user' => $value['name'], 'date' => $value['created'], 'keywords' => $keywords);
        break;
    }
  }

  return $find;
}

/**
 * Update the search_index table.
 *
 * For every row returned by the supplied query, the existing index entries
 * for that row are deleted and replaced by one entry per distinct word,
 * weighted by the number of times the word occurs.
 *
 * @param $search_array
 *   An array as returned from hook_update_index(). The keys read here are
 *   'last_update' (name of the variable that stores the time of the last
 *   run), 'node_type' and 'select' (the query to run). Each row of the query
 *   must provide 'lno', 'text1' and 'text2'.
 *
 * @return
 *   Always true.
 */
function update_index($search_array) {
  $node_type = $search_array['node_type'];
  $select = $search_array['select'];
  $minimum_word_size = variable_get('minimum_word_size', 2);

  // Build the noise word lookup once for the whole run. The text is lower
  // cased before it is split into words, so the noise words are lower cased
  // too; otherwise an entry such as "I" could never match.
  $noise = array();
  foreach (explode(',', variable_get('noisewords', '')) as $word) {
    $word = strtolower(trim($word));
    if ($word !== '') {
      $noise[$word] = true;
    }
  }

  // Look through the nodes we just selected.
  $result = db_query($select);
  while ($node = db_fetch_array($result)) {
    // Trash any existing entries in the search index for this node,
    // in case it is a modified node.
    db_query("DELETE FROM {search_index} WHERE lno = %d AND type = '%s'", $node['lno'], $node_type);

    // Build the word list (teaser not included, as it would give a
    // false count of the number of hits).
    $wordlist = $node['text1'] .' '. $node['text2'];

    // Strip heaps of stuff out of it.
    $wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", '', $wordlist);

    // Remove punctuation/special characters.
    $wordlist = preg_replace("'(\xBB|\xAB|!|\xA1|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", '', $wordlist);

    // Strip out (now mangled) http and tags.
    $wordlist = preg_replace("'http\w+'", '', $wordlist);
    $wordlist = preg_replace("'www\w+'", '', $wordlist);

    // Remove all newlines of any type.
    $wordlist = preg_replace("'([\r\n]|[\r]|[\n])'", ' ', $wordlist);

    // Lower case the whole thing.
    $wordlist = strtolower($wordlist);

    // Remove whitespace.
    $wordlist = trim(preg_replace("'[\s]+'", ' ', $wordlist));

    // Walk through the words, giving a "weight" to each word based on
    // the number of times it appears in a page. Noise words, empty strings
    // and words outside the allowed length range are left out. The array is
    // rebuilt for every node so counts never leak between nodes.
    $newwords = array();
    foreach (explode(' ', $wordlist) as $word) {
      $length = strlen($word);
      if ($word === '' || isset($noise[$word]) || $length < $minimum_word_size || $length > 50) {
        continue;
      }
      if (isset($newwords[$word])) {
        $newwords[$word]++;
      }
      else {
        $newwords[$word] = 1;
      }
    }

    // Walk through the weighted words array, inserting them into
    // the search index.
    foreach ($newwords as $key => $value) {
      db_query("INSERT INTO {search_index} VALUES('%s', %d, '%s', %d)", $key, $node['lno'], $node_type, $value);
    }
  }

  // Update the last time this process was run.
  variable_set($search_array['last_update'], time());

  return true;
}

/**
 * Reset the "last update" variable of every indexing module.
 *
 * Invokes hook_update_index() in all modules and sets the variable named by
 * each returned 'last_update' entry to 1.
 */
function search_invalidate() {
  foreach (module_list() as $module) {
    $module_array = module_invoke($module, 'update_index');
    if ($module_array) {
      variable_set($module_array['last_update'], 1);
    }
  }
}

/**
 * Save the values entered by the administrator for the search module
 *
 * @param $edit
 *   An array of fields as set up by calling form_textfield(),
 *   form_textarea(), etc. The keys read are 'minimum_word_size',
 *   'noisewords', 'help_pos' and 'remove_short'.
 */
function search_save($edit) {
  variable_set('minimum_word_size', $edit['minimum_word_size']);

  // Drop every newline, carriage return, tab and space from the noise word
  // list. strtr() with a three character "from" and a one character "to"
  // only translates the first character, so it must not be used here.
  $data = str_replace(array("\n", "\r", "\t", ' '), '', $edit['noisewords']);
  variable_set('noisewords', $data);

  variable_set('help_pos', $edit['help_pos']);
  variable_set('remove_short', $edit['remove_short']);
}

/**
 * Menu callback; presents the search form and/or search results.
 *
 * Reads the search keys from $_GET['keys'], falling back to $_POST['keys'].
 */
function search_view() {
  if (!user_access('search content')) {
    drupal_access_denied();
    return;
  }

  if (isset($_GET['keys'])) {
    $keys = $_GET['keys'];
  }
  else {
    $keys = isset($_POST['keys']) ? $_POST['keys'] : '';
  }

  // Construct the search form.
  $output = search_form(NULL, $keys, TRUE);

  // Display form and search results.
  // NOTE(review): the separators below are bare line breaks inside string
  // literals; they look like stripped markup. Confirm against the upstream
  // file before changing them, since they are part of the page output.
  $help_link = l(t('search help'), 'search/help');
  switch (variable_get('help_pos', 1)) {
    case '1':
      $output = search_help(). $output .'
';
      break;
    case '2':
      $output .= search_help() .'
';
      break;
    case '3':
      $output = $help_link. '
'. $output .'
';
      break;
    case '4':
      $output .= '
'. $help_link .'
';
  }

  // Only perform search if there is non-whitespace search term. Compare with
  // the empty string so that a term such as "0" is still searched for.
  if (trim($keys) !== '') {
    // Log the search keys:
    watchdog('search', t('search: "%keys"', array('%keys' => $keys)), l('view results', 'search', NULL, 'keys='. urlencode($keys)));

    // Collect the search results:
    $results = search_data($keys);

    if ($results) {
      $output .= theme('box', t('Search Results'), $results);
    }
    else {
      $output .= theme('box', t('Search Results'), t('Your search yielded no results.'));
    }
  }

  print theme('page', $output, t('Search'));
}

/**
 * Menu callback; prints the search engine help page.
 */
function search_help_page() {
  print theme('page', search_help());
}

?>