Search guidelines

The search page allows you to search the web site's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.

Words excluded from the search

Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also ignored.

", array("%number" => variable_get("minimum_word_size", 2)));
      break;
    case 'admin/system/modules#description':
      $output = t("Enables site wide keyword searching.");
      break;
    case 'admin/system/modules/search':
      $output = t("The search engine works by keeping an index of \"interesting\" words. To make sure we only get \"interesting\" words you need to set the following.");
      break;
  }

  return $output;
}

/**
 * Return an array of valid search access permissions
 *
 * @return array of permission names: "search content" and
 *   "administer search"
 */
function search_perm() {
  return array("search content", "administer search");
}

/**
 * Return an array of links to be displayed
 *
 * For $type "page" a "search" link is returned; for $type "system" the
 * "search" path is registered with menu() as a hidden item and no link is
 * returned.  Both require the "search content" permission.
 *
 * @param $type The type of page requesting the link
 *
 * @return array of links (possibly empty)
 */
function search_link($type) {
  $links = array();

  if ($type == "page" && user_access("search content")) {
    $links[] = l(t("search"), "search", array("title" => t("Search for older content.")));
  }

  if ($type == "system" && user_access("search content")) {
    menu("search", t("search"), "search_page", 0, MENU_HIDE);
  }

  return $links;
}

/**
 * Build the administration settings form for the search module
 *
 * The form edits the "minimum_word_size", "remove_short", "noisewords" and
 * "help_pos" variables, pre-filled with their current values.
 *
 * @return the form elements as a string
 */
function search_settings() {
  $output = form_textfield(t("Minimum word length to index"), "minimum_word_size", variable_get("minimum_word_size", 2), 10, 10, t("The number of characters a word has to be to be indexed. Words shorter than this will not be searchable."));
  $output .= form_textfield(t("Minimum word length to search for"), "remove_short", variable_get("remove_short", 0), 10, 10, t("The number of characters a word has to be to be searched for."));
  $output .= form_textarea(t("Noise words"), "noisewords", variable_get("noisewords", ""), 70, 10, t("These words will not be indexed, enter comma separated list, linebreaks and whitespace do not matter. \nExample: and, or, not, a, to, I, it, ..."));
  $output .= form_radios(t("Help text position"), "help_pos", variable_get("help_pos", 1), array("1" => t("Above search output"), "2" => t("Below search output"), "3" => t("Link from above search output"), "4" => t("Link from below search output")), t("Where to show the help text for users on the search page."));

  return $output;
}

/**
 * perform a regularly run action across all modules that have the
 * module_update_index function in them.
 *
 * Each module's "update_index" hook is invoked; when it returns a
 * description array, that array is handed to update_index().
 */
function search_cron() {
  foreach (module_list() as $module) {
    $module_array = module_invoke($module, "update_index");
    if ($module_array) {
      update_index($module_array);
    }
  }

  return;
}

/**
 * Perform a search on a word(s).
 *
 * Search function called by each node that supports the indexed search.
 * One query is run per search word; only items matched by every searched
 * word are returned, ordered by descending total hit count.
 *
 * @param $search_array an array as returned from module_search of type
 *   array("keys" => ..., "type" => ..., "select" => ...)
 * @see node_search for an explanation of array items
 *
 * @return array of search results (empty when nothing matched), each
 *   element being an array indexed with "count", "title", "link",
 *   "user" (name), "date", "keywords"
 */
function do_search($search_array) {
  $keys = strtolower($search_array["keys"]);
  $type = $search_array["type"];
  $select = $search_array["select"];

  // Replace wildcards with mysql wildcards
  $keys = str_replace("*", "%", $keys);

  /*
  ** Split the words entered into an array, keeping only the words that
  ** are really going to be searched for.  Empty strings (produced by
  ** repeated spaces) and words shorter than the "remove_short" setting
  ** are dropped here, so that the "every word must match" filter below
  ** does not demand a hit for a word that was never queried.
  */
  $remove_short = variable_get("remove_short", 0);
  $words = array();
  foreach (explode(" ", $keys) as $word) {
    if ($word === "" || strlen($word) < $remove_short) {
      continue;
    }
    $words[] = $word;
  }
  // Searching for the same word twice must not count its hits twice.
  $words = array_values(array_unique($words));

  $find = array();
  if (!$words) {
    return $find;
  }

  $results = array();    // "lno" => accumulated result row
  $reduction = array();  // "lno" => array(word => true) for each matched word

  foreach ($words as $word) {
    // Put the next search word into the query and do the query
    $query = str_replace("'%'", "'". check_query($word) ."'", $select);
    $result = db_query($query);

    // If we got any results
    if (db_num_rows($result) != 0) {
      // Create an in memory array of the results,
      while ($row = db_fetch_array($result)) {
        $lno = $row["lno"];

        // Build reduction variable
        $reduction[$lno][$word] = true;

        if (!isset($results[$lno])) {
          // "count" has to stay the first key: the sort below compares
          // the rows element by element.
          $results[$lno] = array(
            "count" => $row["count"],
            "lno" => $lno,
            "nid" => $row["nid"],
            "title" => $row["title"],
            "created" => $row["created"],
            "uid" => $row["uid"],
            "name" => $row["name"]
          );
        }
        else {
          /*
          ** Different word, but existing "lno", increase the count of
          ** matches against this "lno" by the number of times this
          ** word appears in the text
          */
          $results[$lno]["count"] += $row["count"];
        }
      }
    }
  }

  // Keep only the items that matched every searched word.
  $fullresults = array();
  foreach ($results as $lno => $values) {
    $pass = true;
    foreach ($words as $word) {
      if (empty($reduction[$lno][$word])) {
        $pass = false;
        break;
      }
    }
    if ($pass) {
      $fullresults[$lno] = $values;
    }
  }

  if (!$fullresults) {
    return $find;
  }

  // Black magic here to sort the results
  array_multisort($fullresults, SORT_DESC);

  // OK, time to output the results.
  $keywords = implode("|", $words);
  $admin = strstr(request_uri(), "admin");
  foreach ($fullresults as $value) {
    $lno = $value["lno"];
    $nid = $value["nid"];

    switch ($type) {
      case "node":
        $link = $admin ? url("admin/node/edit/$lno") : url("node/view/$lno");
        break;
      case "comment":
        $link = $admin ? url("admin/comment/edit/$lno") : url("node/view/$nid", NULL, $lno);
        break;
      default:
        // Unknown types produce no output.
        continue 2;
    }

    // Append with [] so the list is indexed 0, 1, 2, ...
    $find[] = array("count" => $value["count"], "title" => $value["title"], "link" => $link, "user" => $value["name"], "date" => $value["created"], "keywords" => $keywords);
  }

  return $find;
}

/**
 * Update the search_index table
 *
 * Runs the supplied select query and, for every row returned, replaces
 * that item's entries in {search_index} with one row per distinct word and
 * the number of times the word occurs.  Afterwards the "last_update"
 * variable is set to the current time.
 *
 * @param $search_array an array as returned from module_update_index
 *   of type array("last_update" => ..., "node_type" => ..., "select" => ...)
 * @see node_update_index for an explanation of array items
 *
 * @return true
 */
function update_index($search_array) {
  // Only referenced by the debugging line below.
  $last_update = variable_get($search_array["last_update"], 1);
  $node_type = $search_array["node_type"];
  $select = $search_array["select"];
  $minimum_word_size = variable_get("minimum_word_size", 2);

  // watchdog("user", "$last_update\n$node_type\n$select");

  /*
  ** Build a lookup table of the "noisewords" once.  The text is lower
  ** cased before it is indexed, so the noise words are lower cased too;
  ** otherwise a configured word such as "I" would never be filtered.
  */
  $noise = array();
  foreach (explode(",", variable_get("noisewords", "")) as $word) {
    $word = strtolower(trim($word));
    if ($word !== "") {
      $noise[$word] = true;
    }
  }

  $result = db_query($select);

  if (db_num_rows($result)) {
    // Wohoo, found some, look through the nodes we just selected
    while ($node = db_fetch_array($result)) {

      /*
      ** Trash any existing entries in the search index for this node,
      ** in case its a modified node.
      */
      db_query("DELETE FROM {search_index} WHERE lno = %d AND type = '%s'", $node["lno"], $node_type);

      /*
      ** Build the wordlist, teaser not included, as it then gives a
      ** false count of the number of hits, and doesn't show up
      ** when clicking on a node from the search interface anyway.
      */
      $wordlist = $node["text1"] ." ". $node["text2"];

      // Strip heaps of stuff out of it
      $wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $wordlist);

      // Remove punctuation and stuff
      $wordlist = preg_replace("'(\xBB|\xAB|!|\xA1|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", "", $wordlist);

      // Strip out (now mangled) http and tags.
      $wordlist = preg_replace("'http\w+'", "", $wordlist);
      $wordlist = preg_replace("'www\w+'", "", $wordlist);

      // Remove all newlines of any type
      $wordlist = preg_replace("'([\r\n]|[\r]|[\n])'", " ", $wordlist);

      // Lower case the whole thing.
      $wordlist = strtolower($wordlist);

      // Remove whitespace
      $wordlist = trim(preg_replace("'[\s]+'", " ", $wordlist));

      // Make it an array
      $eachword = explode(" ", $wordlist);

      /*
      ** walk through the array, giving a "weight" to each word, based on
      ** the number of times it appears in a page.  Noise words, empty
      ** strings and words that are too short are left out.  The array is
      ** started afresh for every node, so we don't add multiples.
      */
      $newwords = array();
      foreach ($eachword as $word) {
        if ($word === "" || strlen($word) < $minimum_word_size || isset($noise[$word])) {
          continue;
        }
        if (isset($newwords[$word])) {
          $newwords[$word]++;
        }
        else {
          $newwords[$word] = 1;
        }
      }

      /*
      ** Walk through the weighted words array, inserting them into
      ** the search index
      */
      foreach ($newwords as $key => $value) {
        db_query("INSERT INTO {search_index} VALUES('%s', %d, '%s', %d)", $key, $node["lno"], $node_type, $value);
      }
    }
  }

  // update the last time this process was run.
  variable_set($search_array["last_update"], time());

  return true;
}

/**
 * Reset the "last_update" variable of every module that implements the
 * "update_index" hook to 1.
 */
function search_invalidate() {
  foreach (module_list() as $module) {
    $module_array = module_invoke($module, "update_index");
    if ($module_array) {
      variable_set($module_array["last_update"], 1);
    }
  }

  return;
}

/**
 * Save the values entered by the administrator for the search module
 *
 * Tabs, line breaks and spaces are removed from the noise word list
 * before it is stored.
 *
 * @param $edit An array of fields as setup via calling form_textfield,
 *   form_textarea etc
 */
function search_save($edit) {
  variable_set("minimum_word_size", $edit["minimum_word_size"]);

  $data = strtr($edit["noisewords"], "\n\r\t", "   ");
  $data = str_replace(" ", "", $data);
  variable_set("noisewords", $data);

  variable_set("help_pos", $edit["help_pos"]);
  variable_set("remove_short", $edit["remove_short"]);
}

/**
 * Print the search page: the search form, the help text (or a link to
 * it, depending on the "help_pos" setting) and, when keys were given,
 * the search results.
 *
 * Users without the "search content" permission get the access denied
 * message instead.
 *
 * @param $keys The search keys entered by the user
 */
function search_view($keys) {
  if (!user_access("search content")) {
    print theme("page", message_access());
    return;
  }

  // Construct the search form:
  $output = search_form(NULL, $keys, TRUE);

  // Display form and search results:
  $help_link = l(t("search help"), "search/help");
  switch (variable_get("help_pos", 1)) {
    case "1":
      $output = search_help(). $output ."\n";
      break;
    case "2":
      $output .= search_help() ."\n";
      break;
    case "3":
      $output = $help_link. "\n". $output ."\n";
      break;
    case "4":
      $output .= "\n". $help_link ."\n";
      break;
  }

  // Only log and run a search when something was actually asked for.
  if ($keys) {
    // Log the search keys:
    watchdog("search", "search: '$keys'", l("view results", "search", NULL, "keys=". urlencode($keys)));

    // Collect the search results:
    $results = search_data($keys);

    if ($results) {
      $output .= theme("box", t("Search Results"), $results);
    }
    else {
      $output .= theme("box", t("Search Results"), t("Your search yielded no results."));
    }
  }

  print theme("page", $output, t("Search"));
}

/**
 * Menu callback for the "search" path.
 *
 * Reads the search keys from the query string or, failing that, from the
 * posted form, and shows either the help page ("search/help") or the
 * search page.
 */
function search_page() {
  if (isset($_GET["keys"])) {
    $keys = $_GET["keys"];
  }
  else {
    $keys = isset($_POST["keys"]) ? $_POST["keys"] : NULL;
  }

  switch (arg(1)) {
    case "help":
      print theme("page", search_help(), t("Search Help"));
      break;
    default:
      search_view($keys);
  }
}

?>