2001-06-30 09:50:36 +00:00
<?php
2001-10-20 18:57:09 +00:00
// $Id$
2001-06-30 09:50:36 +00:00
2003-08-25 16:57:55 +00:00
/**
 * Return the help text this module provides for a given page.
 *
 * @param $section The path of the page help is wanted for. Defaults to
 *                 the search help page.
 *
 * @return The help text for $section, or an empty string when $section
 *         is not one of the paths handled here.
 */
function search_help($section = "admin/search/help") {
  // The general help page and the search help page share the full guidelines.
  if ($section == 'admin/help' || $section == 'admin/search/help') {
    $guidelines = "<b>Search guidelines</b>";
    $guidelines .= "<p>The search page allows you to search the web site's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.</p>";

    // The configured minimum word size is substituted into the text.
    $shortest = variable_get("minimum_word_size", 2);
    $excluded = "<b>Words excluded from the search</b>";
    $excluded .= strtr("<p>Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also ignored.</p>", array("%number" => $shortest));

    return $guidelines . $excluded;
  }

  // One-line description of the module.
  if ($section == 'admin/system/modules') {
    return "Enables site wide keyword searching.";
  }

  // Introduction shown with the module's settings.
  if ($section == 'admin/system/modules/search') {
    return 'The search engine works by keeping an index of "interesting" words. To make sure we only get "interesting" words you need to set the following.';
  }

  return "";
}
2002-03-05 20:15:17 +00:00
2002-06-01 21:57:29 +00:00
/**
 * Return a piece of information about this module.
 *
 * @param $field The information wanted: "description" or "admin_help".
 *
 * @return The matching text from search_help(), or an empty string for
 *         any other field.
 */
function search_system($field){
  if ($field == "description") {
    return search_help("admin/system/modules");
  }

  if ($field == "admin_help") {
    // search_help() keys the settings help on this module's own path. The
    // previously used ".../queue" path matched none of its cases, so the
    // admin help was always empty.
    return search_help("admin/system/modules/search");
  }

  return "";
}
2002-03-05 20:15:17 +00:00
/**
 * List the access permissions used by the search module.
 *
 * @return An array holding the permission names.
 */
function search_perm() {
  $permissions = array();
  $permissions[] = "search content";
  $permissions[] = "administer search";

  return $permissions;
}
2002-03-05 20:15:17 +00:00
/**
 * Return an array of links to be displayed
 *
 * @param $type The type of page requesting the link
 *
 * @return An array with the link to the search page when $type is "page"
 *         and the user may search content; an empty array otherwise.
 */
function search_link($type) {
  // Nothing to offer for other link types or for users without access.
  if ($type != "page" || !user_access("search content")) {
    return array();
  }

  return array(l(t("search"), "search", array("title" => t("Search for older content."))));
}
2003-02-11 20:01:17 +00:00
/**
 * Build the form fields for the search module's settings.
 *
 * @return The concatenated output of the form element builders, in the
 *         order: minimum indexed word length, minimum searched word
 *         length, noise words, help text position.
 */
function search_settings() {
  $fields = array();

  $fields[] = form_textfield(t("Minimum word length to index"), "minimum_word_size", variable_get("minimum_word_size", 2), 10, 10, t("The number of characters a word has to be to be indexed. Words shorter than this will not be searchable."));

  $fields[] = form_textfield(t("Minimum word length to search for"), "remove_short", variable_get("remove_short", 0), 10, 10, t("The number of characters a word has to be to be searched for."));

  $fields[] = form_textarea(t("Noise words"), "noisewords", variable_get("noisewords", ""), 70, 10, t("These words will not be indexed, enter comma separated list, linebreaks and whitespace do not matter. Example: and, or, not, a, to, I, it, ..."));

  $fields[] = form_select(t("Help text position"), "help_pos", variable_get("help_pos", 1), array("1" => t("Above search output"), "2" => t("Below search output"), "3" => t("Link from above search output"), "4" => t("Link from below search output")), t("Where to show the help text for users on the search page."));

  return implode("", $fields);
}
2002-03-05 20:15:17 +00:00
/**
 * Search engine administration actions.
 *
 * When a "reindex" operation has been posted by a user holding the
 * "administer search" permission, the index is invalidated and rebuilt.
 *
 * @return A status message after reindexing. Nothing (NULL) is returned
 *         when the user lacks access or no reindex was requested.
 */
function search_admin() {
  // Only allow people with sufficient access.
  if (!user_access("administer search")) {
    return;
  }

  // $_POST only has an "op" key when a form operation was submitted; guard
  // the read so other requests do not raise an undefined-index notice.
  $op = isset($_POST["op"]) ? $_POST["op"] : NULL;
  if ($op != "reindex") {
    return;
  }

  search_invalidate();
  $output = t("index invalidated") ."<br />\n";

  search_cron();
  $output .= t("index recreated") ."<br /><hr />\n";

  return $output;
}
/**
 * Perform a regularly run action across all modules that have the
 * <module>_update_index function in them.
 *
 * Each module is asked for its "update_index" information; whenever a
 * module returns something, it is handed to update_index().
 */
function search_cron() {
  $modules = module_list();

  foreach ($modules as $name) {
    $index_info = module_invoke($name, "update_index");

    // Modules that return nothing have no content to index.
    if (!$index_info) {
      continue;
    }

    update_index($index_info);
  }
}
/**
 * Perform a search on a word(s)
 *
 * Search function called by each node that supports the indexed search.
 * Every keyword is looked up with its own query; only items matched by
 * all of the searched keywords are returned, ordered by descending number
 * of matches.
 *
 * @param $search_array an array as returned from <module>_search
 *                      of type array("keys" => ...,
 *                      "type" => ..., "select" => ...)
 *                      see node_search in node.module for an
 *                      explanation of array items
 *
 * @return An array of results, each an array with the keys "count",
 *         "title", "link", "user", "date" and "keywords"; NULL when
 *         nothing matched (or "type" is neither "node" nor "comment").
 */
function do_search($search_array) {
  $keys = strtolower($search_array["keys"]);
  $type = $search_array["type"];
  $select = $search_array["select"];

  // Replace wildcards with mysql wildcards
  $keys = str_replace("*", "%", $keys);

  // Split the words entered into an array. Empty strings (left behind by
  // repeated spaces) and words shorter than the configured minimum are
  // dropped here, so that the "matched by every word" filter below only
  // requires words that were actually looked up.
  $minimum = variable_get("remove_short", 0);
  $words = array();
  foreach (explode(" ", $keys) as $word) {
    if (strlen($word) == 0 || strlen($word) < $minimum) {
      continue;
    }
    $words[] = $word;
  }

  $results = array();
  $matched = array();
  foreach ($words as $word) {
    // Put the next search word into the query and do the query. A plain
    // string replacement is used: preg_replace() would interpret "\\" and
    // "$n" in the escaped word and thereby undo the escaping.
    $result = db_query(str_replace("%", check_query($word), $select));

    // Create an in memory array of the results
    while ($row = db_fetch_array($result)) {
      $lno = $row["lno"];

      // Remember which words matched which item
      $matched[$lno][$word] = TRUE;

      if (!isset($results[$lno])) {
        // "count" has to stay the first element: the sort below compares
        // the result arrays element by element.
        $results[$lno] = array(
          "count" => $row["count"],
          "lno" => $lno,
          "nid" => $row["nid"],
          "title" => $row["title"],
          "created" => $row["created"],
          "uid" => $row["uid"],
          "name" => $row["name"],
        );
      }
      else {
        /*
        ** Different word, but existing "lno", increase the count of
        ** matches against this "lno" by the number of times this
        ** word appears in the text
        */
        $results[$lno]["count"] += $row["count"];
      }
    }
  }

  // Keep only the items that were matched by every searched word.
  $complete = array();
  foreach ($results as $lno => $values) {
    $pass = TRUE;
    foreach ($words as $word) {
      if (!isset($matched[$lno][$word])) {
        $pass = FALSE;
        break;
      }
    }
    if ($pass) {
      $complete[$lno] = $values;
    }
  }

  if (!$complete) {
    return NULL;
  }

  // Black magic here to sort the results
  array_multisort($complete, SORT_DESC);

  // OK, time to output the results.
  $in_admin = strstr(request_uri(), "admin");
  $keywords = implode("|", $words);
  $find = array();
  foreach ($complete as $value) {
    $lno = $value["lno"];
    $nid = $value["nid"];

    switch ($type) {
      case "node":
        $link = $in_admin ? url("admin/node/edit/$lno") : url("node/view/$lno");
        break;
      case "comment":
        $link = $in_admin ? url("admin/comment/edit/$lno") : url("node/view/$nid#$lno");
        break;
      default:
        // Unknown result types produce no entries.
        continue 2;
    }

    $find[] = array("count" => $value["count"], "title" => $value["title"], "link" => $link, "user" => $value["name"], "date" => $value["created"], "keywords" => $keywords);
  }

  // Callers receive NULL rather than an empty array when there is nothing
  // to show, as before.
  return $find ? $find : NULL;
}
/**
 * Update the search_index table
 *
 * For every row returned by the given query, the existing index entries
 * of that item are removed, the item's text is reduced to a list of
 * lower-case words and one index row is written per distinct word,
 * together with the number of times the word occurs.
 *
 * @param $search_array an array as returned from <module>_update_index
 *                      of type array("last_update" => ...,
 *                      "node_type" => ..., "select" => ...)
 *                      see node_update_index in node.module for an
 *                      explanation of array items
 *
 * @return Always true.
 */
function update_index($search_array) {
  $node_type = $search_array["node_type"];
  $select = $search_array["select"];
  $minimum_word_size = variable_get("minimum_word_size", 2);

  // Build a lookup table of the noise words once, instead of interpolating
  // each word into a regular expression for every node.
  $noise = array();
  foreach (explode(",", variable_get("noisewords", "")) as $word) {
    $word = trim($word);
    if (strlen($word) > 0) {
      $noise[$word] = TRUE;
    }
  }

  $result = db_query($select);

  // Look through the nodes we just selected
  while ($node = db_fetch_array($result)) {
    /*
    ** Trash any existing entries in the search index for this node,
    ** in case its a modified node.
    */
    db_query("DELETE FROM {search_index} WHERE lno = %d AND type = '%s'", $node["lno"], $node_type);

    /*
    ** Build the wordlist, teaser not included, as it then gives a
    ** false count of the number of hits, and doesn't show up
    ** when clicking on a node from the search interface anyway.
    */
    $wordlist = $node["text1"] ." ". $node["text2"];

    // Strip heaps of stuff out of it
    $wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $wordlist);

    // Remove punctuation and stuff
    // NOTE(review): this strips the single bytes 0xBB, 0xAB and 0xA1, which
    // presumes a single-byte encoding such as ISO-8859-1 - confirm before
    // indexing UTF-8 text.
    $wordlist = preg_replace("'(\xBB|\xAB|!|\xA1|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", "", $wordlist);

    // Strip out (now mangled) http and tags.
    $wordlist = preg_replace("'http\w+'", "", $wordlist);
    $wordlist = preg_replace("'www\w+'", "", $wordlist);

    // Remove all newlines of any type
    $wordlist = preg_replace("'([\r\n]|[\r]|[\n])'", " ", $wordlist);

    // Lower case the whole thing.
    $wordlist = strtolower($wordlist);

    // Remove whitespace
    $wordlist = preg_replace("'[\s]+'", " ", $wordlist);

    /*
    ** Walk through the words, giving a "weight" to each word, based on
    ** the number of times it appears in a page. Empty strings, words
    ** that are too short and noise words are left out.
    */
    $newwords = array();
    foreach (explode(" ", $wordlist) as $word) {
      if (strlen($word) == 0 || strlen($word) < $minimum_word_size) {
        continue;
      }
      if (isset($noise[$word])) {
        continue;
      }
      $newwords[$word] = isset($newwords[$word]) ? $newwords[$word] + 1 : 1;
    }

    /*
    ** Walk through the weighted words array, inserting them into
    ** the search index
    */
    foreach ($newwords as $key => $value) {
      db_query("INSERT INTO {search_index} VALUES('%s', %d, '%s', %d)", $key, $node["lno"], $node_type, $value);
    }
  }

  // update the last time this process was run.
  variable_set($search_array["last_update"], time());

  return true;
}
2001-09-22 21:01:39 +00:00
2002-03-05 20:15:17 +00:00
/**
 * Reset the "last update" marker of every module that takes part in
 * indexing.
 *
 * Each module is asked for its "update_index" information; for those
 * returning something, the variable named by its "last_update" entry is
 * set back to 1.
 */
function search_invalidate() {
  $modules = module_list();

  foreach ($modules as $name) {
    $index_info = module_invoke($name, "update_index");

    // Modules that return nothing do not take part in indexing.
    if (!$index_info) {
      continue;
    }

    variable_set($index_info["last_update"], 1);
  }
}
/**
 * Save the values entered by the administrator for the search module
 *
 * @param $edit An array of fields as setup via calling form_textfield,
 *              form_textarea etc
 */
function search_save($edit) {
  variable_set("minimum_word_size", $edit["minimum_word_size"]);

  // Store the noise words without any whitespace. strtr() with a
  // one-character replacement string only translated "\n" (surplus
  // characters in the longer argument are ignored), which left "\r" and
  // "\t" in the stored list.
  $data = str_replace(array("\n", "\r", "\t", " "), "", $edit["noisewords"]);
  variable_set("noisewords", $data);

  variable_set("help_pos", $edit["help_pos"]);
  variable_set("remove_short", $edit["remove_short"]);
}
2003-06-25 22:10:54 +00:00
/**
 * Output the search page: the search form, the help text or a link to it
 * at the configured position, and the results for the given keywords.
 *
 * Users without the "search content" permission get an "Access denied"
 * page instead.
 *
 * @param $keys The keywords to search for; no results box is shown when
 *              this is empty.
 */
function search_view($keys) {
  // Not read in this function; the declaration is kept as it was.
  global $type;

  if (!user_access("search content")) {
    theme("header", t("Access denied"));
    theme("box", t("Access denied"), message_access());
    theme("footer");
    return;
  }

  // Construct the search form:
  $form = search_form(NULL, $keys, TRUE);

  // Collect the search results:
  $output = search_data($keys);

  // Put the help text, or a link to it, above or below the form:
  $help_link = l(t("search help"), "search/help");
  $position = variable_get("help_pos", 1);
  if ($position == "1") {
    $form = search_help(). $form;
  }
  elseif ($position == "2") {
    $form .= search_help();
  }
  elseif ($position == "3") {
    $form = $help_link. "<br />". $form;
  }
  elseif ($position == "4") {
    $form .= "<br />". $help_link;
  }

  // Display form and search results:
  theme("header", t("Search"));

  if ($form) {
    theme("box", t("Search"), $form);
  }

  if ($keys) {
    theme("box", t("Search Results"), $output ? $output : t("Your search yielded no results."));
  }

  theme("footer");
}
/**
 * Page callback of the search module.
 *
 * Shows the search help when the second path argument is "help"; in all
 * other cases the search page for the submitted keywords is shown.
 */
function search_page() {
  // Keywords from the query string take precedence over posted ones. When
  // neither was sent, NULL is used without reading a missing index.
  if (isset($_GET["keys"])) {
    $keys = $_GET["keys"];
  }
  elseif (isset($_POST["keys"])) {
    $keys = $_POST["keys"];
  }
  else {
    $keys = NULL;
  }

  switch (arg(1)) {
    case "help":
      theme("header");
      theme("box", t("Search Help"), search_help());
      theme("footer");
      break;
    default:
      search_view($keys);
  }
}
2001-11-01 11:00:51 +00:00
2003-01-21 22:31:37 +00:00
?>