378 lines
12 KiB
Plaintext
378 lines
12 KiB
Plaintext
<?php
|
||
// $Id$
|
||
|
||
/**
 * Build the help text for the search page.
 *
 * @return An HTML string holding two titled paragraphs: the general search
 *         guidelines, and a description of which words are ignored. The
 *         "%number" placeholder of the second paragraph is filled with the
 *         value read through variable_get("minimum_word_size", 2).
 */
function search_help() {
  $parts = array();

  $parts[] = "<b>". t("Search guidelines") ."</b>";
  $parts[] = "<p>". t("The search page allows you to search the website's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.") ."</p>";

  $parts[] = "<b>". t("Words excluded from the search") ."</b>";
  $parts[] = "<p>". t("Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also being filtered.", array("%number" => variable_get("minimum_word_size", 2))) ."</p>";

  return implode("", $parts);
}
|
||
|
||
/**
 * Return a piece of information about the search module.
 *
 * @param $field The name of the requested field. Only "description" is
 *               provided here.
 *
 * @return The value stored for $field, or NULL when this module does not
 *         provide that field.
 */
function search_system($field){
  $system = array();
  $system["description"] = t("Enables site wide keyword searching.");

  // Asking for any other field used to read an undefined array index;
  // answer NULL explicitly instead.
  return isset($system[$field]) ? $system[$field] : NULL;
}
|
||
|
||
/**
 * List the access permissions defined by the search module.
 *
 * @return An array holding the permission names "search content" and
 *         "administer search", in that order.
 */
function search_perm() {
  $permissions = array();
  $permissions[] = "search content";
  $permissions[] = "administer search";

  return $permissions;
}
|
||
|
||
/**
 * Return an array of links to be displayed
 *
 * @param $type The type of page requesting the link
 *
 * @return An array of HTML links. It holds a single "search" link when
 *         $type is "page" and the current user has the "search content"
 *         permission, and is empty otherwise.
 */
function search_link($type) {
  // Start from an empty list so the return value never depends on an
  // undefined variable when no link applies.
  $links = array();

  if ($type == "page" && user_access("search content")) {
    $links[] = l(t("search"), "search", array("title" => t("Search for older content.")));
  }

  return $links;
}
|
||
|
||
/**
 * Build the settings form of the search module.
 *
 * @return The concatenated HTML of four form elements, in this order:
 *         "minimum_word_size", "remove_short", "noisewords" and "help_pos".
 *         Each element is pre-filled with the value read through
 *         variable_get().
 */
function search_settings() {
  $elements = array();

  $elements[] = form_textfield(
    t("Minimum word length to index"),
    "minimum_word_size",
    variable_get("minimum_word_size", 2),
    10,
    10,
    t("The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.")
  );

  $elements[] = form_textfield(
    t("Minimum word length to search for"),
    "remove_short",
    variable_get("remove_short", 0),
    10,
    10,
    t("The number of characters a word has to be to be searched for.")
  );

  $elements[] = form_textarea(
    t("Noise words"),
    "noisewords",
    variable_get("noisewords", ""),
    70,
    10,
    t("These words will not be indexed, enter comma separated list, linebreaks and whitespace do not matter. Example: and, or, not, a, to, I, it, ...")
  );

  $elements[] = form_select(
    t("Help text position"),
    "help_pos",
    variable_get("help_pos", 1),
    array(
      "1" => t("Above search output"),
      "2" => t("Below search output"),
      "3" => t("Link from above search output"),
      "4" => t("Link from below search output")
    ),
    t("Where to show the help text for users on the search page.")
  );

  return implode("", $elements);
}
|
||
|
||
/**
 * Search engine administration actions.
 *
 * The only action handled is "reindex" (read from the global $op): the
 * index is invalidated with search_invalidate() and rebuilt with
 * search_cron(), and a progress line is printed after each step.
 */
function search_admin() {
  global $op;

  // Nothing to do unless the user may administer the search engine and
  // actually asked for a reindex.
  if (!user_access("administer search") || $op != "reindex") {
    return;
  }

  search_invalidate();
  print t("index invalidated") ."<br />\n";

  search_cron();
  print t("index recreated") ."<br /><hr />\n";
}
|
||
|
||
/**
 * Regularly run action: refresh the search index for every module that
 * implements the <module>_update_index function.
 *
 * Each module is invoked with the "update_index" hook; whenever the hook
 * returns a non-empty value, that value is handed to update_index().
 */
function search_cron() {
  foreach (module_list() as $module) {
    $index_info = module_invoke($module, "update_index");

    // Modules that return nothing for the hook have nothing to index.
    if (!$index_info) {
      continue;
    }

    update_index($index_info);
  }
}
|
||
|
||
/**
 * Perform a search on one or more words.
 *
 * Search function called by each module that supports the indexed search.
 * The keys are lower-cased, "*" wildcards are turned into SQL "%"
 * wildcards, and the keys are split on spaces. For every word, each "%"
 * in the "select" query is replaced by that word and the query is run.
 * Rows are merged per "lno"; when several words match the same "lno" their
 * counts are added up.
 *
 * @param $search_array an array as returned from <module>_search
 *                      of type array("keys" => ...,
 *                      "type" => ..., "select" => ...)
 *                      see node_search in node.module for an
 *                      explanation of array items
 *
 * @return An array of matches, highest count first. Each match is an array
 *         with the keys "count", "title", "link", "user", "date" and
 *         "keywords". The array is empty when nothing matched or when
 *         "type" is neither "node" nor "comment".
 */
function do_search($search_array) {
  global $PHP_SELF;

  $keys = strtolower($search_array["keys"]);
  $type = $search_array["type"];
  $select = $search_array["select"];

  // Replace wildcards with mysql wildcards
  $keys = str_replace("*", "%", $keys);

  // Split the words entered into an array
  $words = explode(" ", $keys);

  // The setting does not change while we loop, so read it only once.
  $remove_short = variable_get("remove_short", 0);

  $results = array();

  foreach ($words as $word) {
    /*
    ** Repeated spaces in the keys yield empty words: never query for
    ** those. Also skip words that are too short when short words are
    ** set to be removed.
    */
    if ($word === "" || strlen($word) < $remove_short) {
      continue;
    }

    /*
    ** Put the next search word into the query and do the query. The
    ** substitution must be literal: a regular expression replacement
    ** would expand "$1" / "\1" style references found in the word and
    ** collapse doubled backslashes, which can undo escaping.
    ** NOTE(review): the word is inserted into the SQL as-is; presumably
    ** the caller escapes "keys" beforehand -- confirm.
    */
    $result = db_query(str_replace("%", $word, $select));

    // If we got any results
    if (db_num_rows($result) != 0) {
      // Create an in memory array of the results,
      while ($row = db_fetch_array($result)) {
        $lno = $row["lno"];

        if (!isset($results[$lno])) {
          /*
          ** First time we see this "lno". "count" has to stay the first
          ** key: the sort below compares the rows element by element,
          ** so the first key decides the order.
          */
          $results[$lno] = array(
            "count" => $row["count"],
            "lno" => $lno,
            "nid" => $row["nid"],
            "title" => $row["title"],
            "created" => $row["created"],
            "uid" => $row["uid"],
            "name" => $row["name"]
          );
        }
        else {
          /*
          ** Different word, but existing "lno", increase the count of
          ** matches against this "lno" by the number of times this
          ** word appears in the text
          */
          $results[$lno]["count"] = $results[$lno]["count"] + $row["count"];
        }
      }
    }
  }

  $find = array();

  if ($results) {
    // Sort the rows in descending order, i.e. highest count first.
    array_multisort($results, SORT_DESC);

    // Both values are the same for every row, so compute them once.
    $keywords = implode("|", $words);
    $admin = strstr($PHP_SELF, "admin");

    // OK, time to output the results.
    foreach ($results as $value) {
      $lno = $value["lno"];
      $nid = $value["nid"];

      if ($type == "node") {
        $link = $admin ? url("admin/node/edit/$lno") : url("node/view/$lno");
      }
      else if ($type == "comment") {
        $link = $admin ? url("admin/comment/edit/$lno") : url("node/view/$nid#$lno");
      }
      else {
        // Any other type produces no entries.
        continue;
      }

      // Append with []: an unset counter used as index would store the
      // first match under the key "".
      $find[] = array("count" => $value["count"], "title" => $value["title"], "link" => $link, "user" => $value["name"], "date" => $value["created"], "keywords" => $keywords);
    }
  }

  return $find;
}
|
||
|
||
/**
 * Update the search_index table
 *
 * Runs the given "select" query and, for every row returned, deletes the
 * existing search_index entries for that "lno" and type, then inserts one
 * entry per distinct word found in the "text1" and "text2" columns together
 * with the number of times the word occurs. Markup, punctuation, noise
 * words and words shorter than the "minimum_word_size" setting are left out.
 * Finally the current time is stored in the variable named by "last_update".
 *
 * @param $search_array an array as returned from <module>_update_index
 *                      of type array("last_update" => ...,
 *                      "node_type" => ..., "select" => ...)
 *                      see node_update_index in node.module for an
 *                      explanation of array items
 *
 * @return Always true.
 */
function update_index($search_array) {
  $node_type = $search_array["node_type"];
  $select = $search_array["select"];
  $minimum_word_size = variable_get("minimum_word_size", 2);

  /*
  ** Build a lookup table of the noise words once, instead of running one
  ** regular expression per noise word and per node. The text is lower
  ** cased before it is filtered, so the noise words are lower cased too.
  */
  $noise = array();
  foreach (explode(",", variable_get("noisewords", "")) as $noiseword) {
    $noiseword = strtolower(trim($noiseword));
    if ($noiseword !== "") {
      $noise[$noiseword] = TRUE;
    }
  }

  $result = db_query($select);

  if (db_num_rows($result)) {
    // Wohoo, found some, look through the nodes we just selected
    while ($node = db_fetch_array ($result)) {

      /*
      ** Trash any existing entries in the search index for this node,
      ** in case its a modified node. The values are passed as query
      ** arguments, the same way the INSERT below does.
      */
      db_query("DELETE from search_index where lno = '%s' and type = '%s'", $node["lno"], $node_type);

      /*
      ** Build the wordlist, teaser not included, as it then gives a
      ** false count of the number of hits, and doesn't show up
      ** when clicking on a node from the search interface anyway.
      */
      $wordlist = $node["text1"] ." ". $node["text2"];

      // Strip heaps of stuff out of it
      $wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $wordlist);

      /*
      ** Remove punctuation and stuff
      ** NOTE(review): the "<28>" and "<7C>" alternatives look like
      ** mis-encoded non-ASCII characters; they are kept untouched here --
      ** confirm against the original file.
      */
      $wordlist = preg_replace("'(<28>|<7C>|!|<7C>|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", "", $wordlist);

      // Strip out (now mangled) http and tags.
      $wordlist = preg_replace("'http\w+'", "", $wordlist);
      $wordlist = preg_replace("'www\w+'", "", $wordlist);

      // Remove all newlines of any type
      $wordlist = preg_replace("'([\r\n]|[\r]|[\n])'", " ", $wordlist);

      // Lower case the whole thing.
      $wordlist = strtolower($wordlist);

      // Remove whitespace
      $wordlist = preg_replace("'[\s]+'", " ", $wordlist);

      /*
      ** Walk through the words, giving a "weight" to each word, based on
      ** the number of times it appears in a page. The array is started
      ** afresh for every node so we don't add multiples. Words of exactly
      ** the minimum length are kept: only shorter ones are filtered.
      */
      $newwords = array();
      foreach (explode(" ", $wordlist) as $word) {
        if ($word === "" || strlen($word) < $minimum_word_size || isset($noise[$word])) {
          continue;
        }

        if (isset($newwords[$word])) {
          $newwords[$word]++;
        }
        else {
          $newwords[$word] = 1;
        }
      }

      /*
      ** Walk through the weighted words array, inserting them into
      ** the search index
      */
      foreach ($newwords as $key => $value) {
        db_query("INSERT INTO search_index VALUES('%s', '%s', '%s', '%s')", $key, $node["lno"], $node_type, $value);
      }
    }
  }

  // update the last time this process was run.
  variable_set($search_array["last_update"], time());

  return true;
}
|
||
|
||
|
||
/**
 * Reset the "last update" marker of every module that takes part in the
 * indexed search.
 *
 * Each module is invoked with the "update_index" hook; whenever the hook
 * returns a non-empty value, the variable named by its "last_update" item
 * is set to 1.
 */
function search_invalidate() {
  foreach (module_list() as $module) {
    $index_info = module_invoke($module, "update_index");

    // Modules that return nothing for the hook keep no marker to reset.
    if (!$index_info) {
      continue;
    }

    variable_set($index_info["last_update"], 1);
  }
}
|
||
|
||
/**
 * Save the values entered by the administrator for the search module
 *
 * Stores "minimum_word_size", "noisewords", "help_pos" and "remove_short"
 * with variable_set(). The noise words are stored with all whitespace
 * removed, so only the comma separated words remain.
 *
 * @param $edit An array of fields as setup via calling form_textfield,
 *              form_textarea etc
 */
function search_save($edit) {
  variable_set("minimum_word_size", $edit["minimum_word_size"]);

  /*
  ** Drop every whitespace character (spaces, tabs, all kinds of line
  ** breaks) in one go. Translating "\n\r\t" with strtr() to a single
  ** space only ever handled "\n": the extra characters of the longer
  ** argument are ignored, so "\r" and "\t" stayed in the stored list.
  */
  $data = preg_replace("'\s+'", "", $edit["noisewords"]);

  variable_set("noisewords", $data);
  variable_set("help_pos", $edit["help_pos"]);
  variable_set("remove_short", $edit["remove_short"]);
}
|
||
|
||
/**
 * Output the search page: the search form, the help text or a link to it,
 * and the results for the current keys.
 *
 * Users without the "search content" permission get an "Access denied" box
 * instead. The globals $type and $keys are overwritten with their
 * check_input() result before the form and the results are built.
 */
function search_view() {
  global $theme, $edit, $type, $keys;

  if (!user_access("search content")) {
    $theme->header();
    $theme->box(t("Access denied"), message_access());
    $theme->footer();
    return;
  }

  // Verify the user input:
  // TODO: is this necessary or is it / should it be done in search_{form|data}?
  $type = check_input($type);
  $keys = check_input($keys);

  // Construct the search form:
  $form = search_form(NULL, NULL, TRUE);

  // Collect the search results:
  $output = search_data();

  // Put the help text, or a link to it, above or below the form:
  $help_link = l(t("search help"), "search/help");
  $help_pos = variable_get("help_pos", 1);

  if ($help_pos == "1") {
    $form = search_help(). $form;
  }
  else if ($help_pos == "2") {
    $form .= search_help();
  }
  else if ($help_pos == "3") {
    $form = $help_link. $form;
  }
  else if ($help_pos == "4") {
    $form .= $help_link;
  }

  // Display form and search results:
  $theme->header();

  if ($form) {
    $theme->box(t("Search"), $form);
  }

  // A result box is only shown when there was something to search for.
  if ($keys) {
    $theme->box(t("Result"), $output ? $output : t("Your search yielded no results."));
  }

  $theme->footer();
}
|
||
|
||
/**
 * Page handler of the search module.
 *
 * When arg(1) is "help" the search help text is shown in a box of its own;
 * in every other case the search page is output by search_view().
 */
function search_page() {
  global $theme;

  if (arg(1) == "help") {
    $theme->header();
    $theme->box(t("Search Help"), search_help());
    $theme->footer();
    return;
  }

  search_view();
}
|
||
|
||
?>
|