- Made the search module use implicit AND'ing instead of OR'ing. Patch by
  Gerhard. Also updated the documentation a little.
4.1.x
Dries Buytaert 2002-11-27 19:55:14 +00:00
parent 0f5a7b6151
commit 1cb8f3b2c6
2 changed files with 28 additions and 82 deletions

View File

@@ -2,15 +2,10 @@
// $Id$
function search_help() {
$output = "<b>". t("Search hints") ."</b>";
$output .= "<p>". t("The search allows you to search for words in the website's content. You can specify multiple words, and they will all be searched for, and the page that provides the highest hit count returned.") ."</p>";
$output .= "<p>". t("As this website provides multiple content types, the results are grouped by content type as well. If you only wish to search through certain types of content, you can modify the behaviour of this search using the 'Restrict search to' checkboxes below.") ."</p>";
$output .= "<p>". t("To specify that a word is <b>required</b> in the pages that are returned, place a '+' in front of it like this '+walk'.") ."</p>";
$output .= "<p>". t("You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and 'walkability'... Alright you got me, I made the last ones up.") ."</p>";
$output .= "<p>". t("Searches are not case sensitive, regardless of how you type them all letters will be searched for in lower case") ."</p>";
$output = "<b>". t("Search guidelines") ."</b>";
$output .= "<p>". t("The search page allows you to search the website's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.") ."</p>";
$output .= "<b>". t("Words excluded from the search") ."</b>";
$output .= "<p>". t("Some words which commonly occur are filtered out by the searching process, these are commonly called 'noise words'. Examples are 'a, at, and, are, as, ask', and the list goes on. Words shorter than %number letters are also filtered from the search index.", array("%number" => variable_get("minimum_word_size", 2)));
$output .= "<p>". t("These words will never be matched when specified, even if they appear in the node you are searching for.");
$output .= "<p>". t("Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also being filtered.", array("%number" => variable_get("minimum_word_size", 2)));
return $output;
}
@@ -119,19 +114,8 @@ function do_search($search_array) {
if (strlen($word) < variable_get("remove_short", 0)) {
continue;
}
/*
** If the word is proceeded by a "+", then this word is required, and
** pages that match other words, but not this one will be removed
*/
if (substr($word, 0, 1) == "+") {
$word = substr($word, 1);
$required = 1;
$reqcount++;
$remove_rest = 1;
}
else {
$required = 0;
}
// All words are required
$reqcount++;
// Put the next search word into the query and do the query
$query = preg_replace("'\%'", $word, $select);
@@ -162,23 +146,19 @@ function do_search($search_array) {
$results[$lno]["uid"] = $uid;
$results[$lno]["name"] = $name;
// If this is a required word, set it to "valid"
if ($required == 1) {
$results[$lno]["valid"] = 1;
}
// Set it to "valid"
$results[$lno]["valid"] = 1;
}
else {
/*
** Different word, but existing "lno", increase the count of
** matches against this "lno" by the number of times this
** matches against this "lno" by the number of times this
** word appears in the text
*/
$results[$lno]["count"] = $results[$lno]["count"] + $count;
// Another match on the a required word, increase valid
if ($required == 1) {
$results[$lno]["valid"]++;
}
// Another match, increase valid
$results[$lno]["valid"]++;
}
}
}
@@ -197,10 +177,8 @@ function do_search($search_array) {
$uid = $value["uid"];
$name = $value["name"];
$count = $value["count"];
if ($remove_rest) {
if ($value["valid"] != $reqcount) {
continue;
}
if ($value["valid"] != $reqcount) {
continue;
}
switch ($type) {
case "node":
@@ -247,7 +225,7 @@ function update_index($search_array) {
/*
** Build the wordlist, teaser not included, as it then gives a
** false count of the number of hist, and doesn't show up
** false count of the number of hits, and doesn't show up
** when clicking on a node from the search interface anyway.
*/
$wordlist = $node["text1"] ." ". $node["text2"];
@@ -255,13 +233,8 @@ function update_index($search_array) {
// Strip heaps of stuff out of it
$wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $wordlist);
// Remove all numbers
$wordlist = preg_replace("'[0-9]'", "", $wordlist);
// Remove punctuation and stuff
$wordlist = preg_replace("'(!|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'",
"",
$wordlist);
$wordlist = preg_replace("'(»|«|!|¡|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", "", $wordlist);
// Strip out (now mangled) http and tags.
$wordlist = preg_replace("'http\w+'", "", $wordlist);

View File

@@ -2,15 +2,10 @@
// $Id$
function search_help() {
$output = "<b>". t("Search hints") ."</b>";
$output .= "<p>". t("The search allows you to search for words in the website's content. You can specify multiple words, and they will all be searched for, and the page that provides the highest hit count returned.") ."</p>";
$output .= "<p>". t("As this website provides multiple content types, the results are grouped by content type as well. If you only wish to search through certain types of content, you can modify the behaviour of this search using the 'Restrict search to' checkboxes below.") ."</p>";
$output .= "<p>". t("To specify that a word is <b>required</b> in the pages that are returned, place a '+' in front of it like this '+walk'.") ."</p>";
$output .= "<p>". t("You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and 'walkability'... Alright you got me, I made the last ones up.") ."</p>";
$output .= "<p>". t("Searches are not case sensitive, regardless of how you type them all letters will be searched for in lower case") ."</p>";
$output = "<b>". t("Search guidelines") ."</b>";
$output .= "<p>". t("The search page allows you to search the website's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.") ."</p>";
$output .= "<b>". t("Words excluded from the search") ."</b>";
$output .= "<p>". t("Some words which commonly occur are filtered out by the searching process, these are commonly called 'noise words'. Examples are 'a, at, and, are, as, ask', and the list goes on. Words shorter than %number letters are also filtered from the search index.", array("%number" => variable_get("minimum_word_size", 2)));
$output .= "<p>". t("These words will never be matched when specified, even if they appear in the node you are searching for.");
$output .= "<p>". t("Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also being filtered.", array("%number" => variable_get("minimum_word_size", 2)));
return $output;
}
@@ -119,19 +114,8 @@ function do_search($search_array) {
if (strlen($word) < variable_get("remove_short", 0)) {
continue;
}
/*
** If the word is proceeded by a "+", then this word is required, and
** pages that match other words, but not this one will be removed
*/
if (substr($word, 0, 1) == "+") {
$word = substr($word, 1);
$required = 1;
$reqcount++;
$remove_rest = 1;
}
else {
$required = 0;
}
// All words are required
$reqcount++;
// Put the next search word into the query and do the query
$query = preg_replace("'\%'", $word, $select);
@@ -162,23 +146,19 @@ function do_search($search_array) {
$results[$lno]["uid"] = $uid;
$results[$lno]["name"] = $name;
// If this is a required word, set it to "valid"
if ($required == 1) {
$results[$lno]["valid"] = 1;
}
// Set it to "valid"
$results[$lno]["valid"] = 1;
}
else {
/*
** Different word, but existing "lno", increase the count of
** matches against this "lno" by the number of times this
** matches against this "lno" by the number of times this
** word appears in the text
*/
$results[$lno]["count"] = $results[$lno]["count"] + $count;
// Another match on the a required word, increase valid
if ($required == 1) {
$results[$lno]["valid"]++;
}
// Another match, increase valid
$results[$lno]["valid"]++;
}
}
}
@@ -197,10 +177,8 @@ function do_search($search_array) {
$uid = $value["uid"];
$name = $value["name"];
$count = $value["count"];
if ($remove_rest) {
if ($value["valid"] != $reqcount) {
continue;
}
if ($value["valid"] != $reqcount) {
continue;
}
switch ($type) {
case "node":
@@ -247,7 +225,7 @@ function update_index($search_array) {
/*
** Build the wordlist, teaser not included, as it then gives a
** false count of the number of hist, and doesn't show up
** false count of the number of hits, and doesn't show up
** when clicking on a node from the search interface anyway.
*/
$wordlist = $node["text1"] ." ". $node["text2"];
@@ -255,13 +233,8 @@ function update_index($search_array) {
// Strip heaps of stuff out of it
$wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $wordlist);
// Remove all numbers
$wordlist = preg_replace("'[0-9]'", "", $wordlist);
// Remove punctuation and stuff
$wordlist = preg_replace("'(!|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'",
"",
$wordlist);
$wordlist = preg_replace("'(»|«|!|¡|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", "", $wordlist);
// Strip out (now mangled) http and tags.
$wordlist = preg_replace("'http\w+'", "", $wordlist);