<?php
|
|
// $Id$
|
|
|
|
/**
|
|
* @file
|
|
* Enables site-wide keyword searching.
|
|
*/
|
|
|
|
/**
 * Matches Unicode character classes to exclude from the search index.
 *
 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
 * The index only contains the following character classes:
 * Lu     Letter, Uppercase
 * Ll     Letter, Lowercase
 * Lt     Letter, Titlecase
 * Lo     Letter, Other
 * Nd     Number, Decimal Digit
 * No     Number, Other
 *
 * The value is the inside of a PCRE character class (no surrounding
 * brackets) and is meant to be used with the /u modifier.
 *
 * The excluded block before the CJK compatibility area starts at U+E000
 * (private use), not U+D800: surrogate code points U+D800-U+DFFF cannot occur
 * in valid UTF-8, and PCRE refuses to compile a UTF-8 pattern naming them.
 */
define('PREG_CLASS_SEARCH_EXCLUDE',
'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
'\x{a80b}\x{a823}-\x{a82b}\x{e000}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
|
|
|
|
/**
 * Matches all 'N' Unicode character classes (numbers).
 *
 * The value is the inside of a PCRE character class (no surrounding
 * brackets) and is meant to be used with the /u modifier.
 */
define('PREG_CLASS_NUMBERS',
  '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'
  . '\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'
  . '\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'
  . '\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'
  . '\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'
  . '\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'
  . '\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'
  . '\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'
  . '\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}'
);
|
|
|
|
/**
 * Matches all 'P' Unicode character classes (punctuation).
 *
 * The value is the inside of a PCRE character class (no surrounding
 * brackets) and is meant to be used with the /u modifier.
 */
define('PREG_CLASS_PUNCTUATION',
  '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'
  . '\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'
  . '\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'
  . '\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'
  . '\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'
  . '\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'
  . '\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'
  . '\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'
  . '\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'
  . '\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'
  . '\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'
  . '\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'
  . '\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'
  . '\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'
  . '\x{ff65}'
);
|
|
|
|
/**
 * Matches all CJK characters that are candidates for auto-splitting
 * (Chinese, Japanese, Korean).
 *
 * Contains kana and BMP ideographs. The value is the inside of a PCRE
 * character class (no surrounding brackets), for use with the /u modifier.
 */
define(
  'PREG_CLASS_CJK',
  '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}'
);
|
|
|
|
/**
 * Implementation of hook_help().
 *
 * Supplies help text for three paths: the module help page, the search
 * settings page, and the list of tips shown when a search has no results.
 * Nothing is returned for any other path.
 */
function search_help($path, $arg) {
  if ($path == 'admin/help#search') {
    // Each entry becomes one paragraph of the help page, in this order.
    $paragraphs = array(
      t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site. Search is useful for finding users and posts by searching on keywords.'),
      t('The search engine works by maintaining an index of the words in your site\'s content. It indexes the posts and users. You can adjust the settings to tweak the indexing behavior. Note that the search requires cron to be set up correctly. The index percentage sets the maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'),
      t('For more information please read the configuration and customization handbook <a href="@search">Search page</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/')),
    );
    return '<p>'. implode('</p><p>', $paragraphs) .'</p>';
  }

  if ($path == 'admin/settings/search') {
    return '<p>'. t('The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behavior. Note that the search requires cron to be set up correctly.') .'</p>';
  }

  if ($path == 'search#noresults') {
    return t('<ul>
<li>Check if your spelling is correct.</li>
<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
</ul>');
  }
}
|
|
|
|
/**
 * Implementation of hook_theme().
 *
 * Registers the two search form templates and the two result templates; the
 * result hooks are declared as living in search.pages.inc.
 */
function search_theme() {
  $pages_file = 'search.pages.inc';
  $hooks = array();

  // Form templates.
  $hooks['search_theme_form'] = array(
    'arguments' => array('form' => NULL),
    'template' => 'search-theme-form',
  );
  $hooks['search_block_form'] = array(
    'arguments' => array('form' => NULL),
    'template' => 'search-block-form',
  );

  // Result templates.
  $hooks['search_result'] = array(
    'arguments' => array('result' => NULL, 'type' => NULL),
    'file' => $pages_file,
    'template' => 'search-result',
  );
  $hooks['search_results'] = array(
    'arguments' => array('results' => NULL, 'type' => NULL),
    'file' => $pages_file,
    'template' => 'search-results',
  );

  return $hooks;
}
|
|
|
|
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names this module defines.
 */
function search_perm() {
  $permissions = array(
    'search content',
    'use advanced search',
    'administer search',
  );
  return $permissions;
}
|
|
|
|
/**
 * Implementation of hook_block().
 *
 * For the 'list' operation, describes a single uncached "Search form" block.
 * For the 'view' operation, returns the block form, but only for users with
 * the 'search content' permission. Returns nothing in every other case.
 */
function search_block($op = 'list', $delta = 0) {
  if ($op == 'list') {
    return array(
      0 => array(
        'info' => t('Search form'),
        // Not worth caching.
        'cache' => BLOCK_NO_CACHE,
      ),
    );
  }

  if ($op == 'view' && user_access('search content')) {
    return array(
      'content' => drupal_get_form('search_block_form'),
      'subject' => t('Search'),
    );
  }
}
|
|
|
|
/**
 * Implementation of hook_menu().
 *
 * Declares the main search page, the settings and index-wipe forms, the
 * "top search phrases" report (served by dblog), and one local task under
 * 'search' for every module that implements hook_search().
 */
function search_menu() {
  $pages_file = 'search.pages.inc';
  $admin_file = 'search.admin.inc';

  $items = array(
    'search' => array(
      'title' => 'Search',
      'page callback' => 'search_view',
      'access arguments' => array('search content'),
      'type' => MENU_SUGGESTED_ITEM,
      'file' => $pages_file,
    ),
    'admin/settings/search' => array(
      'title' => 'Search settings',
      'description' => 'Configure relevance settings for search and other indexing options',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_admin_settings'),
      'access arguments' => array('administer search'),
      'type' => MENU_NORMAL_ITEM,
      'file' => $admin_file,
    ),
    'admin/settings/search/wipe' => array(
      'title' => 'Clear index',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_wipe_confirm'),
      'access arguments' => array('administer search'),
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    ),
    // The report page callback and its include file belong to dblog.
    'admin/reports/search' => array(
      'title' => 'Top search phrases',
      'description' => 'View most popular search phrases.',
      'page callback' => 'dblog_top',
      'page arguments' => array('search'),
      'file' => 'dblog.admin.inc',
      'file path' => drupal_get_path('module', 'dblog'),
    ),
  );

  // One tab per search-providing module; the tab title is whatever the
  // module's hook_search('name') returns.
  foreach (module_implements('search') as $module) {
    $tab_path = 'search/'. $module .'/%menu_tail';
    $items[$tab_path] = array(
      'title callback' => 'module_invoke',
      'title arguments' => array($module, 'search', 'name', TRUE),
      'page callback' => 'search_view',
      'page arguments' => array($module),
      'access callback' => '_search_menu',
      'access arguments' => array($module),
      'type' => MENU_LOCAL_TASK,
      'parent' => 'search',
      'file' => $pages_file,
    );
  }

  return $items;
}
|
|
|
|
/**
 * Menu access callback for the per-module search tabs.
 *
 * @param $name
 *   The name of a module implementing hook_search().
 * @return
 *   TRUE when the current user may search content and the module's
 *   hook_search('name') returns a non-empty value, FALSE otherwise.
 */
function _search_menu($name) {
  if (!user_access('search content')) {
    return FALSE;
  }
  return (bool) module_invoke($name, 'search', 'name');
}
|
|
|
|
/**
 * Wipes a part of or the entire search index.
 *
 * Called without $sid and $type, this only invokes hook_search('reset') in
 * all modules. Otherwise the rows for the given item are deleted directly.
 *
 * @param $sid
 *  (optional) The SID of the item to wipe. If specified, $type must be passed
 *  too.
 * @param $type
 *  (optional) The type of item to wipe.
 * @param $reindex
 *  (optional) When TRUE, the item's rows in {search_node_links} are kept so
 *  that link information survives re-indexing. Defaults to FALSE.
 */
function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
  if ($type == NULL && $sid == NULL) {
    module_invoke_all('search', 'reset');
    return;
  }

  $tables = array('search_dataset', 'search_index');
  // Don't remove links if re-indexing.
  if (!$reindex) {
    $tables[] = 'search_node_links';
  }
  foreach ($tables as $table) {
    db_query("DELETE FROM {". $table ."} WHERE sid = %d AND type = '%s'", $sid, $type);
  }
}
|
|
|
|
/**
 * Marks a word as dirty (or retrieves the list of dirty words).
 *
 * This is used during indexing (cron). Words which are dirty have outdated
 * total counts in the search_total table, and need to be recounted.
 *
 * @param $word
 *   (optional) The word to mark. When omitted, nothing is marked.
 * @return
 *   When called without a word: an array whose keys are the words marked so
 *   far during this request. Nothing when a word was passed.
 */
function search_dirty($word = NULL) {
  static $dirty = array();

  if ($word === NULL) {
    return $dirty;
  }
  $dirty[$word] = TRUE;
}
|
|
|
|
/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules and cleans up dirty words (see
 * search_dirty).
 */
function search_cron() {
  // Registering the totals update as a shutdown function makes it run even
  // when the indexing below does not finish normally.
  register_shutdown_function('search_update_totals');

  // Let every module update its part of the word index.
  $modules = module_list();
  foreach ($modules as $name) {
    module_invoke($name, 'update_index');
  }
}
|
|
|
|
/**
 * This function is called on shutdown to ensure that search_total is always
 * up to date (even if cron times out or otherwise fails).
 *
 * It recomputes the {search_total} count of every word marked through
 * search_dirty(), then removes {search_total} rows whose word no longer
 * appears in {search_index}.
 */
function search_update_totals() {
  // Refresh the IDF (Inverse Document Frequency) count of new/changed words.
  $dirty_words = array_keys(search_dirty());
  foreach ($dirty_words as $word) {
    $sum = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
    // Apply Zipf's law to equalize the probability distribution
    $weight = log10(1 + 1/(max(1, $sum)));
    db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $weight, $word);
    if (db_affected_rows()) {
      continue;
    }
    // No row was updated, so add one for this word.
    db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $weight);
  }

  // Words deleted from search_index may still be in search_total. The LEFT
  // JOIN keeps exactly the rows that fail to join, i.e. the orphans.
  $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  while ($row = db_fetch_object($orphans)) {
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $row->realword);
  }
}
|
|
|
|
/**
 * Simplifies a string according to indexing rules.
 *
 * Steps, in order: decode entities, lowercase, run hook_search_preprocess(),
 * optionally expand CJK runs, join number groups, drop dot/underscore/dash,
 * and turn every excluded character into a space.
 *
 * @param $text
 *   The text to simplify.
 * @return
 *   The simplified text.
 */
function search_simplify($text) {
  // Decode entities to UTF-8, then lowercase.
  $text = drupal_strtolower(decode_entities($text));

  // Call an external processor for word handling.
  search_invoke_preprocess($text);

  // Simple CJK handling
  if (variable_get('overlap_cjk', TRUE)) {
    $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
  }

  // To improve searching for numerical data such as dates, IP addresses
  // or version numbers, a group of numerical characters separated only by
  // punctuation characters is treated as one piece. Searching for
  // '20/03/1984' therefore also returns results with '20-03-1984' in them.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $number = '['. PREG_CLASS_NUMBERS .']';
  $punctuation = '['. PREG_CLASS_PUNCTUATION .']';
  $text = preg_replace('/('. $number .'+)'. $punctuation .'+(?='. $number .')/u', '\1', $text);

  // The dot, underscore and dash are simply removed. This allows meaningful
  // search behavior with acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // With the exception of the rules above, all punctuation, marks, spacers,
  // etc. are considered a word boundary.
  return preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u', ' ', $text);
}
|
|
|
|
/**
 * Basic CJK tokenizer. Simply splits a string into consecutive, overlapping
 * sequences of characters ('minimum_word_size' long).
 *
 * @param $matches
 *   A preg_replace_callback() match array; only element 0 is used.
 * @return
 *   The tokens separated by single spaces, with a leading and trailing space.
 */
function search_expand_cjk($matches) {
  $size = variable_get('minimum_word_size', 3);
  $remaining = $matches[0];
  $length = drupal_strlen($remaining);

  // Strings no longer than one token are passed through unchanged.
  if ($length <= $size) {
    return ' '. $remaining .' ';
  }

  $output = ' ';
  // Sliding window holding the most recent characters.
  $window = array();
  $position = 0;
  while ($position < $length) {
    // Take one character off the front; the byte-wise substr() then removes
    // exactly the bytes that character occupied.
    $char = drupal_substr($remaining, 0, 1);
    $remaining = substr($remaining, strlen($char));
    array_push($window, $char);
    if ($position >= $size - 1) {
      $output .= implode('', $window) .' ';
      array_shift($window);
    }
    $position++;
  }
  return $output;
}
|
|
|
|
/**
 * Splits a string into tokens for indexing.
 *
 * The result of the most recent call is cached, so that calling this twice in
 * a row with the same text does not repeat the simplification work.
 *
 * @param $text
 *   The raw text to split.
 * @return
 *   An array of simplified words, each truncated by
 *   _search_index_truncate().
 */
function search_index_split($text) {
  static $last = NULL;
  static $lastsplit = NULL;

  // The cache is keyed on the raw input and compared strictly, so only a
  // genuine repeat of the previous call is served from it. (A loose
  // comparison treats e.g. '1e3' and '1000' as equal.)
  if ($last !== NULL && $last === $text) {
    return $lastsplit;
  }

  // Process words
  $words = explode(' ', search_simplify($text));
  array_walk($words, '_search_index_truncate');

  // Save last keyword result
  $last = $text;
  $lastsplit = $words;

  return $words;
}
|
|
|
|
/**
 * Helper function for array_walk in search_index_split.
 *
 * Replaces the word, in place, with the result of truncate_utf8($text, 50).
 *
 * @param $text
 *   The word to truncate, passed by reference.
 */
function _search_index_truncate(&$text) {
  $truncated = truncate_utf8($text, 50);
  $text = $truncated;
}
|
|
|
|
/**
 * Invokes hook_search_preprocess() in modules.
 *
 * Each implementing module receives the output of the previous one.
 *
 * @param $text
 *   The text to preprocess, passed by reference and replaced in place.
 */
function search_invoke_preprocess(&$text) {
  $modules = module_implements('search_preprocess');
  foreach ($modules as $name) {
    $text = module_invoke($name, 'search_preprocess', $text);
  }
}
|
|
|
|
/**
 * Update the full-text search index for a particular item.
 *
 * The HTML is reduced to the tags listed in $tags, split into tags and text,
 * and every word is scored according to the tags it sits in. Words inside
 * links that point to a node on this site are additionally recorded as the
 * link caption for that node in {search_node_links}.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 *
 * @param $type
 *   A string defining this type of item (e.g. 'node')
 *
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 *
 * @ingroup search
 */
function search_index($sid, $type, $text) {
  $minimum_word_size = variable_get('minimum_word_size', 3);

  // Link matching
  global $base_url;
  $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 25,
                'h2' => 18,
                'h3' => 15,
                'h4' => 12,
                'h5' => 9,
                'h6' => 6,
                'u' => 3,
                'b' => 3,
                'i' => 3,
                'strong' => 3,
                'em' => 3,
                'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  $link = FALSE; // State variable for link analyser
  $score = 1; // Starting score per word
  $accum = ' '; // Accumulator for cleaned up data
  $tagstack = array(); // Stack with open tags
  $tagwords = 0; // Counter for consecutive words
  $focus = 1; // Focus state

  // Accumulator for words for index. Key 0 maps word => score for this item;
  // every other key is a linked node id mapping to a list of caption words.
  $results = array(0 => array());

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = drupal_strtolower($tagname);
      // Closing or opening tag?
      if ($tagname[0] == '/') {
        $tagname = substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
        if (!count($tagstack) || $tagstack[0] != $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score
          $score = max(1, $score - $tags[array_shift($tagstack)]);
        }
        if ($tagname == 'a') {
          $link = FALSE;
        }
      }
      else {
        if (isset($tagstack[0]) && $tagstack[0] == $tagname) {
          // None of the tags we look for make sense when nested identically.
          // If they are, it's probably broken HTML.
          $tagstack = array();
          $score = 1;
        }
        else {
          // Add to open tag stack and increment score
          array_unshift($tagstack, $tagname);
          $score += $tags[$tagname];
        }
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                // Note: ignore links to uncachable nodes to avoid redirect bugs.
                $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
                // The link may point to a node that does not exist, in which
                // case no row is returned and there is nothing to rank.
                if ($node && filter_format_allowcache($node->format)) {
                  $link = TRUE;
                  $linktitle = $node->title;
                }
              }
            }
          }
        }
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        if ($link) {
          // Check to see if the node link text is its URL. If so, we use the target node title instead.
          if (preg_match('!^https?://!i', $value)) {
            $value = $linktitle;
          }
        }
        $words = search_index_split($value);
        foreach ($words as $word) {
          // Add word to accumulator
          $accum .= $word .' ';
          $num = is_numeric($word);
          // Check wordlength
          if ($num || drupal_strlen($word) >= $minimum_word_size) {
            // Normalize numbers
            if ($num) {
              $word = (int)ltrim($word, '-0');
            }

            // Links score mainly for the target.
            if ($link) {
              if (!isset($results[$linknid])) {
                $results[$linknid] = array();
              }
              $results[$linknid][] = $word;
              // Reduce score of the link caption in the source.
              $focus *= 0.2;
            }
            // Fall-through
            if (!isset($results[0][$word])) {
              $results[0][$word] = 0;
            }
            $results[0][$word] += $score * $focus;

            // Focus is a decaying value in terms of the amount of unique words up to this point.
            // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
            $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  search_wipe($sid, $type, TRUE);

  // Insert cleaned up data into dataset
  db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Get all previous links from this item, keyed by target node id.
  $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
  $links = array();
  while ($row = db_fetch_object($result)) {
    $links[$row->nid] = $row->caption;
  }

  // Now store links to nodes.
  foreach ($results as $nid => $words) {
    $caption = implode(' ', $words);
    if (isset($links[$nid])) {
      if ($links[$nid] != $caption) {
        // Update the existing link and mark the node for reindexing.
        db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
        search_touch_node($nid);
      }
      // Unset the link to mark it as processed.
      unset($links[$nid]);
    }
    else {
      // Insert the new link and mark the node for reindexing.
      db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
      search_touch_node($nid);
    }
  }
  // Any left-over links in $links no longer exist. Delete them and mark the nodes for reindexing.
  // $links maps nid => caption, so the node id is the array key.
  foreach (array_keys($links) as $nid) {
    db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
    search_touch_node($nid);
  }
}
|
|
|
|
/**
 * Flags a node for reindexing.
 *
 * Sets the 'reindex' column of the node's {search_dataset} row to the current
 * time.
 *
 * @param $nid
 *   The nid of the node that needs reindexing.
 */
function search_touch_node($nid) {
  $now = time();
  db_query("UPDATE {search_dataset} SET reindex = %d WHERE sid = %d AND type = 'node'", $now, $nid);
}
|
|
|
|
/**
 * Implementation of hook_nodeapi().
 *
 * On 'update index', returns the captions of all recorded links to this node
 * wrapped in an <a> tag, so they are indexed as part of the target node. On
 * 'update', flags the node for reindexing.
 */
function search_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
  // Transplant links to a node into the target node.
  if ($op == 'update index') {
    $captions = array();
    $links = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid);
    while ($row = db_fetch_object($links)) {
      $captions[] = $row->caption;
    }
    return '<a>('. implode(', ', $captions) .')</a>';
  }

  // Reindex the node when it is updated. The node is automatically indexed
  // when it is added, simply by being added to the node table.
  if ($op == 'update') {
    search_touch_node($node->nid);
  }
}
|
|
|
|
/**
 * Implementation of hook_comment().
 *
 * Flags the commented node for reindexing whenever a comment is added,
 * changed, deleted, published or unpublished.
 *
 * @param $a1
 *   The comment, as either an array or an object carrying a 'nid'.
 * @param $op
 *   The comment operation being performed.
 */
function search_comment($a1, $op) {
  $reindex_ops = array('insert', 'update', 'delete', 'publish', 'unpublish');
  if (in_array($op, $reindex_ops)) {
    $nid = is_array($a1) ? $a1['nid'] : $a1->nid;
    search_touch_node($nid);
  }
}
|
|
|
|
/**
 * Extract a module-specific search option from a search query. e.g. 'type:book'
 *
 * @param $keys
 *   The search keys as entered by the user.
 * @param $option
 *   The option name to look for; matched literally and case-insensitively.
 * @return
 *   The text following 'option:' up to the next space (possibly an empty
 *   string), or nothing when the option is not present.
 */
function search_query_extract($keys, $option) {
  // Quote the option name so that characters such as '.' or '+' in it are
  // matched literally instead of being read as regular expression syntax.
  $pattern = '/(^| )'. preg_quote($option, '/') .':([^ ]*)( |$)/i';
  if (preg_match($pattern, $keys, $matches)) {
    return $matches[2];
  }
}
|
|
|
|
/**
 * Return a query with the given module-specific search option inserted in.
 * e.g. 'type:book'.
 *
 * Any existing occurrence of the option is removed first, whatever its value.
 *
 * @param $keys
 *   The search keys as entered by the user.
 * @param $option
 *   The option name; matched literally and case-insensitively.
 * @param $value
 *   (optional) The value to set. When empty, the option is only removed.
 * @return
 *   The modified search keys.
 */
function search_query_insert($keys, $option, $value = '') {
  $pattern = '/(^| )'. preg_quote($option, '/') .':[^ ]*/i';
  // Test for the option's presence rather than for a truthy value, so that
  // 'option:0' and a bare 'option:' are replaced instead of duplicated.
  if (preg_match($pattern, $keys)) {
    $keys = trim(preg_replace($pattern, '', $keys));
  }
  if ($value != '') {
    $keys .= ' '. $option .':'. $value;
  }
  return $keys;
}
|
|
|
|
/**
 * Parse a search query into SQL conditions.
 *
 * We build two queries that match the dataset bodies. See do_search() for
 * more about these.
 *
 * @param $text
 *   The search keys.
 * @return
 *   NULL when the text contains no tokens at all. Otherwise a list of seven
 *   elements:
 *    * A series of statements AND'd together which will be used to provide all
 *      possible matches.
 *    * Arguments for this query part.
 *    * A series of exact word matches OR'd together.
 *    * Arguments for this query part.
 *    * The number of word matches an item needs in order to qualify.
 *    * A bool indicating whether this is a simple query or not. Negative
 *      terms, phrases, and the presence of both AND / OR make this FALSE.
 *    * A bool indicating the presence of a lowercase or. Maybe the user
 *      wanted to use OR.
 */
function search_parse_query($text) {
  $keys = array('positive' => array(), 'negative' => array());

  // Tokenize query string
  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);

  if (count($matches) < 1) {
    return NULL;
  }

  // Classify tokens
  $or = FALSE;
  $or_warning = FALSE;
  $simple = TRUE;
  foreach ($matches as $match) {
    $phrase = FALSE;
    // Strip off phrase quotes. The tokenizer pattern guarantees $match[2] is
    // never empty, so reading its first character is safe.
    if ($match[2][0] == '"') {
      $match[2] = substr($match[2], 1, -1);
      $phrase = TRUE;
      $simple = FALSE;
    }
    // Simplify keyword according to indexing rules and external preprocessors
    $words = search_simplify($match[2]);
    // Re-explode in case simplification added more words, except when matching a phrase
    $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
    // Negative matches
    if ($match[1] == '-') {
      $keys['negative'] = array_merge($keys['negative'], $words);
    }
    // OR operator: instead of a single keyword, we store an array of all
    // OR'd keywords.
    elseif ($match[2] == 'OR' && count($keys['positive'])) {
      $last = array_pop($keys['positive']);
      // Starting a new OR?
      if (!is_array($last)) {
        $last = array($last);
      }
      $keys['positive'][] = $last;
      $or = TRUE;
      continue;
    }
    // Plain keyword
    else {
      if ($match[2] == 'or') {
        $or_warning = TRUE;
      }
      if ($or) {
        // Add to last element (which is an array)
        $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
      }
      else {
        $keys['positive'] = array_merge($keys['positive'], $words);
      }
    }
    $or = FALSE;
  }

  // Convert keywords into SQL statements.
  $query = array();
  $query2 = array();
  $arguments = array();
  $arguments2 = array();
  $matches = 0;
  $simple_and = FALSE;
  $simple_or = FALSE;
  // Positive matches
  foreach ($keys['positive'] as $key) {
    // Group of ORed terms
    if (is_array($key) && count($key)) {
      $simple_or = TRUE;
      $queryor = array();
      $any = FALSE;
      foreach ($key as $or) {
        list($q, $count) = _search_parse_query($or, $arguments2);
        $any |= $count;
        if ($q) {
          $queryor[] = $q;
          $arguments[] = $or;
        }
      }
      if (count($queryor)) {
        $query[] = '('. implode(' OR ', $queryor) .')';
        // A group of OR keywords only needs to match once
        $matches += ($any > 0);
      }
    }
    // Single ANDed term
    else {
      $simple_and = TRUE;
      list($q, $count) = _search_parse_query($key, $arguments2);
      if ($q) {
        $query[] = $q;
        $arguments[] = $key;
        // Each AND keyword needs to match at least once
        $matches += $count;
      }
    }
  }
  if ($simple_and && $simple_or) {
    $simple = FALSE;
  }
  // Negative matches
  foreach ($keys['negative'] as $key) {
    list($q) = _search_parse_query($key, $arguments2, TRUE);
    if ($q) {
      $query[] = $q;
      $arguments[] = $key;
      $simple = FALSE;
    }
  }
  $query = implode(' AND ', $query);

  // Build word-index conditions for the first pass
  $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);

  return array($query, $arguments, $query2, $arguments2, $matches, $simple, $or_warning);
}
|
|
|
|
/**
 * Helper function for search_parse_query().
 *
 * @param $word
 *   A keyword or phrase (words separated by single spaces).
 * @param $scores
 *   Array of index words collected so far, keyed by word; words from $word
 *   that qualify for the index are added to it unless $not is set.
 * @param $not
 *   (optional) TRUE for a negative term: no words are collected and the
 *   returned condition is negated.
 * @return
 *   A list of two elements: the SQL LIKE condition for this term, and the
 *   number of words newly added to $scores.
 */
function _search_parse_query(&$word, &$scores, $not = FALSE) {
  $added = 0;

  // Determine the scorewords of this word/phrase
  if (!$not) {
    foreach (explode(' ', $word) as $token) {
      $numeric = is_numeric($token);
      // Non-numeric tokens below the minimum word size are not indexed.
      if (!$numeric && drupal_strlen($token) < variable_get('minimum_word_size', 3)) {
        continue;
      }
      if ($numeric) {
        $token = (int)ltrim($token, '-0');
      }
      if (!isset($scores[$token])) {
        $scores[$token] = $token;
        $added++;
      }
    }
  }

  // Return matching snippet and number of added words
  $negation = $not ? 'NOT ' : '';
  return array("d.data ". $negation ."LIKE '%% %s %%'", $added);
}
|
|
|
|
/**
|
|
* Do a query on the full-text search index for a word or words.
|
|
*
|
|
* This function is normally only called by each module that support the
|
|
* indexed search (and thus, implements hook_update_index()).
|
|
*
|
|
* Results are retrieved in two logical passes. However, the two passes are
|
|
* joined together into a single query. And in the case of most simple
|
|
* queries the second pass is not even used.
|
|
*
|
|
* The first pass selects a set of all possible matches, which has the benefit
|
|
* of also providing the exact result set for simple "AND" or "OR" searches.
|
|
*
|
|
* The second portion of the query further refines this set by verifying
|
|
* advanced text conditions (such as negative or phrase matches)
|
|
*
|
|
* @param $keywords
|
|
* A search string as entered by the user.
|
|
*
|
|
* @param $type
|
|
* A string identifying the calling module.
|
|
*
|
|
* @param $join1
|
|
* (optional) Inserted into the JOIN part of the first SQL query.
|
|
* For example "INNER JOIN {node} n ON n.nid = i.sid".
|
|
*
|
|
* @param $where1
|
|
* (optional) Inserted into the WHERE part of the first SQL query.
|
|
* For example "(n.status > %d)".
|
|
*
|
|
* @param $arguments1
|
|
* (optional) Extra SQL arguments belonging to the first query.
|
|
*
|
|
* @param $columns2
|
|
* (optional) Inserted into the SELECT part of the second query. Must contain
|
|
* a column selected as 'score'.
|
|
* defaults to 'i.relevance AS score'
|
|
*
|
|
* @param $join2
|
|
* (optional) Inserted into the JOIN part of the second SQL query.
|
|
* For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
|
|
*
|
|
* @param $arguments2
|
|
* (optional) Extra SQL arguments belonging to the second query parameter.
|
|
*
|
|
* @param $sort_parameters
|
|
* (optional) SQL arguments for sorting the final results.
|
|
* Default: 'ORDER BY score DESC'
|
|
*
|
|
* @return
|
|
* An array of SIDs for the search results.
|
|
*
|
|
* @ingroup search
|
|
*/
|
|
function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $columns2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
|
|
$query = search_parse_query($keywords);
|
|
|
|
if ($query[2] == '') {
|
|
form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
|
|
}
|
|
if ($query[6]) {
|
|
form_set_error('keys', t('Try uppercase "OR" to search for either of two terms.'));
|
|
}
|
|
if ($query === NULL || $query[0] == '' || $query[2] == '') {
|
|
return array();
|
|
}
|
|
|
|
// Build query for keyword normalization.
|
|
$conditions = "$where1 AND ($query[2]) AND i.type = '%s'";
|
|
$arguments1 = array_merge($arguments1, $query[3], array($type));
|
|
$join = "INNER JOIN {search_total} t ON i.word = t.word $join1";
|
|
if (!$query[5]) {
|
|
$conditions .= " AND ($query[0])";
|
|
$arguments1 = array_merge($arguments1, $query[1]);
|
|
$join .= " INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type";
|
|
}
|
|
|
|
// Calculate maximum keyword relevance, to normalize it.
|
|
$select = "SELECT MAX(i.score * t.count) FROM {search_index} i $join WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d";
|
|
$arguments = array_merge($arguments1, array($query[4]));
|
|
$normalize = db_result(db_query($select, $arguments));
|
|
if (!$normalize) {
|
|
return array();
|
|
}
|
|
$columns2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * SUM(i.score * t.count))', $columns2);
|
|
|
|
// Build query to retrieve results.
|
|
$select = "SELECT i.type, i.sid, $columns2 FROM {search_index} i $join $join2 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d";
|
|
$count_select = "SELECT COUNT(*) FROM ($select) n1";
|
|
$arguments = array_merge($arguments2, $arguments1, array($query[4]));
|
|
|
|
// Do actual search query
|
|
$result = pager_query("$select $sort_parameters", 10, 0, $count_select, $arguments);
|
|
$results = array();
|
|
while ($item = db_fetch_object($result)) {
|
|
$results[] = $item;
|
|
}
|
|
return $results;
|
|
}
|
|
|
|
/**
 * Helper function for grabbing search keys.
 *
 * The keys are taken from the third and following components of $_GET['q']
 * when present; otherwise the old GET format ($_REQUEST['keys']) is honoured
 * so that existing links keep working. The value is computed once per request
 * and then served from a static cache.
 *
 * @return
 *   The search keys as a string, or an empty string if there are none.
 */
function search_get_keys() {
  static $return;

  if (isset($return)) {
    return $return;
  }

  // Everything after the second slash of the path is the keyword string.
  $segments = explode('/', $_GET['q'], 3);
  if (count($segments) == 3) {
    $return = $segments[2];
  }
  else {
    // Note: support old GET format of searches for existing links.
    $return = empty($_REQUEST['keys']) ? '' : $_REQUEST['keys'];
  }

  return $return;
}
|
|
|
|
/**
|
|
* @defgroup search Search interface
|
|
* @{
|
|
* The Drupal search interface manages a global search mechanism.
|
|
*
|
|
* Modules may plug into this system to provide searches of different types of
|
|
* data. Most of the system is handled by search.module, so this must be enabled
|
|
* for all of the search features to work.
|
|
*
|
|
* There are three ways to interact with the search system:
|
|
* - Specifically for searching nodes, you can implement nodeapi('update index')
|
|
* and nodeapi('search result'). However, note that the search system already
|
|
* indexes all visible output of a node, i.e. everything displayed normally
|
|
* by hook_view() and hook_nodeapi('view'). This is usually sufficient.
|
|
* You should only use this mechanism if you want additional, non-visible data
|
|
* to be indexed.
|
|
* - Implement hook_search(). This will create a search tab for your module on
|
|
* the /search page with a simple keyword search form. You may optionally
|
|
* implement hook_search_item() to customize the display of your results.
|
|
* - Implement hook_update_index(). This allows your module to use Drupal's
|
|
* HTML indexing mechanism for searching full text efficiently.
|
|
*
|
|
* If your module needs to provide a more complicated search form, then you need
|
|
* to implement it yourself without hook_search(). In that case, you should
|
|
* define it as a local task (tab) under the /search page (e.g. /search/mymodule)
|
|
* so that users can easily find it.
|
|
*/
|
|
|
|
/**
 * Form builder; build the main search form.
 *
 * @param $form_state
 *   The form state array (not used by this builder).
 * @param $action
 *   Form action. When empty, the URL of "search/$type" is used.
 * @param $keys
 *   The search string entered by the user, containing keywords for the search.
 * @param $type
 *   The type of search to render the form for. Must be the name of a module
 *   which implements hook_search().
 * @param $prompt
 *   A piece of text to put before the form. When NULL, the translated string
 *   "Enter your keywords" is used.
 * @return
 *   A form array describing the search form.
 */
function search_form(&$form_state, $action = '', $keys = '', $type = NULL, $prompt = NULL) {
  // Add CSS
  drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE);

  // Fill in the defaults for the optional arguments.
  $action = $action ? $action : url('search/'. $type);
  $prompt = is_null($prompt) ? t('Enter your keywords') : $prompt;

  // Keyword field and submit button, rendered side by side.
  $inline = array(
    '#prefix' => '<div class="container-inline">',
    '#suffix' => '</div>',
    'keys' => array(
      '#type' => 'textfield',
      '#title' => '',
      '#default_value' => $keys,
      // Use the wide field only when a prompt is displayed above it.
      '#size' => $prompt ? 40 : 20,
      '#maxlength' => 255,
    ),
    // processed_keys is used to coordinate keyword passing between other forms
    // that hook into the basic search form.
    'processed_keys' => array('#type' => 'value', '#value' => array()),
    'submit' => array('#type' => 'submit', '#value' => t('Search')),
  );

  return array(
    '#action' => $action,
    '#attributes' => array('class' => 'search-form'),
    'module' => array('#type' => 'value', '#value' => $type),
    'basic' => array(
      '#type' => 'item',
      '#title' => $prompt,
      'inline' => $inline,
    ),
  );
}
|
|
|
|
/**
 * Form builder; Output a search form for the search block and the theme's search box.
 *
 * @param $form_state
 *   The form state array (not used by this builder).
 * @param $form_id
 *   The ID of the form being built; also used as the key of the keyword
 *   text field.
 * @return
 *   A form array holding the keyword field, the submit button and the
 *   submit/validate handler names.
 *
 * @ingroup forms
 * @see search_box_form_submit().
 * @see theme_search_box_form().
 */
function search_box(&$form_state, $form_id) {
  $keyword_field = array(
    '#title' => t('Search this site'),
    '#type' => 'textfield',
    '#size' => 15,
    '#default_value' => '',
    '#attributes' => array('title' => t('Enter the terms you wish to search for.')),
  );
  $submit_button = array('#type' => 'submit', '#value' => t('Search'));

  $form = array();
  // The keyword field is keyed by the form ID so the submit handler can find
  // its value again.
  $form[$form_id] = $keyword_field;
  $form['submit'] = $submit_button;
  $form['#submit'] = array('search_box_form_submit');
  $form['#validate'] = array('search_box_form_validate');

  return $form;
}
|
|
|
|
/**
 * Process a block search form submission.
 *
 * Redirects to the node search page, passing the trimmed contents of the
 * keyword field (which is keyed by the form ID) as the search keys.
 */
function search_box_form_submit($form, &$form_state) {
  $field = $form['form_id']['#value'];
  $terms = trim($form_state['values'][$field]);
  $form_state['redirect'] = 'search/node/'. $terms;
}
|
|
|
|
/**
 * Process variables for search-theme-form.tpl.php.
 *
 * The $variables array contains the following arguments:
 * - $form
 *
 * On return it additionally contains:
 * - $search: the rendered child elements keyed by form key, plus a 'hidden'
 *   entry holding all rendered hidden and token elements.
 * - $search_form: all of the above concatenated.
 *
 * @see search-theme-form.tpl.php
 */
function template_preprocess_search_theme_form(&$variables) {
  $variables['search'] = array();
  $hidden = array();
  // Provide variables named after form keys so themers can print each element independently.
  foreach (element_children($variables['form']) as $key) {
    // Child elements are not required to declare a #type, so do not assume
    // the key exists; an untyped element is treated as a visible one.
    $type = isset($variables['form'][$key]['#type']) ? $variables['form'][$key]['#type'] : '';
    if ($type == 'hidden' || $type == 'token') {
      $hidden[] = drupal_render($variables['form'][$key]);
    }
    else {
      $variables['search'][$key] = drupal_render($variables['form'][$key]);
    }
  }
  // Hidden form elements have no value to themers. No need for separation.
  $variables['search']['hidden'] = implode('', $hidden);
  // Collect all form elements to make it easier to print the whole form.
  $variables['search_form'] = implode('', $variables['search']);
}
|
|
|
|
/**
 * Process variables for search-block-form.tpl.php.
 *
 * The $variables array contains the following arguments:
 * - $form
 *
 * On return it additionally contains:
 * - $search: the rendered child elements keyed by form key, plus a 'hidden'
 *   entry holding all rendered hidden and token elements.
 * - $search_form: all of the above concatenated.
 *
 * @see search-block-form.tpl.php
 */
function template_preprocess_search_block_form(&$variables) {
  $variables['search'] = array();
  $hidden = array();
  // Provide variables named after form keys so themers can print each element independently.
  foreach (element_children($variables['form']) as $key) {
    // Child elements are not required to declare a #type, so do not assume
    // the key exists; an untyped element is treated as a visible one.
    $type = isset($variables['form'][$key]['#type']) ? $variables['form'][$key]['#type'] : '';
    if ($type == 'hidden' || $type == 'token') {
      $hidden[] = drupal_render($variables['form'][$key]);
    }
    else {
      $variables['search'][$key] = drupal_render($variables['form'][$key]);
    }
  }
  // Hidden form elements have no value to themers. No need for separation.
  $variables['search']['hidden'] = implode('', $hidden);
  // Collect all form elements to make it easier to print the whole form.
  $variables['search_form'] = implode('', $variables['search']);
}
|
|
|
|
/**
 * Perform a standard search on the given keys, and return the formatted results.
 *
 * @param $keys
 *   The search keys. Nothing is searched when this is NULL.
 * @param $type
 *   The name of the module whose hook_search() implementation is invoked.
 *   Defaults to 'node'.
 * @return
 *   The output of the module's hook_search_page() if it implements one,
 *   otherwise the themed 'search_results'. NULL when there are no keys, the
 *   module does not implement hook_search(), or the search yields no results.
 */
function search_data($keys = NULL, $type = 'node') {
  // Nothing to do without keys or without a module able to search.
  if (!isset($keys) || !module_hook($type, 'search')) {
    return;
  }

  $results = module_invoke($type, 'search', 'search', $keys);
  if (!is_array($results) || !count($results)) {
    return;
  }

  // Let the module format its own result page when it wants to.
  if (module_hook($type, 'search_page')) {
    return module_invoke($type, 'search_page', $results);
  }
  return theme('search_results', $results, $type);
}
|
|
|
|
/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 * Used for formatting search results.
 *
 * @param $keys
 *   A string containing a search query.
 *
 * @param $text
 *   The text to extract fragments from.
 *
 * @return
 *   A string containing HTML for the excerpt: the selected fragments joined
 *   by ' ... ' with every keyword wrapped in <strong> tags, or the truncated
 *   beginning of the text when no keyword is found.
 */
function search_excerpt($keys, $text) {
  // We highlight around non-indexable or CJK characters.
  $boundary = '(?:(?<=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .'])|(?=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .']))';

  // Extract positive keywords and phrases. Every match fills either the phrase
  // group or the word group and leaves the other one empty, so the empty
  // strings are dropped right here. An empty key that survives until the
  // highlighting step would add an empty alternative to the pattern and wrap
  // every boundary position in <strong></strong>.
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' '. $keys, $matches);
  $keys = array_filter(array_merge($matches[2], $matches[3]), 'strlen');

  // Prepare text
  $text = ' '. strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) .' ';
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // Extract fragments around the keywords until about 256 bytes of text have
  // been collected. First we collect ranges of text around each keyword,
  // starting/ending at spaces.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if ($length >= 256) {
        break;
      }
      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }
      // Locate a keyword (position $p), then locate a space in front (position
      // $q) and behind it (position $s)
      if (preg_match('/'. $boundary . $key . $boundary .'/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
          $end = substr($text, $p, 80);
          if (($s = strrpos($end, ' ')) !== FALSE) {
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            // Continue after the whole match. Offsets are in bytes, and a
            // start offset inside a multibyte character makes a /u pattern
            // fail, which would hide all further occurrences of keywords
            // beginning with a non-ASCII character.
            $included[$key] = $p + strlen($match[0][0]);
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        unset($workkeys[$k]);
      }
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) .' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  // Fetch text
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  // A leading ellipsis is only needed when the excerpt does not start at the
  // very beginning of the text.
  $text = (isset($newranges[0]) ? '' : '... ') . implode(' ... ', $out) .' ...';

  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
  $text = preg_replace('/'. $boundary .'('. implode('|', $keys) .')'. $boundary .'/iu', '<strong>\0</strong>', $text);
  return $text;
}
|
|
|
|
/**
|
|
* @} End of "defgroup search".
|
|
*/
|
|
|
|
/**
 * Helper function for array_walk in search_excerpt.
 *
 * Escapes a keyword in place so it can be embedded in a regular expression
 * that uses '/' as its delimiter.
 */
function _search_excerpt_replace(&$text) {
  $escaped = preg_quote($text, '/');
  $text = $escaped;
}
|
|
|
|
/**
 * Map the two search box form IDs onto the shared search_box() builder.
 *
 * @return
 *   An array keyed by form ID; each entry names 'search_box' as the callback
 *   and passes the form ID itself as the callback argument.
 */
function search_forms() {
  $forms = array();
  foreach (array('search_theme_form', 'search_block_form') as $form_id) {
    $forms[$form_id] = array(
      'callback' => 'search_box',
      'callback arguments' => array($form_id),
    );
  }
  return $forms;
}
|