drupal/modules/search.module

<?php
// $Id$

/**
 * @file
 * Enables site-wide keyword searching.
 */

/**
 * Matches Unicode character classes to exclude from the search index.
 *
 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
 * The index only contains the following character classes:
 * Lu     Letter, Uppercase
 * Ll     Letter, Lowercase
 * Lt     Letter, Titlecase
 * Lm     Letter, Modifier
 * Lo     Letter, Other
 * Mn     Mark, Nonspacing
 * Mc     Mark, Spacing Combining
 * Nd     Number, Decimal Digit
 * Nl     Number, Letter
 * No     Number, Other
 * Sm     Symbol, Math
 * Sc     Symbol, Currency
 * Sk     Symbol, Modifier
 * So     Symbol, Other
 *
 * All character classes not in the list above (enclosing marks, punctuation, control codes and spacers):
 * 'Me', 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co'
 */
define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');

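/**
 * Matches all 'N' Unicode character classes (numbers).
 *
 * This is a character-class fragment: embed it between [ and ] in a pattern
 * that uses the /u modifier.
 */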
define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');

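/**
 * Matches all 'P' Unicode character classes (punctuation).
 *
 * This is a character-class fragment: embed it between [ and ] in a pattern
 * that uses the /u modifier.
 */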
define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');

/**
 * Implementation of hook_help().
 *
 * Supplies the module description, the introduction shown on the search
 * settings page, and the tips shown when a search yields no results.
 *
 * @param $section
 *   The help section being requested.
 * @return
 *   The translated help text for a known section; nothing otherwise.
 */
function search_help($section = 'admin/help#search') {
  if ($section == 'admin/modules#description') {
    return t('Enables site-wide keyword searching.');
  }

  if ($section == 'admin/settings/search') {
    return t('
<p>The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.</p>
');
  }

  if ($section == 'search#noresults') {
    // The tip about short words quotes the configured minimum word size.
    $replacements = array('%number' => variable_get('minimum_word_size', 3));
    return t('<p><ul>
<li>Check if your spelling is correct.</li>
<li>Try using wildcards: <em>walk*</em> matches <em>walker</em>, <em>walking</em>, ...</li>
<li>Use longer words (words shorter than %number letters are ignored).</li>
</ul></p>', $replacements);
  }
}

/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function search_perm() {
  $permissions = array();
  $permissions[] = 'search content';
  $permissions[] = 'administer search';

  return $permissions;
}

/**
 * Implementation of hook_block().
 *
 * Provides a single block (delta 0) containing the search form.
 *
 * @param $op
 *   'list' to describe the available blocks, 'view' to render the block.
 * @param $delta
 *   Block identifier. Not consulted: this module only defines one block.
 * @return
 *   For 'list', an array describing the block. For 'view', an array with
 *   'content' and 'subject' keys, provided the user may search and the
 *   current page is not already a search page. Nothing otherwise.
 */
function search_block($op = 'list', $delta = 0) {
  if ($op == 'list') {
    $blocks[0]['info'] = t('Search form');
    return $blocks;
  }
  // The block is hidden on search pages, which already show the form.
  else if ($op == 'view' && user_access('search content') && arg(0) != 'search') {
    // The empty prompt suppresses the default "Enter your keywords" label.
    $block['content'] = search_form('', '', null, '');
    $block['subject'] = t('Search');
    return $block;
  }
}

/**
 * Implementation of hook_menu().
 *
 * @param $may_cache
 *   TRUE when cacheable items are requested; FALSE when per-request items
 *   are requested.
 * @return
 *   An array of menu items.
 */
function search_menu($may_cache) {
  $items = array();

  if ($may_cache) {
    // Cacheable entries: the search page itself and its settings page.
    $items[] = array(
      'path' => 'search',
      'title' => t('search'),
      'callback' => 'search_view',
      'access' => user_access('search content'),
      'type' => MENU_SUGGESTED_ITEM
    );
    $items[] = array(
      'path' => 'admin/settings/search',
      'title' => t('search'),
      'callback' => 'search_admin',
      'type' => MENU_NORMAL_ITEM,
      'access' => user_access('administer site configuration')
    );

    return $items;
  }

  // Per-request entries are only needed while on a search page.
  if (arg(0) != 'search') {
    return $items;
  }

  // The current keywords are appended to each tab's path, so they are
  // remembered when the user switches from one search tab to another.
  $suffix = search_get_keys();
  $suffix = strlen($suffix) ? '/'. $suffix : '';

  foreach (module_list() as $name) {
    if (!module_hook($name, 'search')) {
      continue;
    }
    // A module only gets a tab when it reports a name for its search.
    $title = module_invoke($name, 'search', 'name');
    if (!$title) {
      continue;
    }
    $items[] = array(
      'path' => 'search/'. $name . $suffix,
      'title' => $title,
      'callback' => 'search_view',
      'access' => user_access('search content'),
      'type' => MENU_LOCAL_TASK
    );
  }

  return $items;
}


/**
 * Menu callback; displays the search module settings page.
 *
 * When the form is posted, the settings are saved. If the posted minimum
 * word size differs from the stored one, the index is first marked for
 * rebuilding through search_wipe().
 *
 * @return
 *   The HTML of the settings form, including the indexing status.
 */
function search_admin() {
  if ($_POST) {
    // If the word length settings change, the index needs to be rebuilt.
    // A POST that does not carry the field at all is not treated as a change.
    $stored_size = variable_get('minimum_word_size', 3);
    if (isset($_POST['edit']['minimum_word_size']) && $stored_size != $_POST['edit']['minimum_word_size']) {
      drupal_set_message(t('The index will be rebuilt.'));
      search_wipe();
    }
    // The message and wipe above deliberately precede the save.
    system_settings_save();
  }

  // Collect some stats from every module that implements hook_search().
  $remaining = 0;
  $total = 0;
  foreach (module_list() as $module) {
    if (module_hook($module, 'search')) {
      $module_status = module_invoke($module, 'search', 'status');
      $remaining += $module_status['remaining'];
      $total += $module_status['total'];
    }
  }
  $count = format_plural($remaining, 'There is 1 item left to index.', 'There are %count items left to index.');
  // max(1, $total) avoids a division by zero; min() caps the result at 100.
  $percentage = ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) . '%';
  $status = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $count .'</strong></p>';
  // The group title is translated like the other group titles on this page.
  $output = form_group(t('Indexing status'), $status);

  // Indexing throttle:
  $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));
  $group = form_select(t('Items to index per cron run'), 'search_cron_limit', variable_get('search_cron_limit', 100), $items, t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
  $output .= form_group(t('Indexing throttle'), $group);
  // Indexing settings:
  $group = '<em>'. t('<p>Changing the setting below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>';
  $group .= form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 3), 5, 3, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
  $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 3), 5, 3, t('The number of characters a word has to be to be searched for, including wildcard characters.'));
  $output .= form_group(t('Indexing settings'), $group);

  return system_settings_form($output);
}

/**
 * Wipes a part of or the entire search index.
 *
 * Without arguments, every module is asked to reset its search data through
 * hook_search('reset'). With arguments, the index rows of the given item are
 * deleted, together with the rows other items received from it (the rows
 * whose fromsid/fromtype point at it).
 *
 * @param $sid
 *  (optional) The SID of the item to wipe. If specified, $type must be passed
 *  too.
 * @param $type
 *  (optional) The type of item to wipe.
 */
function search_wipe($sid = NULL, $type = NULL) {
  $wipe_everything = ($sid == NULL && $type == NULL);

  if ($wipe_everything) {
    module_invoke_all('search', 'reset');
    return;
  }

  // Rows indexed for the item itself.
  db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
  // Rows recorded for other items because this item links to them.
  db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
}

/**
 * Registers a dirty word, or returns the list of dirty words.
 *
 * Dirty words are collected during indexing (cron): their total counts in the
 * search_total table are outdated and need to be recounted.
 *
 * @param $word
 *   (optional) The word to mark as dirty. Omit it to retrieve the list.
 * @return
 *   When called without a word, an array keyed by the dirty words collected
 *   so far during this request. Nothing when a word is being registered.
 */
function search_dirty($word = null) {
  static $dirty = array();

  if ($word === null) {
    return $dirty;
  }

  $dirty[$word] = true;
}

/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules, recounts the totals of the words
 * marked dirty while indexing (see search_dirty), and removes totals of words
 * that no longer occur in the index.
 */
function search_cron() {
  // Update word index
  $modules = module_list();
  foreach ($modules as $module) {
    module_invoke($module, 'update_index');
  }

  // Update word counts for new/changed words
  $dirty_words = array_keys(search_dirty());
  foreach ($dirty_words as $word) {
    $sum = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
    db_query("UPDATE {search_total} SET count = %d WHERE word = '%s'", $sum, $word);
    // No row was affected by the UPDATE: add a row for this word instead.
    $updated = db_affected_rows();
    if (!$updated) {
      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $sum);
    }
  }

  // Find words that were deleted from search_index, but are still in
  // search_total. We use a LEFT JOIN between the two tables and keep only the
  // rows which fail to join.
  $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  while ($orphan = db_fetch_object($orphans)) {
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $orphan->realword);
  }
}

/**
 * Splits a string into component words according to indexing rules.
 *
 * The result of the most recent call is remembered, so splitting the same
 * input twice in a row only does the work once.
 *
 * @param $text
 *   The text to split. HTML entities in it are decoded first.
 * @return
 *   An array of words, each truncated to at most 50 characters. The array may
 *   contain empty strings where separators were adjacent or at the ends.
 */
function search_keywords_split($text) {
  static $last = null;
  static $lastsplit = null;

  // The cache is keyed on the unprocessed input and compared strictly. A
  // loose comparison would treat '' as equal to the initial null (returning
  // NULL instead of an array) and would confuse numeric-looking strings such
  // as '1.0' and '1.00'.
  if ($last !== null && $last === $text) {
    return $lastsplit;
  }
  $input = $text;

  // Decode entities to UTF-8
  $text = decode_entities($text);

  // Call an external processor for word handling.
  search_preprocess($text);

  // To improve searching for numerical data such as dates, IP addresses
  // or version numbers, we consider a group of numerical characters
  // separated only by punctuation characters to be one piece.
  // This also means that searching for e.g. '20/03/1984' also returns
  // results with '20-03-1984' in them.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);

  // The dot, underscore and dash are simply removed. This allows meaningful
  // search behaviour with acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // With the exception of the rules above, we consider all punctuation,
  // marks, spacers, etc, to be a word boundary.
  $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);

  // Process words
  $words = explode(' ', $text);
  array_walk($words, '_search_keywords_truncate');

  // Save last keyword result, keyed on the original input.
  $last = $input;
  $lastsplit = $words;

  return $words;
}

/**
 * array_walk() callback for search_keywords_split().
 *
 * Shortens a single word in place to at most 50 characters.
 */
function _search_keywords_truncate(&$text) {
  $shortened = truncate_utf8($text, 50);
  $text = $shortened;
}

/**
 * Loosens up a set of search keywords by adding wildcards, if possible.
 *
 * Every space-separated word gets a wildcard in front of and behind it, and
 * runs of wildcards are collapsed into a single one.
 *
 * @param $text
 *   The keywords as entered by the user.
 * @return
 *   If more wildcards can be added, the adjusted keywords are returned.
 *   If the query is already as loose as possible, NULL is returned.
 */
function search_keywords_variation($text) {
  $text = trim($text);

  // Wrap each word in wildcards.
  $wrapped = array();
  foreach (explode(' ', $text) as $word) {
    $wrapped[] = '*'. $word .'*';
  }

  // Collapse repeated wildcards, e.g. when the word already had one.
  $loosened = preg_replace('/\*+/', '*', implode(' ', $wrapped));

  if ($loosened == $text) {
    return NULL;
  }
  return $loosened;
}

/**
 * Invokes hook_search_preprocess() in modules.
 *
 * Each implementing module receives the text as left by the previous one.
 *
 * @param $text
 *   The text to process; it is modified in place.
 */
function search_preprocess(&$text) {
  $implementors = module_implements('search_preprocess');
  foreach ($implementors as $module) {
    $processed = module_invoke($module, 'search_preprocess', $text);
    $text = $processed;
  }
}


/**
 * Update the full-text search index for a particular item.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 *
 * @param $type
 *   A string defining this type of item (e.g. 'node')
 *
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 *
 * @ingroup search
 */
function search_index($sid, $type, $text) {
  $minimum_word_size = variable_get('minimum_word_size', 3);

  // Pattern that extracts the target path from an href attribute, with the
  // site's base URL and a '?q=' prefix being optional.
  global $base_url;
  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 21,
                'h2' => 18,
                'h3' => 15,
                'h4' => 12,
                'h5' => 9,
                'h6' => 6,
                'u' => 5,
                'b' => 5,
                'strong' => 5,
                'em' => 5,
                'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: the resulting array alternates between plain text and captured tag
  // contents, and both begins and ends with plain text (possibly empty).

  $tag = false; // Odd/even counter. Tag or no tag.
  $link = false; // State variable for link analyser
  $linknid = 0; // Node id the current link points to, while $link is true
  $score = 1; // Starting score per word

  // Bucket 0 collects the words of the item itself; every other bucket is
  // keyed by the id of a node this item links to.
  $results = array(0 => array());

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = strtolower($tagname);
      // substr() is used rather than a string offset so that this works on
      // every PHP version.
      if (substr($tagname, 0, 1) == '/') {
        $closed = substr($tagname, 1);
        if (isset($tags[$closed])) {
          $score -= $tags[$closed];
        }
        if ($score < 1) { // possible due to bad HTML
          $score = 1;
        }
        if ($tagname == '/a') {
          $link = false;
        }
      }
      else {
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                $link = true;
              }
            }
          }
        }
        // Tag names that are not in the multiplier table leave the score as is.
        if (isset($tags[$tagname])) {
          $score += $tags[$tagname];
        }
      }
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        $words = search_keywords_split($value);
        foreach ($words as $word) {
          // Check wordlength
          if (string_length($word) >= $minimum_word_size) {
            // Note: strtolower can be used because the value is only used internally.
            $word = strtolower($word);
            // Words inside a link to a node are credited to that node.
            $bucket = $link ? $linknid : 0;
            if (!isset($results[$bucket])) {
              $results[$bucket] = array();
            }
            if (!isset($results[$bucket][$word])) {
              $results[$bucket][$word] = 0;
            }
            $results[$bucket][$word] += $score;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Remove whatever was indexed for this item before.
  search_wipe($sid, $type);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Now insert links to nodes
  foreach ($results as $nid => $words) {
    foreach ($words as $word => $score) {
      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
      search_dirty($word);
    }
  }
}

/**
 * Do a query on the full-text search index for a word or words.
 *
 * This function is normally only called by each module that support the
 * indexed search (and thus, implements hook_update_index()).
 *
 * The final query is an SQL select on the search_index table. As a guide for
 * writing the optional extra SQL fragments (see below), use this query:
 *
 * SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score
 * FROM {search_index} i
 * $join INNER JOIN {search_total} t ON i.word = t.word
 * WHERE $where AND (i.word = '...' OR ...)
 * GROUP BY i.type, i.sid
 * ORDER BY score DESC
 *
 * @param $keywords
 *   A search string as entered by the user.
 *
 * @param $type
 *   A string identifying the calling module.
 *
 * @param $join
 *   (optional) A string to be inserted into the JOIN part of the SQL query.
 *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 *
 * @param $where
 *   (optional) A string to be inserted into the WHERE part of the SQL query.
 *   For example "(n.status > 0)".
 *
 * @param $variation
 *   Used internally. Must not be specified.
 *
 * @return
 *   An array of SIDs for the search results.
 *
 * @ingroup search
 */
function do_search($keywords, $type, $join = '', $where = '1', $variation = true) {
  // NOTE(review): $type is only handed on to the recursive call below; the
  // queries built here do not filter on it. Confirm that callers restrict the
  // item type through $join/$where.

  // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
  // through the keyword extractor. Multiple wildcards are collapsed into one.
  // The character is written as its UTF-8 byte sequence so that it cannot be
  // mangled by editors or transports that do not handle it.
  $wildcard = "\xEF\xBF\xBD";
  $keys = preg_replace('!\*+!', $wildcard, $keywords);

  // Split into words
  $keys = search_keywords_split($keys);

  $minimum_length = variable_get('remove_short', 3);

  $words = array();
  $arguments = array();
  $refused = array();
  // Build WHERE clause
  foreach ($keys as $word) {
    // Each wildcard is one character, so it counts once towards the length.
    if (string_length($word) < $minimum_length) {
      if ($word != '') {
        $refused[] = str_replace($wildcard, '*', $word);
      }
      continue;
    }
    if (strpos($word, $wildcard) !== false) {
      // Note: strtolower can be used because the value is only used internally.
      // The placeholder is swapped for the SQL wildcard before lowercasing, so
      // that lowercasing never sees its multi-byte sequence.
      $words[] = "i.word LIKE '%s'";
      $arguments[] = strtolower(str_replace($wildcard, '%', $word));
    }
    else {
      $words[] = "i.word = '%s'";
      $arguments[] = strtolower($word);
    }
  }
  // Tell the user which words were excluded. This is skipped on the loosened
  // retry, which would only repeat the message.
  if (count($refused) && $variation) {
    $message = format_plural(count($refused),
                             'The word %words was not included because it is too short.',
                             'The words %words were not included because they were too short.');
    drupal_set_message(strtr($message, array('%words' => theme('placeholder', implode(', ', $refused)))));
  }

  if (count($words) == 0) {
    return array();
  }
  $conditions = $where .' AND ('. implode(' OR ', $words) .')';

  // Get result count (for pager)
  $count = db_num_rows(db_query("SELECT DISTINCT i.sid, i.type FROM {search_index} i $join WHERE $conditions", $arguments));
  if ($count == 0) {
    // Try out a looser search query if nothing was found.
    if ($variation && $loose = search_keywords_variation($keywords)) {
      return do_search($loose, $type, $join, $where, false);
    }
    else {
      return array();
    }
  }
  // The pager only needs the total, which is already known.
  $count_query = "SELECT $count";

  // Do pager query
  $query = "SELECT i.type, i.sid, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC";
  $result = pager_query($query, 15, 0, $count_query, $arguments);

  $results = array();
  while ($item = db_fetch_object($result)) {
    $results[] = $item->sid;
  }

  return $results;
}

/**
 * Helper function for grabbing search keys.
 *
 * @return
 *   Everything after the second slash of the requested path ($_GET['q']), or
 *   the 'keys' request parameter when the path has fewer than three parts.
 */
function search_get_keys() {
  // Extract keys as remainder of path
  $segments = explode('/', $_GET['q'], 3);
  if (count($segments) == 3) {
    return $segments[2];
  }

  // Note: support old GET format of searches for existing links.
  return $_REQUEST['keys'];
}

/**
 * Menu callback; presents the search form and/or search results.
 *
 * @return
 *   The HTML of the search form followed by the results, if any. Nothing when
 *   the user may not search; the access denied page is invoked instead.
 */
function search_view() {
  $type = arg(1);

  // Search form submits with POST but redirects to GET. This way we can keep
  // the search query URL clean as a whistle:
  // search/type/keyword+keyword
  if (!empty($_POST['edit']['keys'])) {
    if ($type == '') {
      $type = 'node';
    }
    drupal_goto('search/'. urlencode($type) .'/'. urlencode($_POST['edit']['keys']));
  }
  else if ($type == '') {
    // Note: search/node can not be a default tab because it would take on the
    // path of its parent (search). It would prevent remembering keywords when
    // switching tabs. This is why we drupal_goto to it from the parent instead.
    drupal_goto('search/node');
  }
  $keys = search_get_keys();

  if (user_access('search content')) {
    // Stays empty when there is nothing to search for.
    $results = '';

    // Only perform search if there is non-whitespace search term:
    if (trim($keys)) {
      // Log the search keys:
      watchdog('search',
        t('Search: %keys (%type).', array('%keys' => theme('placeholder', $keys), '%type' => module_invoke($type, 'search', 'name'))),
        WATCHDOG_NOTICE,
        l(t('results'), 'search/'. urlencode($type) .'/'. urlencode($keys))
        );

      // Collect the search results:
      $results = search_data($keys, $type);

      if ($results) {
        $results = theme('box', t('Search results'), $results);
      }
      else {
        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
      }
    }
    else if (isset($_POST['edit'])) {
      form_set_error('keys', t('Please enter some keywords.'));
    }

    // Construct the search form.
    // Note, we do this last because of the form_set_error() above.
    $output = search_form(NULL, $keys, $type);

    $output .= $results;

    return $output;
  }
  else {
    drupal_access_denied();
  }
}

/**
 * @defgroup search Search interface
 * @{
 * The Drupal search interface manages a global search mechanism.
 *
 * Modules may plug into this system to provide searches of different types of
 * data. Most of the system is handled by search.module, so this must be enabled
 * for all of the search features to work.
 *
 * There are three ways to interact with the search system:
 * - Specifically for searching nodes, you can implement nodeapi('update index')
 *   and nodeapi('search result'). However, note that the search system already
 *   indexes all visible output of a node, i.e. everything displayed normally
 *   by hook_view() and hook_nodeapi('view'). This is usually sufficient.
 *   You should only use this mechanism if you want additional, non-visible data
 *   to be indexed.
 * - Implement hook_search(). This will create a search tab for your module on
 *   the /search page with a simple keyword search form. You may optionally
 *   implement hook_search_item() to customize the display of your results.
 * - Implement hook_update_index(). This allows your module to use Drupal's
 *   HTML indexing mechanism for searching full text efficiently.
 *
 * If your module needs to provide a more complicated search form, then you need
 * to implement it yourself without hook_search(). In that case, you should
 * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
 * so that users can easily find it.
 */

/**
 * Render a search form.
 *
 * @param $action
 *   Form action. When empty, the URL of 'search/' followed by $type is used.
 *   That URL is built from $type as passed in, before the 'node' fallback
 *   below is applied.
 * @param $keys
 *   The search string entered by the user, containing keywords for the search.
 * @param $type
 *   The type of search to render the form for. Must be the name of module
 *   which implements hook_search(). Defaults to 'node'.
 * @param $prompt
 *   A piece of text to put before the form (e.g. "Enter your keywords").
 *   NULL selects the default prompt; an empty string shows no prompt and
 *   makes the text field narrower.
 * @return
 *   An HTML string containing the search form.
 */
function search_form($action = '', $keys = '', $type = null, $prompt = null) {
  if (!$action) {
    $action = url('search/'. $type);
  }
  if (!$type) {
    $type = 'node';
  }
  if (is_null($prompt)) {
    $prompt = t('Enter your keywords');
  }

  $output = ' <div class="search-form">';
  // The text field and the button are kept on one line.
  $box = '<div class="container-inline">';
  $box .= form_textfield('', 'keys', $keys, $prompt ? 40 : 30, 255);
  $box .= form_submit(t('Search'));
  $box .= '</div>';
  $output .= form_item($prompt, $box);
  $output .= '</div>';

  return form($output, 'post', $action);
}

/**
 * Perform a standard search on the given keys, and return the formatted results.
 *
 * @param $keys
 *   The search keywords.
 * @param $type
 *   The name of the module whose hook_search() performs the search.
 * @return
 *   The themed result list followed by a pager, or an empty string when there
 *   are no keys, the module has no hook_search(), or nothing was found.
 */
function search_data($keys = NULL, $type = 'node') {
  if (!isset($keys) || !module_hook($type, 'search')) {
    return '';
  }

  $results = module_invoke($type, 'search', 'search', $keys);
  if (!is_array($results) || !count($results)) {
    return '';
  }

  $entries = '';
  foreach ($results as $entry) {
    $entries .= theme('search_item', $entry, $type);
  }
  $pager = theme('pager', NULL, 15, 0);

  return '<dl class="search-results">'. $entries .'</dl>'. $pager;
}

/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 * Used for formatting search results.
 *
 * @param $keys
 *   A string containing keywords. They are split into words using the same
 *   rules as search indexing.
 *
 * @param $text
 *   The text to extract fragments from.
 *
 * @return
 *   A string containing HTML for the excerpt.
 */
function search_excerpt($keys, $text) {
  $keys = search_keywords_split($keys);
  if (!is_array($keys)) {
    $keys = array();
  }
  $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));

  // Drop empty keywords (left behind by repeated or trailing separators). An
  // empty alternative in the highlighting pattern at the end of this function
  // would match at every word boundary and litter the excerpt with empty
  // <strong> tags.
  $keys = array_filter($keys, 'strlen');

  // Escape the keywords for use inside a regular expression.
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // Extract a fragment per keyword until roughly 256 bytes are collected.
  // First we collect ranges of text around each keyword, starting/ending
  // at spaces.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if ($length >= 256) {
        break;
      }
      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }
      // Locate a keyword (position $p), then locate a space in front (position
      // $q) and behind it (position $s)
      if (preg_match('/\b'. $key .'\b/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== false) {
          $end = substr($text, $p, 80);
          if (($s = strrpos($end, ' ')) !== false) {
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            // The next search for this keyword starts just past this match.
            $included[$key] = $p + 1;
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        // No (further) occurrence: stop looking for this keyword.
        unset($workkeys[$k]);
      }
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) . ' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  // Fetch text
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  // A leading ellipsis is added unless the excerpt starts at the very beginning.
  $text = (isset($newranges[0]) ? '' : '... '). implode(' ... ', $out) .' ...';

  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
  $text = preg_replace('/\b('. implode('|', $keys) .')\b/iu', '<strong>\0</strong>', $text);
  return $text;
}

/**
 * @} End of "defgroup search".
 */

/**
 * array_walk() callback for search_excerpt().
 *
 * Escapes a keyword in place for use inside a '/'-delimited regular
 * expression.
 */
function _search_excerpt_replace(&$text) {
  $escaped = preg_quote($text, '/');
  $text = $escaped;
}

/**
 * Format a single result entry of a search query.
 *
 * Modules may implement hook_search_item() in order to override this default
 * function to display search results. When the module named by $type does so,
 * its output is returned as is.
 *
 * @param $item
 *   A single search result as returned by hook_search(). The result should be
 *   an array with keys "link", "title", "type", "user", "date", and "snippet".
 *   Optionally, "extra" can be an array of extra info to show along with the
 *   result.
 * @param $type
 *   The type of item found, such as "user" or "node".
 * @return
 *   The HTML for the result: a definition term holding the linked title and
 *   a definition holding the snippet and an info line.
 *
 * @ingroup themeable
 */
function theme_search_item($item, $type) {
  if (module_hook($type, 'search_item')) {
    return module_invoke($type, 'search_item', $item);
  }

  $title = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';

  // Gather the pieces of the info line; empty pieces are left out.
  $info = array();
  foreach (array('type', 'user') as $field) {
    if ($item[$field]) {
      $info[] = $item[$field];
    }
  }
  if ($item['date']) {
    $info[] = format_date($item['date'], 'small');
  }
  if (is_array($item['extra'])) {
    $info = array_merge($info, $item['extra']);
  }

  $snippet = $item['snippet'] ? '<p>'. $item['snippet'] . '</p>' : '';
  $details = ' <dd>'. $snippet . '<p class="search-info">' . implode(' - ', $info) .'</p></dd>';

  return $title . $details;
}


?>
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
+								<?php
-												- added the CVS keyword $Id$ to all files to make future version tracking
  easier. Also changed the <? tag to <?php in some cases.

											
										
										
											2001-10-20 18:57:09 +00:00
+								// $Id$
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
-- Patch by JonBob: for consistency and readability, add brief descriptions of each source file inside the @file comment block at the head of the file. This helps with Doxygen indexing, and also allows neophytes to see what a file does immediately on opening the source, regardless of the organization of the hooks.

											
										
										
											2004-08-21 06:42:38 +00:00
+								/**
 								 * @file
 								 * Enables site-wide keyword searching.
 								 */
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								/**
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 * Matches Unicode character classes to exclude from the search index.
-												Tweak api.module output

											
										
										
											2005-01-11 07:04:37 +00:00
+								 *
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 								 *
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 * The index only contains the following character classes:
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * Lu     Letter, Uppercase
 								 * Ll     Letter, Lowercase
 								 * Lt     Letter, Titlecase
 								 * Lm     Letter, Modifier
 								 * Lo     Letter, Other
 								 * Mn     Mark, Nonspacing
 								 * Mc     Mark, Spacing Combining
 								 * Nd     Number, Decimal Digit
 								 * Nl     Number, Letter
 								 * No     Number, Other
 								 * Sm     Symbol, Math
 								 * Sc     Symbol, Currency
 								 * Sk     Symbol, Modifier
 								 * So     Symbol, Other
 								 *
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 * All character classes not in the list above (enclosing marks, punctuation, control codes and spacers):
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * 'Me', 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co'
 								 */
 								define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');
 								/**
-												Tweak api.module output

											
										
										
											2005-01-11 07:04:37 +00:00
+								 * Matches all 'N' Unicode character classes (numbers)
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 */
 								define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');
 								/**
-												Tweak api.module output

											
										
										
											2005-01-11 07:04:37 +00:00
+								 * Matches all 'P' Unicode character classes (punctuation)
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 */
 								define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								/**
 								 * Implementation of hook_help().
 								 */
 								function search_help($section = 'admin/help#search') {
-- More help system updates by Michael F.

											
										
										
											2003-08-25 16:57:55 +00:00
+								  switch ($section) {
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								    case 'admin/modules#description':
 								      return t('Enables site-wide keyword searching.');
-												- Patch #12232 by Steven: more search improvements:

   + When a comment is posted, a node needs to be re-indexed. Luckily, we can use node_comment_statistics for this easily.
   + When a node is deleted, it should be deleted from the search index as well.
   + The search wipe didn't properly remove links to nodes from the index.
   + Section url was faulty in _help.
   + Minor code rearrangement.

											
										
										
											2004-11-04 06:47:03 +00:00
+								    case 'admin/settings/search':
 								      return t('
-												- Readding cron note.

											
										
										
											2005-01-11 09:46:51 +00:00
+								<p>The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.</p>
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								');
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    case 'search#noresults':
 								      return t('<p><ul>
 								<li>Check if your spelling is correct.</li>
 								<li>Try using wildcards: <em>walk*</em> matches <em>walker</em>, <em>walking</em>, ...</li>
 								<li>Use longer words (words shorter than %number letters are ignored).</li>
 								</ul></p>', array('%number' => variable_get('minimum_word_size', 3)));
-- More help system updates by Michael F.

											
										
										
											2003-08-25 16:57:55 +00:00
+								  }
-- Added Marco's long-awaited taxonomy module and patches - a replacement
  for the meta system.  The patches add some extra functionality to the
  comment system (for example, comments can be set read-only) and fix a
  couple of small problems.

  + I integrated the required SQL updates from the various *.mysql files
    into the "update.php" script.  Upgrading should be easy ...

  + I did not apply/commit the "user.diff" as requested by Marco ...

  + I didn't know what to do with "forum.module" and "forum2.module":
    what do you want me to do with it Marco?  Which one should go in?

  + Can we remove "node_index()" now; both from "node.module" and the
    themes?

  + Thanks Marco!

											
										
										
											2002-04-14 20:46:41 +00:00
+								}
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
 								/**
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								 * Implementation of hook_perm().
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 */
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
+								function search_perm() {
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								  return array('search content', 'administer search');
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
+								}
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								/**
 								 * Implementation of hook_block().
 								 *
 								 * Exposes a single block (index 0) that holds the search form.
 								 *
 								 * @param $op
 								 *   The operation being requested. 'list' and 'view' are handled here;
 								 *   any other value falls through and nothing is returned.
 								 * @param $delta
 								 *   The block index. Not consulted by this function, since only one
 								 *   block is defined.
 								 *
 								 * @return
 								 *   For 'list': an array describing the available block. For 'view': an
 								 *   array with 'content' and 'subject' keys, but only when the current
 								 *   user has the 'search content' permission and arg(0) is not 'search'.
 								 *   Otherwise nothing is returned.
 								 */
 								function search_block($op = 'list', $delta = 0) {
 								  // NOTE(review): $user is declared global but never read in this function.
 								  global $user;
 								  if ($op == 'list') {
 								    $blocks[0]['info'] = t('Search form');
 								    return $blocks;
 								  }
 								  // The block is skipped when arg(0) is 'search' -- presumably because the
 								  // search page already shows its own form; confirm against search_view().
 								  else if ($op == 'view' && user_access('search content') && arg(0) != 'search') {
 								    $block['content'] = search_form('', '', null, '');
 								    $block['subject'] = t('Search');
 								    return $block;
 								  }
 								}
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								/**
 								 * Implementation of hook_menu().
 								 */
-- Patch #8179 by JonBob: reintroduced menu caching.

											
										
										
											2004-09-16 07:17:56 +00:00
+								function search_menu($may_cache) {
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								  $items = array();
-- Patch #8179 by JonBob: reintroduced menu caching.

											
										
										
											2004-09-16 07:17:56 +00:00
 								  if ($may_cache) {
-												- Small change

											
										
										
											2004-12-31 09:31:54 +00:00
+								    $items[] = array('path' => 'search', 'title' => t('search'),
-- Patch #8179 by JonBob: reintroduced menu caching.

											
										
										
											2004-09-16 07:17:56 +00:00
+								      'callback' => 'search_view',
 								      'access' => user_access('search content'),
 								      'type' => MENU_SUGGESTED_ITEM);
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								    $items[] = array('path' => 'admin/settings/search', 'title' => t('search'),
 								      'callback' => 'search_admin',
 								      'type' => MENU_NORMAL_ITEM,
 								      'access' => user_access('administer site configuration'));
 								  }
 								  else if (arg(0) == 'search') {
 								    // To remember the user's search keywords when switching across tabs,
 								    // we dynamically add the keywords to the search tabs' paths.
 								    $keys = search_get_keys();
 								    $keys = strlen($keys) ? '/'. $keys : '';
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
+								    foreach (module_list() as $name) {
-												- #4166: Respect 'access userlist' permission for profile data.

											
										
										
											2005-04-11 22:48:27 +00:00
+								      if (module_hook($name, 'search') && $title = module_invoke($name, 'search', 'name')) {
 								        $items[] = array('path' => 'search/'. $name . $keys, 'title' => $title,
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
+								          'callback' => 'search_view',
 								          'access' => user_access('search content'),
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								          'type' => MENU_LOCAL_TASK);
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
+								      }
 								    }
-- Patch #8179 by JonBob: reintroduced menu caching.

											
										
										
											2004-09-16 07:17:56 +00:00
+								  }
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								  return $items;
 								}
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								/**
 								 * Menu callback; displays the search module settings page.
 								 */
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								function search_admin() {
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								  if ($_POST) {
-												- Use format_plural() for search status (items left to index)

											
										
										
											2005-03-18 20:31:00 +00:00
+								    // If the word length settings change, the index needs to be rebuilt.
 								    if (variable_get('minimum_word_size', 3) != $_POST['edit']['minimum_word_size']) {
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								      drupal_set_message(t('The index will be rebuilt.'));
 								      search_wipe();
-												- Fix search index not wiping on admin settings change.

											
										
										
											2005-04-23 07:34:22 +00:00
+								      system_settings_save();
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								    }
 								    else {
 								      system_settings_save();
 								    }
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								  }
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								  // Collect some stats
 								  $remaining = 0;
 								  $total = 0;
 								  foreach (module_list() as $module) {
 								    if (module_hook($module, 'search')) {
 								      $status = module_invoke($module, 'search', 'status');
 								      $remaining += $status['remaining'];
 								      $total += $status['total'];
 								    }
-												- Stripped white-space.

											
										
										
											2005-01-15 09:03:39 +00:00
+								  }
-												- Use format_plural() for search status (items left to index)

											
										
										
											2005-03-18 20:31:00 +00:00
+								  $count = format_plural($remaining, 'There is 1 item left to index.', 'There are %count items left to index.');
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								  $percentage = ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) . '%';
-												- Use format_plural() for search status (items left to index)

											
										
										
											2005-03-18 20:31:00 +00:00
+								  $status = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $count .'</strong></p>';
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								  $output = form_group('Indexing status', $status);
 								  // Indexing throttle:
 								  $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));
 								  $group = form_select(t('Items to index per cron run'), 'search_cron_limit', variable_get('search_cron_limit', 100), $items, t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
 								  $output .= form_group(t('Indexing throttle'), $group);
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								  // Indexing settings:
-												- Use format_plural() for search status (items left to index)

											
										
										
											2005-03-18 20:31:00 +00:00
+								  $group = '<em>'. t('<p>Changing the setting below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>';
-												- Patch #25603 by Stefan: made the sizes of forms consistent.
  TODO: document the defaults in the PHPdoc comments.

											
										
										
											2005-06-27 18:33:33 +00:00
+								  $group .= form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 3), 5, 3, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
 								  $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 3), 5, 3, t('The number of characters a word has to be to be searched for, including wildcard characters.'));
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								  $output .= form_group(t('Indexing settings'), $group);
-- Search configuration settings are now a part of the standard
  configuration page.  Patch by Kjartan.

											
										
										
											2002-11-21 18:18:19 +00:00
-- Patch 20910 by chx: centralize print theme page.

											
										
										
											2005-04-24 16:34:36 +00:00
+								  return system_settings_form($output);
-- Search configuration settings are now a part of the standard
  configuration page.  Patch by Kjartan.

											
										
										
											2002-11-21 18:18:19 +00:00
+								}
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								/**
-												- Patch #12232 by Steven: more search improvements:

   + When a comment is posted, a node needs to be re-indexed. Luckily, we can use node_comment_statistics for this easily.
   + When a node is deleted, it should be deleted from the search index as well.
   + The search wipe didn't properly remove links to nodes from the index.
   + Section url was faulty in _help.
   + Minor code rearrangement.

											
										
										
											2004-11-04 06:47:03 +00:00
+								 * Wipes a part of or the entire search index.
 								 *
 								 * @param $sid
 								 *  (optional) The SID of the item to wipe. If specified, $type must be passed
 								 *  too.
 								 * @param $type
 								 *  (optional) The type of item to wipe.
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								 */
-												Search: wrong parameter order on search_wipe().

											
										
										
											2004-12-02 06:45:18 +00:00
+								function search_wipe($sid = NULL, $type = NULL) {
-												- Patch #12232 by Steven: more search improvements:

   + When a comment is posted, a node needs to be re-indexed. Luckily, we can use node_comment_statistics for this easily.
   + When a node is deleted, it should be deleted from the search index as well.
   + The search wipe didn't properly remove links to nodes from the index.
   + Section url was faulty in _help.
   + Minor code rearrangement.

											
										
										
											2004-11-04 06:47:03 +00:00
+								  if ($type == NULL && $sid == NULL) {
 								    module_invoke_all('search', 'reset');
 								  }
 								  else {
 								    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
 								    db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
 								  }
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								}
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								/**
 								 * Marks a word as dirty (or retrieves the list of dirty words). This is used
 								 * during indexing (cron). Words which are dirty have outdated total counts in
 								 * the search_total table, and need to be recounted.
 								 */
 								function search_dirty($word = null) {
 								  // Registry of flagged words, keyed by word; kept between calls.
 								  static $pending = array();
 								  // Called without a word: hand back the accumulated registry.
 								  if ($word === null) {
 								    return $pending;
 								  }
 								  // Called with a word: flag it (nothing is returned in this case).
 								  $pending[$word] = true;
 								}
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								/**
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								 * Implementation of hook_cron().
 								 *
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * Fires hook_update_index() in all modules and cleans up dirty words (see
 								 * search_dirty).
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 */
 								function search_cron() {
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								  // Update word index
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								  foreach (module_list() as $module) {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    module_invoke($module, 'update_index');
 								  }
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								  // Update word counts for new/changed words
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  foreach (search_dirty() as $word => $dummy) {
 								    $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								    db_query("UPDATE {search_total} SET count = %d WHERE word = '%s'", $total, $word);
 								    if (!db_affected_rows()) {
-												- Patch #22786 by mathias: now that db_affected_rows() returns the number of rows matched instead of only changed we can get rid of the hacks that worked around this.

											
										
										
											2005-05-14 17:26:02 +00:00
+								      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								    }
 								  }
 								  // Find words that were deleted from search_index, but are still in
 								  // search_total. We use a LEFT JOIN between the two tables and keep only the
 								  // rows which fail to join.
 								  $result = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
 								  while ($word = db_fetch_object($result)) {
 								    db_query("DELETE FROM {search_total} WHERE word = '%s'", $word->realword);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								  }
 								}
 								/**
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * Splits a string into component words according to indexing rules.
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 */
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								function search_keywords_split($text) {
 								  static $last = null;
 								  static $lastsplit = null;
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  if ($last == $text) {
 								    return $lastsplit;
 								  }
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Decode entities to UTF-8
 								  $text = decode_entities($text);
-												search: Removing iso-8859-1 characters from update_index() and adding the same stripping to do_search().
This fixes the embarrassing bug of not being able to search for "4.5.0" because it is indexed as "450".

											
										
										
											2004-10-15 22:01:41 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Call an external processor for word handling.
 								  search_preprocess($text);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // To improve searching for numerical data such as dates, IP addresses
 								  // or version numbers, we consider a group of numerical characters
 								  // separated only by punctuation characters to be one piece.
 								  // This means that searching for e.g. '20/03/1984' also returns
 								  // results with '20-03-1984' in them.
 								  // Readable regexp: ([number]+)[punctuation]+(?=[number])
 								  $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // The dot, underscore and dash are simply removed. This allows meaningful
 								  // search behaviour with acronyms and URLs.
 								  $text = preg_replace('/[._-]+/', '', $text);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // With the exception of the rules above, we consider all punctuation,
 								  // marks, spacers, etc, to be a word boundary.
 								  $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Process words
 								  $words = explode(' ', $text);
-												- Ensure word length <= 50 bytes

											
										
										
											2005-01-11 04:50:00 +00:00
+								  array_walk($words, '_search_keywords_truncate');
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Save last keyword result
 								  $last = $text;
 								  $lastsplit = $words;
 								  return $words;
 								}
-												- Ensure word length <= 50 bytes

											
										
										
											2005-01-11 04:50:00 +00:00
+								/**
 								 * Callback for array_walk() in search_keywords_split(): shortens a single
 								 * keyword in place with truncate_utf8() so that it does not exceed 50 bytes.
 								 */
 								function _search_keywords_truncate(&$text) {
-												- Oopsie

											
										
										
											2005-01-11 05:01:15 +00:00
+								  $text = truncate_utf8($text, 50);
-												- Ensure word length <= 50 bytes

											
										
										
											2005-01-11 04:50:00 +00:00
+								}
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								/**
 								 * Loosens up a set of search keywords by adding wildcards, if possible.
-												- Patch #19739 by Uwe: corrected many typos in the documentation and code comments

											
										
										
											2005-03-31 21:18:08 +00:00
+								 *
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								 * @param $text
 								 *   The keywords as entered by the user.
 								 * @return
 								 *   If more wildcards can be added, the adjusted keywords are returned.
 								 *   If the query is already as loose as possible, NULL is returned.
 								 */
 								function search_keywords_variation($text) {
 								  // Normalise surrounding whitespace once; everything below works on this value.
 								  $text = trim($text);
 								  // Wrap every space-separated keyword in wildcards: "a b" becomes "*a* *b*".
 								  $wildcarded = '*'. implode('* *', explode(' ', $text)) .'*';
 								  // Collapse runs of asterisks so keywords that already had wildcards do not
 								  // end up with doubled ones.
 								  $loosened = preg_replace('/\*+/', '*', $wildcarded);
 								  // Nothing changed: the query is already as loose as it can get.
 								  if ($loosened == $text) {
 								    return NULL;
 								  }
 								  return $loosened;
 								}
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								/**
 								 * Invokes hook_search_preprocess() in modules.
 								 */
 								function search_preprocess(&$text) {
-												- #19063: Use module_implements instead of hand-rolled routine.

											
										
										
											2005-03-18 06:50:41 +00:00
+								  foreach (module_implements('search_preprocess') as $module) {
 								    $text = module_invoke($module, 'search_preprocess', $text);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								  }
 								}
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								/**
-												- Some search doxygen tweaks

											
										
										
											2005-01-13 17:34:01 +00:00
+								 * Update the full-text search index for a particular item.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 *
 								 * @param $sid
 								 *   A number identifying this particular item (e.g. node id).
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 *
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * @param $type
 								 *   A string defining this type of item (e.g. 'node')
 								 *
 								 * @param $text
 								 *   The content of this item. Must be a piece of HTML text.
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 *
 								 * @ingroup search
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 */
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								function search_index($sid, $type, $text) {
 								  $minimum_word_size = variable_get('minimum_word_size', 3);
 								  global $base_url;
 								  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';
 								  // Multipliers for scores of words inside certain HTML tags.
 								  // Note: 'a' must be included for link ranking to work.
 								  $tags = array('h1' => 21,
 								                'h2' => 18,
 								                'h3' => 15,
 								                'h4' => 12,
 								                'h5' => 9,
 								                'h6' => 6,
 								                'u' => 5,
 								                'b' => 5,
 								                'strong' => 5,
 								                'em' => 5,
 								                'a' => 10);
 								  // Strip off all ignored tags to speed up processing, but insert space before/after
 								  // them to keep word boundaries.
 								  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
 								  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');
 								  // Split HTML tags from plain text.
 								  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 								  // Note: PHP ensures the array consists of alternating delimiters and literals
 								  // and begins and ends with a literal (inserting $null as required).
 								  $tag = false; // Odd/even counter. Tag or no tag.
 								  $link = false; // State variable for link analyser
 								  $score = 1; // Starting score per word
 								  $results = array(0 => array());
 								  foreach ($split as $value) {
 								    if ($tag) {
 								      // Increase or decrease score per word based on tag
 								      list($tagname) = explode(' ', $value, 2);
 								      $tagname = strtolower($tagname);
 								      if ($tagname{0} == '/') {
 								        $score -= $tags[substr($tagname, 1)];
 								        if ($score < 1) { // possible due to bad HTML
 								          $score = 1;
 								        }
 								        if ($tagname == '/a') {
 								          $link = false;
 								        }
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								      }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      else {
 								        if ($tagname == 'a') {
 								          // Check if link points to a node on this site
 								          if (preg_match($node_regexp, $value, $match)) {
 								            $path = drupal_get_normal_path($match[1]);
-												- Fixing duplicate rows error during indexing
- Fixing broken "pagerank" detection

											
										
										
											2005-01-11 03:37:13 +00:00
+								            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								              $linknid = $match[1];
 								              if ($linknid > 0) {
 								                $link = true;
 								              }
 								            }
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								          }
 								        }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								        $score += $tags[$tagname];
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								      }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    }
 								    else {
 								      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
 								      if ($value != '') {
 								        $words = search_keywords_split($value);
 								        foreach ($words as $word) {
 								          // Check wordlength
 								          if (string_length($word) >= $minimum_word_size) {
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								            // Note: strtolower can be used because the value is only used internally.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								            $word = strtolower($word);
 								            if ($link) {
 								              if (!isset($results[$linknid])) {
 								                $results[$linknid] = array();
 								              }
 								              $results[$linknid][$word] += $score;
 								            }
 								            else {
 								              $results[0][$word] += $score;
 								            }
 								          }
-- Fixed a warning.  Patch by Jacobo Tarrio.

											
										
										
											2003-05-30 14:58:44 +00:00
+								        }
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								      }
 								    }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    $tag = !$tag;
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								  }
-												- Patch #12232 by Steven: more search improvements:

   + When a comment is posted, a node needs to be re-indexed. Luckily, we can use node_comment_statistics for this easily.
   + When a node is deleted, it should be deleted from the search index as well.
   + The search wipe didn't properly remove links to nodes from the index.
   + Section url was faulty in _help.
   + Minor code rearrangement.

											
										
										
											2004-11-04 06:47:03 +00:00
+								  search_wipe($sid, $type);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Insert results into search index
 								  foreach ($results[0] as $word => $score) {
 								    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
 								    search_dirty($word);
 								  }
 								  unset($results[0]);
-- improved search module (vaguely derived from axel's code)

											
										
										
											2001-09-22 21:01:39 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Now insert links to nodes
 								  foreach ($results as $nid => $words) {
 								    foreach ($words as $word => $score) {
 								      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
 								      search_dirty($word);
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								    }
 								  }
 								}
 								/**
-												- Some search doxygen tweaks

											
										
										
											2005-01-13 17:34:01 +00:00
+								 * Do a query on the full-text search index for a word or words.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 *
-												- Some search doxygen tweaks

											
										
										
											2005-01-13 17:34:01 +00:00
+								 * This function is normally only called by each module that support the
 								 * indexed search (and thus, implements hook_update_index()).
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 *
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 * The final query is an SQL select on the search_index table. As a guide for
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * writing the optional extra SQL fragments (see below), use this query:
 								 *
 								 * SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score
 								 * FROM {search_index} i
 								 * $join INNER JOIN {search_total} t ON i.word = t.word
 								 * WHERE $where AND (i.word = '...' OR ...)
 								 * GROUP BY i.type, i.sid
 								 * ORDER BY score DESC";
 								 *
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								 * @param $keywords
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 *   A search string as entered by the user.
 								 *
 								 * @param $type
 								 *   A string identifying the calling module.
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 *
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * @param $join
 								 *   (optional) A string to be inserted into the JOIN part of the SQL query.
 								 *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 								 *
 								 * @param $where
 								 *   (optional) A string to be inserted into the WHERE part of the SQL query.
 								 *   For example "(n.status > 0)".
 								 *
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								 * @param $variation
 								 *   Used internally. Must not be specified.
 								 *
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * @return
 								 *   An array of SIDs for the search results.
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 *
 								 * @ingroup search
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								 */
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								function do_search($keywords, $type, $join = '', $where = '1', $variation = true) {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  // through the keyword extractor. Multiple wildcards are collapsed into one.
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								  $keys = preg_replace('!\*+!', '<27>', $keywords);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								  // Split into words
 								  $keys = search_keywords_split($keys);
 								  $words = array();
 								  $arguments = array();
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  $refused = array();
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  // Build WHERE clause
 								  foreach ($keys as $word) {
 								    if (string_length($word) < variable_get('remove_short', 3)) {
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								      if ($word != '') {
 								        $refused[] = str_replace('<27>', '*', $word);
 								      }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      continue;
 								    }
-												Fixing bad UTF-8.

											
										
										
											2004-11-04 03:33:26 +00:00
+								    if (strpos($word, '<27>') !== false) {
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								      // Note: strtolower can be used because the value is only used internally.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      $words[] = "i.word LIKE '%s'";
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								      $arguments[] = str_replace('<27>', '%', strtolower($word));
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    }
 								    else {
 								      $words[] = "i.word = '%s'";
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								      $arguments[] = strtolower($word);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    }
 								  }
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  // Tell the user which words were excluded
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								  if (count($refused) && $variation) {
 								    $message = format_plural(count($refused),
 								                             'The word %words was not included because it is too short.',
 								                             'The words %words were not included because they were too short.');
-												- #18817: Clean up plain-text checking (see drupal-devel!)

											
										
										
											2005-03-31 09:25:33 +00:00
+								    drupal_set_message(strtr($message, array('%words' => theme('placeholder', implode(', ', $refused)))));
-												- Oops: don't show refused words message if there are none.

											
										
										
											2005-02-27 23:31:47 +00:00
+								  }
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  if (count($words) == 0) {
 								    return array();
 								  }
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								  $conditions = $where .' AND ('. implode(' OR ', $words) .')';
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								  // Get result count (for pager)
-												- Part of #17747 was not committed (pgsql fixes).

											
										
										
											2005-03-29 03:28:02 +00:00
+								  $count = db_num_rows(db_query("SELECT DISTINCT i.sid, i.type FROM {search_index} i $join WHERE $conditions", $arguments));
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  if ($count == 0) {
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								    // Try out a looser search query if nothing was found.
 								    if ($variation && $loose = search_keywords_variation($keywords)) {
 								      return do_search($loose, $type, $join, $where, false);
 								    }
 								    else {
 								      return array();
 								    }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  }
 								  $count_query = "SELECT $count";
 								  // Do pager query
-												- #18434: (search) Try wildcards automatically if there were no results.
- Fix missing format_plural()

											
										
										
											2005-03-08 18:08:17 +00:00
+								  $query = "SELECT i.type, i.sid, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC";
-												- Patch #13581 by Steven: Db_query() allows a variable amount of parameters so you can pass the query arguments in. There is however an alternative syntax: instead of passing the query arguments as function arguments, you can also pass a single array with the query arguments in it. For example the following two statements are equivalent:

db_query($query, $a, $b, $c);
db_query($query, array($a, $b, $c));

This usage is particularly interesting when the query is constructed dynamically, and the amount of arguments to pass varies. In that case we use the second method to avoid using call_user_func_array(). This behaviour is not documented explicitly, but it is used in several places.

However, db_query_range() and pager_query() do not support this syntax properly, which means there are several pieces of code which still revert to the ugly call_user_func_array() call.

This patch updates db_query_range() and pager_query() so they support the array-passing method. I also added documentation about this method to each of the db functions.

I also cleaned up the code for db_query (it was weird and hard to understand) and moved db_query() and db_queryd() from database.xxxxx.inc to database.inc: it was the same between both mysql and pgsql, as it doesn't do anything database specific. It just prefixes the tables and inserts the arguments. The actual db query is performed in _db_query(), which is still in database.xxxxx.inc.

Finally, I updated several places with the new syntax, and the code is a lot cleaner. For example:
- array_unshift($params, "SELECT u.* FROM {users} u WHERE $query u.status < 3");
- $params[] = 0;
- $params[] = 1;
- $result = call_user_func_array('db_query_range', $params);
+ $result = db_query_range("SELECT u.* FROM {users} u WHERE $query u.status < 3", $params, 0, 1);

and

- return call_user_func_array('db_query_range', array_merge(array($query), $args, array((int)$pager_from_array[$element], (int)$limit)));
+ return db_query_range($query, $args, (int)$pager_from_array[$element], (int)$limit);

I've tested it on mysql. I didn't alter the actual db behaviour, so pgsql should be okay too.

This patch is important because many people avoid the call_user_func_array() method and put data directly into the db query.  This is very, very bad because the database prefix will be applied to it, and strip out braces. It's also generally bad form as you have to call check_query() yourself.  With the new, documented syntax, there is no more excuse to put data directly in the query.

											
										
										
											2004-11-29 13:13:29 +00:00
+								  $result = pager_query($query, 15, 0, $count_query, $arguments);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								  $results = array();
 								  while ($item = db_fetch_object($result)) {
 								    $results[] = $item->sid;
 								  }
 								  return $results;
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								}
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								/**
 								 * Helper function for grabbing search keys.
 								 */
 								function search_get_keys() {
 								  // Returns the search keywords for the current request, or NULL when the
 								  // request carries none.
 								  //
 								  // Keys are normally the remainder of the path after the first two
 								  // components, so limiting the split to 3 pieces keeps any further
 								  // slashes inside the keywords themselves.
 								  $path = isset($_GET['q']) ? explode('/', $_GET['q'], 3) : array();
 								  if (count($path) == 3) {
 								    return $path[2];
 								  }
 								  // Note: support old GET format of searches for existing links.
 								  // The lookup is guarded so that a request without any keys yields NULL
 								  // instead of triggering an undefined index notice.
 								  return isset($_REQUEST['keys']) ? $_REQUEST['keys'] : NULL;
 								}
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								/**
 								 * Menu callback; presents the search form and/or search results.
 								 */
 								function search_view() {
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  $type = arg(1);
 								  // Search form submits with POST but redirects to GET. This way we can keep
 								  // the search query URL clean as a whistle:
 								  // search/type/keyword+keyword
 								  if ($_POST['edit']['keys']) {
-												- Fix submissions from search boxes in themes

											
										
										
											2005-02-27 18:39:18 +00:00
+								    if ($type == '') {
 								      $type = 'node';
 								    }
-												- #18817: Clean up plain-text checking (see drupal-devel!)

											
										
										
											2005-03-31 09:25:33 +00:00
+								    drupal_goto('search/'. urlencode($type) .'/'. urlencode($_POST['edit']['keys']));
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  }
 								  else if ($type == '') {
 								    // Note: search/node can not be a default tab because it would take on the
-												- Patch #19739 by Uwe: corrected many typos in the documentation and code comments

											
										
										
											2005-03-31 21:18:08 +00:00
+								    // path of its parent (search). It would prevent remembering keywords when
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								    // switching tabs. This is why we drupal_goto to it from the parent instead.
 								    drupal_goto('search/node');
 								  }
 								  $keys = search_get_keys();
-- Fixed a typo in the PostgreSQL database scheme.  Patch by Michael Frankowski.

- Fixed a typo in the MSSQL database scheme.  Patch by Michael Frankowski.

- Removed dependency on "register_globals = on"!  Patches by Michael Frankowski.

  Notes:

  + Updated the patches to use $foo["bar"] instead of $foo['bar'].
  + Updated the INSTALL and CHANGELOG files as well.

- Tiny improvement to the "./scripts/code-clean.sh" script.

											
										
										
											2003-05-13 18:36:38 +00:00
-Tabs patch!

CHANGES
-------

 + Introduced tabs. First, we extended the menu system to support tabs. Next, a tab was added for every link that was (1) an administrative action other than the implicit 'view' (2) relevant to that particular page only. This is illustrated by the fact that all tabs are verbs and that clicking a page's tab leads you to a subpage of that page.

 + Flattened the administration menu. The tabs helped simplify the navigation menu as I could separate 'actions' from 'navigation'. In addition, I removed the 'administer > configuration'-menu, renamed 'blocks' to 'sidebars' which I hope is a bit more descriptive, and made a couple more changes. Earlier, we already renamed 'taxonomy' to 'categorization' and we moved 'statistics' under 'logs'.

 + Grouped settings. All settings have been grouped under 'administer > settings'.

TODO
----

 + Update core themes: only Xtemplate default supports tabs and even those look ugly.  Need help.

 + Update contributed modules.  The menu() hook changed drastically.  Updating your code to adhere to the new menu() function should be 90% of the work.  Moreover, ensure that your module's admin links are still valid and that URLs to nodes get updated to the new scheme ('node/view/x' -> 'node/x').

											
										
										
											2004-06-18 15:04:37 +00:00
+								  if (user_access('search content')) {
-												- Fixed empty searches being performed.
- Fixed empty searches being logged.
- Trim whitespace from start and end of search key.

											
										
										
											2004-05-30 21:04:07 +00:00
+								    // Only perform search if there is non-whitespace search term:
 								    if (trim($keys)) {
 								      // Log the search keys:
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								      watchdog('search',
-												- #18817: Clean up plain-text checking (see drupal-devel!)

											
										
										
											2005-03-31 09:25:33 +00:00
+								        t('Search: %keys (%type).', array('%keys' => theme('placeholder', $keys), '%type' => module_invoke($type, 'search', 'name'))),
-												- Updated watchdog call.

											
										
										
											2005-01-09 10:07:17 +00:00
+								        WATCHDOG_NOTICE,
-												- #18817: Clean up plain-text checking (see drupal-devel!)

											
										
										
											2005-03-31 09:25:33 +00:00
+								        l(t('results'), 'search/'. urlencode($type) .'/'. urlencode($keys))
-												- Patch by Steven:
   + Display 'friendly' name rather than module name in search watchdog
     messages.
   + Remove left-over from search_total table.
   + Add index wipe button to the admin
   + Moved the admin to admin/settings/search
   + Prevented menu bug when node modules update the breadcrumb in view
     (thanks JonBob).
   + Changed search_total table's word key to PRIMARY.

											
										
										
											2004-11-03 16:46:58 +00:00
+								        );
-- Log the search terms in a new watchdog category.

											
										
										
											2004-01-02 16:44:11 +00:00
-												- Fixed empty searches being performed.
- Fixed empty searches being logged.
- Trim whitespace from start and end of search key.

											
										
										
											2004-05-30 21:04:07 +00:00
+								      // Collect the search results:
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      $results = search_data($keys, $type);
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
-- Committed phase 4 of JonBob's menu system changes.

											
										
										
											2003-11-25 19:26:21 +00:00
+								      if ($results) {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								        $results = theme('box', t('Search results'), $results);
-- improved search module (vaguely derived from axel's code)

											
										
										
											2001-09-22 21:01:39 +00:00
+								      }
 								      else {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
-- improved search module (vaguely derived from axel's code)

											
										
										
											2001-09-22 21:01:39 +00:00
+								      }
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
+								    }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    else if (isset($_POST['edit'])) {
 								      form_set_error('keys', t('Please enter some keywords.'));
 								    }
 								    // Construct the search form.
 								    // Note, we do this last because of the form_set_error() above.
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								    $output = search_form(NULL, $keys, $type);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								    $output .= $results;
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
-- Patch 20910 by chx: centralize print theme page.

											
										
										
											2005-04-24 16:34:36 +00:00
+								    return $output;
-												- my editor got set up to insert tabs instead of spaces. Cleaning up.

											
										
										
											2002-04-05 18:11:42 +00:00
+								  }
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
+								  else {
-- Added support for 403 handling.  Patch by JonBob.  As a side benefit,
  administrators will be able to define a custom 403 page, just as they
  can define 404 pages now.

  This needs to be documented in the "Changes since / migrating to ..."
  pages.

											
										
										
											2004-04-21 13:56:38 +00:00
+								    drupal_access_denied();
-- Made search.php a module, being search.module.

- Updated the permission names to be more consistent.

- Small improvement to node.php.

											
										
										
											2001-06-30 09:50:36 +00:00
+								  }
-												- applied search patch.
- added who is online block.
- made weblog module more configurable.
- users may now delete their own accounts (Feature #8)
- users may now request a password using email address *or* username.
  formerly required both items to match an account which was onerous.
- the link to request a new password is now presented whenever a user
  fails login.
- there is now a confirmation message after submitting edits to your
  user information.
- error messages in user.module may now be stylized by themes.
- <hook>_form has a $param setting you can fill with form parameters.
- improved wording for a few config settings.
- fixed various non-coding standard things.

											
										
										
											2002-03-05 20:15:17 +00:00
+								}
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								/**
 								 * @defgroup search Search interface
 								 * @{
 								 * The Drupal search interface manages a global search mechanism.
 								 *
 								 * Modules may plug into this system to provide searches of different types of
 								 * data. Most of the system is handled by search.module, so this must be enabled
 								 * for all of the search features to work.
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 *
 								 * There are three ways to interact with the search system:
 								 * - Specifically for searching nodes, you can implement nodeapi('update index')
 								 *   and nodeapi('search result'). However, note that the search system already
 								 *   indexes all visible output of a node, i.e. everything displayed normally
 								 *   by hook_view() and hook_nodeapi('view'). This is usually sufficient.
 								 *   You should only use this mechanism if you want additional, non-visible data
 								 *   to be indexed.
 								 * - Implement hook_search(). This will create a search tab for your module on
 								 *   the /search page with a simple keyword search form. You may optionally
 								 *   implement hook_search_item() to customize the display of your results.
 								 * - Implement hook_update_index(). This allows your module to use Drupal's
 								 *   HTML indexing mechanism for searching full text efficiently.
 								 *
 								 * If your module needs to provide a more complicated search form, then you need
-												- Stripped white-space.

											
										
										
											2005-01-15 09:03:39 +00:00
+								 * to implement it yourself without hook_search(). In that case, you should
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
 								 * so that users can easily find it.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 */
 								/**
 								 * Render a search form.
 								 *
 								 * @param $action
 								 *   Form action. Defaults to "search".
 								 * @param $keys
 								 *   The search string entered by the user, containing keywords for the search.
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 * @param $type
 								 *   The type of search to render the node for. Must be the name of module
 								 *   which implements hook_search(). Defaults to 'node'.
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								 * @param $prompt
 								 *   A piece of text to put before the form (e.g. "Enter your keywords")
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * @return
 								 *   An HTML string containing the search form.
 								 */
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								function search_form($action = '', $keys = '', $type = null, $prompt = null) {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  $edit = $_POST['edit'];
 								  if (!$action) {
-												Fixing search tabs going back to "content" when searching on other tabs.

											
										
										
											2005-01-14 03:24:20 +00:00
+								    $action = url('search/'. $type);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  }
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
+								  if (!$type) {
 								    $type = 'node';
 								  }
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  if (is_null($prompt)) {
 								    $prompt = t('Enter your keywords');
 								  }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								  $output = ' <div class="search-form">';
 								  $box = '<div class="container-inline">';
-												- Patch #25603 by Stefan: made the sizes of forms consistent.
  TODO: document the defaults in the PHPdoc comments.

											
										
										
											2005-06-27 18:33:33 +00:00
+								  $box .= form_textfield('', 'keys', $keys, $prompt ? 40 : 30, 255);
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  $box .= form_submit(t('Search'));
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  $box .= '</div>';
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								  $output .= form_item($prompt, $box);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  $output .= '</div>';
 								  return form($output, 'post', $action);
 								}
 								/**
-												- Some search doxygen tweaks

											
										
										
											2005-01-13 17:34:01 +00:00
+								 * Perform a standard search on the given keys, and return the formatted results.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 */
 								function search_data($keys = NULL, $type = 'node') {
 								  $output = '';
 								  if (isset($keys)) {
 								    if (module_hook($type, 'search')) {
 								      $results = module_invoke($type, 'search', 'search', $keys);
 								      if (is_array($results) && count($results)) {
 								        $output .= '<dl class="search-results">';
 								        foreach ($results as $entry) {
 								          $output .= theme('search_item', $entry, $type);
 								        }
 								        $output .= '</dl>';
-												More search usability improvements!
- Clean URLs: search/type/keywords e.g. "search/node/drupal release". The search
  form is POST submitted, but drupal_gotos to a GET page. This makes it easy to
  copy/paste search URLs, and makes the pager a lot cleaner.

- Remember the search keywords when switching between the search tabs. This is
  done through the same GET URLs rather than the session, so it does not mess up
  between multiple browser tabs.

- Report which keywords were ignored because they were too short.

- #820: Provide search block

- Treat multiple wildcards in a row as one

											
										
										
											2005-02-27 02:15:57 +00:00
+								        $output .= theme('pager', NULL, 15, 0);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words' scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      }
 								    }
 								  }
 								  return $output;
 								}
 								/**
 								 * Returns snippets from a piece of text, with certain keywords highlighted.
 								 * Used for formatting search results.
 								 *
 								 * @param $keys
 								 *   A string containing keywords. They are split into words using the same
 								 *   rules as search indexing.
 								 *
 								 * @param $text
 								 *   The text to extract fragments from.
 								 *
 								 * @return
 								 *   A string containing HTML for the excerpt.
 								 */
 								function search_excerpt($keys, $text) {
 								  $keys = search_keywords_split($keys);
 								  $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								  array_walk($keys, '_search_excerpt_replace');
 								  $workkeys = $keys;
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								  // Extract a fragment per keyword for at most 4 keywords.
 								  // First we collect ranges of text around each keyword, starting/ending
 								  // at spaces.
-												- Patch #19739 by Uwe: corrected many typos in the documentation and code comments

											
										
										
											2005-03-31 21:18:08 +00:00
+								  // If the sum of all fragments is too short, we look for second occurrences.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  $ranges = array();
 								  $included = array();
 								  $length = 0;
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								  while ($length < 256 && count($workkeys)) {
 								    foreach ($workkeys as $k => $key) {
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      if (strlen($key) == 0) {
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								        unset($workkeys[$k]);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								        continue;
 								      }
 								      if ($length >= 256) {
 								        break;
 								      }
-												- Patch #19739 by Uwe: corrected many typos in the documentation and code comments

											
										
										
											2005-03-31 21:18:08 +00:00
+								      // Remember occurrence of key so we can skip over it if more occurrences
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      // are desired.
 								      if (!isset($included[$key])) {
 								        $included[$key] = 0;
 								      }
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								      // Locate a keyword (position $p), then locate a space in front (position
 								      // $q) and behind it (position $s)
 								      if (preg_match('/\b'. $key .'\b/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
 								        $p = $match[0][1];
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								        if (($q = strpos($text, ' ', max(0, $p - 60))) !== false) {
 								          $end = substr($text, $p, 80);
 								          if (($s = strrpos($end, ' ')) !== false) {
 								            $ranges[$q] = $p + $s;
 								            $length += $p + $s - $q;
 								            $included[$key] = $p + 1;
 								          }
 								          else {
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								            unset($workkeys[$k]);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								          }
 								        }
 								        else {
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								          unset($workkeys[$k]);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								        }
 								      }
 								      else {
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								        unset($workkeys[$k]);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								      }
 								    }
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								  }
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								  // If we didn't find anything, return the beginning.
 								  if (count($ranges) == 0) {
 								    return truncate_utf8($text, 256) . ' ...';
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  }
 								  // Sort the text ranges by starting position.
 								  ksort($ranges);
 								  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
 								  $newranges = array();
 								  foreach ($ranges as $from2 => $to2) {
 								    if (!isset($from1)) {
 								      $from1 = $from2;
 								      $to1 = $to2;
 								      continue;
 								    }
 								    if ($from2 <= $to1) {
 								      $to1 = max($to1, $to2);
 								    }
 								    else {
 								      $newranges[$from1] = $to1;
 								      $from1 = $from2;
 								      $to1 = $to2;
 								    }
 								  }
 								  $newranges[$from1] = $to1;
 								  // Fetch text
 								  $out = array();
 								  foreach ($newranges as $from => $to) {
 								    $out[] = substr($text, $from, $to - $from);
 								  }
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								  $text = (isset($newranges[0]) ? '' : '... '). implode(' ... ', $out) .' ...';
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
 								  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
-												- Fix search excerpt highlighter marking substrings of words too + small improvements

											
										
										
											2005-01-10 23:37:26 +00:00
+								  $text = preg_replace('/\b('. implode('|', $keys) .')\b/iu', '<strong>\0</strong>', $text);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								  return $text;
 								}
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								/**
 								 * @} End of "defgroup search".
 								 */
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								/**
 								 * Helper function for array_walk in search_excerpt.
 								 */
-												- Make the search settings page more user-friendly.

											
										
										
											2005-01-11 09:41:49 +00:00
+								function _search_excerpt_replace(&$text) {
 								  $text = preg_quote($text, '/');
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								}
 								/**
 								 * Format a single result entry of a search query.
 								 *
 								 * Modules may implement hook_search_item() in order to override this default
 								 * function to display search results.
 								 *
 								 * @param $item
 								 *   A single search result as returned by hook_search(). The result should be
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
+								 *   an array with keys "link", "title", "type", "user", "date", and "snippet".
 								 *   Optionally, "extra" can be an array of extra info to show along with the
 								 *   result.
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 * @param $type
-												Various additions, improvements and fixes to the search documentation.

											
										
										
											2005-01-11 06:49:11 +00:00
+								 *   The type of item found, such as "user" or "node".
 								 *
 								 * @ingroup themeable
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								 */
 								function theme_search_item($item, $type) {
 								  if (module_hook($type, 'search_item')) {
 								    $output = module_invoke($type, 'search_item', $item);
 								  }
 								  else {
-												- #18817: Clean up plain-text checking (see drupal-devel!)

											
										
										
											2005-03-31 09:25:33 +00:00
+								    $output = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain amount of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, amount of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    $info = array();
 								    if ($item['type']) {
 								      $info[] = $item['type'];
 								    }
 								    if ($item['user']) {
 								      $info[] = $item['user'];
 								    }
 								    if ($item['date']) {
 								      $info[] = format_date($item['date'], 'small');
 								    }
-												- Patch #14917 by UnConeD/Steven:

1) The different types of search, which used to be radio button options in the search form, are now subtabs of "search" (default "search/node"). This seems better from a UI point of view, but also has another advantage: modules which implement a custom search form (flexinode, project) can add it as a subtab of search. This means that all search forms will be located in the same place, and also without needing an extra api call to search.module.

2) The current code was a bit hackish, as the indexing of comments along with nodes was hardcoded in node.module. Instead, I created a nodeapi operation "update index" which allows modules to add more data for a node that is being indexed. Comments are now indexed using this mechanism and from comment.module, which is a lot cleaner.

3) The search results format was also hardcoded to include "N comments". I replaced this with a nodeapi operation "search result" and moved the comment code to comment.module where it belongs. This op is quite useful, as for example I also modified upload.module to add "N attachments" to a search result if any are present.

											
										
										
											2004-12-31 09:30:12 +00:00
+								    if (is_array($item['extra'])) {
 								      $info = array_merge($info, $item['extra']);
-												- Patch #12232 by Steven/UnConed: search module improvements.

1) Clean up the text analyser: make it handle UTF-8 and all sorts of characters. The word splitter now does intelligent splitting into words and supports all Unicode characters. It has smart handling of acronyms, URLs, dates, ...

2) It now indexes the filtered output, which means it can take advantage of HTML tags. Meaningful tags (headers, strong, em, ...) are analysed and used to boost certain words scores. This has the side-effect of allowing the indexing of PHP nodes.

3) Link analyser for node links. The HTML analyser also checks for links. If they point to a node on the current site (handles path aliases) then the link's words are counted as part of the target node. This helps bring out commonly linked FAQs and answers to the top of the results.

4) Index comments along with the node. This means that the search can make a difference between a single node/comment about 'X' and a whole thread about 'X'. It also makes the search results much shorter and more relevant (before this patch, comments were even shown first).

5) We now keep track of total counts as well as a per item count for a word. This allows us to divide the word score by the total before adding up the scores for different words, and automatically makes noisewords have less influence than rare words. This dramatically improves the relevancy of multiword searches. This also makes the disadvantage of now using OR searching instead of AND searching less problematic.

6) Includes support for text preprocessors through a hook. This is required to index Chinese and Japanese, because these languages do not use spaces between words. An external utility can be used to split these into words through a simple wrapper module. Other uses could be spell checking (although it would have no UI).

7) Indexing is now regulated: only a certain number of items will be indexed per cron run. This prevents PHP from running out of memory or timing out. This also makes the reindexing required for this patch automatic. I also added an index coverage estimate to the search admin screen.

8) Code cleanup! Moved all the search stuff from common.inc into search.module, rewired some hooks and simplified the functions used. The search form and results now also use valid XHTML and form_ functions. The search admin was moved from search/configure to admin/search for consistency.

9) Improved search output: we also show much more info per item: date, author, node type, number of comments and a cool dynamic excerpt à la Google. The search form is now much simpler and the help is only displayed as tips when no search results are found.

10) By moving all search logic to SQL, I was able to add a pager to the search results. This improves usability and performance dramatically.

											
										
										
											2004-10-31 03:03:27 +00:00
+								    }
 								    $output .= ' <dd>'. ($item['snippet'] ? '<p>'. $item['snippet'] . '</p>' : '') . '<p class="search-info">' . implode(' - ', $info) .'</p></dd>';
 								  }
 								  return $output;
 								}
-- Removed some cruft: left-over xxx_help_page() functions.

											
										
										
											2004-12-11 14:13:24 +00:00
+								?>