2009-08-29 12:43:18 +00:00
< ? php
// $Id$
/**
 * @file
 * Search query extender and helper functions.
 */
/**
 * Queries the full-text search index for a word or words.
 *
 * This extender is normally only used by modules that support the indexed
 * search (and thus implement hook_update_index()).
 *
 * Results are retrieved in two logical passes. However, the two passes are
 * joined together into a single query, and for most simple queries the second
 * pass is not even needed.
 *
 * The first pass selects the set of all possible matches, which is already the
 * exact result set for simple "AND" or "OR" searches.
 *
 * The second pass refines this set by verifying advanced text conditions
 * (such as negative or phrase matches).
 *
 * The executed query is tagged 'search_$type' and can therefore be extended
 * further with hook_query_alter().
 */
class SearchQuery extends SelectQueryExtender {

  /**
   * The search expression (keywords and options) that is being searched for.
   *
   * @var string
   */
  protected $searchExpression;

  /**
   * Type of search.
   *
   * This maps to the value of the type column in {search_index}.
   *
   * @var string
   */
  protected $type;

  /**
   * Positive and negative search keys.
   *
   * Entries of 'positive' are either a single string (an ANDed term) or an
   * array of strings (a group of ORed terms).
   *
   * @var array
   */
  protected $keys = array('positive' => array(), 'negative' => array());

  /**
   * TRUE while the first pass can do without the complex (LIKE) conditions.
   *
   * Set to FALSE by parseSearchExpression() for phrases, negative keys, mixed
   * AND/OR expressions and terms without any valid word.
   *
   * @var bool
   */
  protected $simple = TRUE;

  /**
   * Conditions that are used for exact searches.
   *
   * These are always applied to the second pass query, but only applied to the
   * first pass query when $this->simple is FALSE.
   *
   * @var DatabaseCondition
   */
  protected $conditions;

  /**
   * Indicates how many matches for a search query are necessary.
   *
   * @var int
   */
  protected $matches = 0;

  /**
   * Array of search words, keyed by the word itself.
   *
   * These words have to match against {search_index}.word.
   *
   * @var array
   */
  protected $words = array();

  /**
   * Multiplier for the normalized search score.
   *
   * This value is calculated by the first pass query and multiplied with the
   * actual score of a specific word to make sure that the resulting calculated
   * score is between 0 and 1.
   *
   * @var float
   */
  protected $normalize;

  /**
   * Indicates if the first pass query has been executed.
   *
   * @var bool
   */
  protected $executedFirstPass = FALSE;

  /**
   * Stores score expressions.
   *
   * @var array
   */
  protected $scores = array();

  /**
   * Stores arguments for score expressions.
   *
   * @var array
   */
  protected $scoresArguments = array();

  /**
   * Multipliers passed to addScore(), in the order they were added.
   *
   * Their sum is bound to every :total_N placeholder by execute().
   *
   * @var array
   */
  protected $multiply = array();

  /**
   * Sets the search expression and the search type for this query.
   *
   * @param $expression
   *   A search query string, that can contain options.
   * @param $type
   *   The type of search, this maps to {search_index}.type.
   *
   * @return
   *   The SearchQuery object.
   */
  public function searchExpression($expression, $type) {
    $this->searchExpression = $expression;
    $this->type = $type;
    return $this;
  }

  /**
   * Applies a search option and removes it from the search query string.
   *
   * These options are in the form option:value,value2,value3. Every value is
   * added as an equality condition on $column; the values are ORed together.
   *
   * @param $option
   *   Name of the option.
   * @param $column
   *   Name of the db column to which the value should be applied.
   *
   * @return
   *   TRUE if at least a value for that option has been found, FALSE if not.
   */
  public function setOption($option, $column) {
    $values = search_expression_extract($this->searchExpression, $option);
    if (!$values) {
      return FALSE;
    }

    $or = db_or();
    foreach (explode(',', $values) as $value) {
      $or->condition($column, $value);
    }
    $this->condition($or);

    // Re-insert the option without a value; per this method's contract that
    // is what takes the option out of the expression.
    $this->searchExpression = search_expression_insert($this->searchExpression, $option);
    return TRUE;
  }

  /**
   * Parses the search expression into keys, words and SQL conditions.
   *
   * Fills $this->keys, $this->words, $this->matches and $this->conditions, and
   * clears $this->simple whenever the expression needs the LIKE conditions to
   * be evaluated already during the first pass.
   */
  protected function parseSearchExpression() {
    // Match words optionally prefixed by a dash. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $or = FALSE;
    $warning = '';
    foreach ($keywords as $match) {
      $phrase = FALSE;
      // Strip off phrase quotes. The pattern above guarantees that $match[2]
      // is never empty, so offset 0 always exists.
      if ($match[2][0] === '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }
      // Simplify keyword according to indexing rules and external
      // preprocessors.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

      // Negative matches.
      if ($match[1] === '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] === 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        // Keep the flag set for the next token only.
        $or = TRUE;
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] === 'AND' || $match[2] === 'and') {
        $warning = $match[2];
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] === 'or') {
          $warning = $match[2];
        }
        if ($or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    // NOTE(review): the LIKE patterns below carry a padding space outside the
    // wildcards. Confirm against how {search_dataset}.data is delimited that
    // this is intended and not meant to be "% word %".
    $this->conditions = db_and();
    $simple_and = FALSE;
    $simple_or = FALSE;

    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        $simple_or = TRUE;
        $any = FALSE;
        $queryor = db_or();
        foreach ($key as $term) {
          list($num_new_scores) = $this->parseWord($term);
          $any |= $num_new_scores;
          $queryor->condition('d.data', " % $term % ", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          $this->matches += ($any > 0) ? 1 : 0;
        }
      }
      // Single ANDed term.
      else {
        $simple_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', " % $key % ", 'LIKE');
        if (!$num_valid_words) {
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    if ($simple_and && $simple_or) {
      $this->simple = FALSE;
    }

    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', " % $key % ", 'NOT LIKE');
      $this->simple = FALSE;
    }

    if ($warning === 'or') {
      drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
    }
  }

  /**
   * Helper function for parseSearchExpression().
   *
   * Splits a word or phrase on spaces and registers every valid part in
   * $this->words. A part is valid when it is numeric or at least
   * 'minimum_word_size' characters long; numeric parts are stored as integers
   * with leading dashes and zeros stripped.
   *
   * @param $word
   *   The word or phrase to register.
   *
   * @return
   *   An array with two elements: the number of words newly added to
   *   $this->words, and the number of valid words found in $word.
   */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;
    $minimum_word_size = variable_get('minimum_word_size', 3);

    // Determine the scorewords of this word/phrase.
    foreach (explode(' ', $word) as $s) {
      $num = is_numeric($s);
      if ($num || drupal_strlen($s) >= $minimum_word_size) {
        $s = $num ? ((int) ltrim($s, '-0')) : $s;
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }

    return array($num_new_scores, $num_valid_words);
  }

  /**
   * Executes the first pass query.
   *
   * This can either be done explicitly, so that additional scores and
   * conditions can be applied to the second pass query, or implicitly by
   * execute().
   *
   * @return
   *   TRUE if search items exist, FALSE if not.
   */
  public function executeFirstPass() {
    $this->parseSearchExpression();

    if (count($this->words) == 0) {
      form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
      return FALSE;
    }
    $this->executedFirstPass = TRUE;

    // Restrict the index to the requested words. At least one word exists at
    // this point, so the OR group is never empty.
    $or = db_or();
    foreach ($this->words as $word) {
      $or->condition('i.word', $word);
    }
    $this->condition($or);

    // Build query for keyword normalization.
    $this->join('search_total', 't', 'i.word = t.word');
    $this
      ->condition('i.type', $this->type)
      ->groupBy('i.type')
      ->groupBy('i.sid')
      ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));

    // Clone the query object to do the first pass query, so that the range,
    // ordering and expression added below do not leak into the second pass.
    $first = clone $this->query;

    // For complex search queries, add the LIKE conditions to the first pass
    // query.
    if (!$this->simple) {
      $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
      $first->condition($this->conditions);
    }

    // Calculate maximum keyword relevance, to normalize it.
    $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $this->normalize = $first
      ->range(0, 1)
      ->orderBy('calculated_score', 'DESC')
      ->execute()
      ->fetchField();

    return (bool) $this->normalize;
  }

  /**
   * Adds a custom score expression to the search query.
   *
   * Each score expression can optionally use a multiplicator, and multiple
   * expressions are combined.
   *
   * @param $score
   *   The score expression.
   * @param $arguments
   *   Custom query arguments for that expression.
   * @param $multiply
   *   If set, the score is multiplied with that value. Search query ensures
   *   that the search scores are still normalized.
   *
   * @return
   *   The SearchQuery object.
   */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      $i = count($this->multiply);
      // The placeholder names must match the argument keys exactly:
      // :multiply_N is bound right here, :total_N is bound by execute() once
      // the sum of all multipliers is known.
      $score = 'CAST(:multiply_' . $i . ' AS DECIMAL) * COALESCE((' . $score . '), 0) / CAST(:total_' . $i . ' AS DECIMAL)';
      $arguments[':multiply_' . $i] = $multiply;
      $this->multiply[] = $multiply;
    }
    $this->scores[] = $score;
    $this->scoresArguments += $arguments;

    return $this;
  }

  /**
   * Executes the search.
   *
   * If not already done, this executes the first pass query. Then the complex
   * conditions are applied to the query, including score expressions and
   * ordering.
   *
   * @return
   *   An empty DatabaseStatementEmpty object if the first pass query returned
   *   no results, and the database result set of the search query if it did.
   */
  public function execute() {
    if (!$this->executedFirstPass) {
      $this->executeFirstPass();
    }
    if (!$this->normalize) {
      return new DatabaseStatementEmpty();
    }

    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $this->condition($this->conditions);

    if (empty($this->scores)) {
      // Add default score.
      $this->addScore('i.relevance');
    }

    if (count($this->getOrderBy()) == 0) {
      // Add default order.
      $this->orderBy('calculated_score', 'DESC');
    }

    if (count($this->multiply)) {
      // Bind the total of all multipliers once per multiplied score, so that
      // every :total_N placeholder created by addScore() has a value and the
      // scores stay normalized as far as possible.
      $sum = array_sum($this->multiply);
      foreach (array_keys($this->multiply) as $i) {
        $this->scoresArguments[':total_' . $i] = $sum;
      }
    }

    // Replace i.relevance pseudo-field with the actual, normalized value.
    $this->scores = str_replace('i.relevance', '(' . (1.0 / $this->normalize) . ' * i.score * t.count)', $this->scores);
    // Convert scores to an expression.
    $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);

    // Add tag and useful metadata.
    $this
      ->addTag('search_' . $this->type)
      ->addMetaData('normalize', $this->normalize)
      ->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }

}