mirror of https://github.com/milvus-io/milvus.git
enhance: add materialized view search info (#30888)
issue: #29892 This PR 1. Pass Materialized View (MV) search information obtained from the expression parsing planning procedure to Knowhere. It only performs when MV is enabled and the partition key is involved in the expression. The search information includes: 1. Touched field_id and the count of related categories in the expression. E.g., `color == red && color == blue` yields `field_id -> 2`. 2. Whether the expression only includes AND (&&) logical operator, default `true`. 3. Whether the expression has NOT (!) operator, default `false`. 4. Store if turning on MV on the proxy to eliminate reading from paramtable for every search request. 5. Renames to MV. ## Rebuttals 1. Did not write in `ExtractInfoPlanNodeVisitor` since the new scalar framework was introduced and this part might be removed in the future. 2. Currently only interested in `==` and `in` expression, `string` data type, anything else is a bonus. 3. Leave handling expressions like `F == A || F == A` for future works of the optimizer. ## Detailed MV Info  Signed-off-by: Patrick Weizhi Xu <weizhi.xu@zilliz.com>pull/31003/head
parent
c81909bfab
commit
982dd2834b
|
@ -32,6 +32,7 @@ struct SearchInfo {
|
|||
knowhere::Json search_params_;
|
||||
std::optional<FieldId> group_by_field_id_;
|
||||
tracer::TraceContext trace_ctx_;
|
||||
bool materialized_view_involved = false;
|
||||
};
|
||||
|
||||
using SearchInfoPtr = std::shared_ptr<SearchInfo>;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/Exception.h"
|
||||
#include "common/Schema.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/Utils.h"
|
||||
|
@ -29,6 +30,86 @@
|
|||
namespace milvus {
|
||||
namespace expr {
|
||||
|
||||
// Collect information from expressions
|
||||
struct ExprInfo {
|
||||
struct GenericValueEqual {
|
||||
using GenericValue = proto::plan::GenericValue;
|
||||
bool
|
||||
operator()(const GenericValue& lhs, const GenericValue& rhs) const {
|
||||
if (lhs.val_case() != rhs.val_case())
|
||||
return false;
|
||||
switch (lhs.val_case()) {
|
||||
case GenericValue::kBoolVal:
|
||||
return lhs.bool_val() == rhs.bool_val();
|
||||
case GenericValue::kInt64Val:
|
||||
return lhs.int64_val() == rhs.int64_val();
|
||||
case GenericValue::kFloatVal:
|
||||
return lhs.float_val() == rhs.float_val();
|
||||
case GenericValue::kStringVal:
|
||||
return lhs.string_val() == rhs.string_val();
|
||||
case GenericValue::VAL_NOT_SET:
|
||||
return true;
|
||||
default:
|
||||
throw NotImplementedException(
|
||||
"Not supported GenericValue type");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct GenericValueHasher {
|
||||
using GenericValue = proto::plan::GenericValue;
|
||||
std::size_t
|
||||
operator()(const GenericValue& value) const {
|
||||
std::size_t h = 0;
|
||||
switch (value.val_case()) {
|
||||
case GenericValue::kBoolVal:
|
||||
h = std::hash<bool>()(value.bool_val());
|
||||
break;
|
||||
case GenericValue::kInt64Val:
|
||||
h = std::hash<int64_t>()(value.int64_val());
|
||||
break;
|
||||
case GenericValue::kFloatVal:
|
||||
h = std::hash<float>()(value.float_val());
|
||||
break;
|
||||
case GenericValue::kStringVal:
|
||||
h = std::hash<std::string>()(value.string_val());
|
||||
break;
|
||||
case GenericValue::VAL_NOT_SET:
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException(
|
||||
"Not supported GenericValue type");
|
||||
}
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
/* For Materialized View (vectors and scalars), that is when performing filtered search. */
|
||||
// The map describes which scalar field is involved during search,
|
||||
// and the set of category values
|
||||
// for example, if we have scalar field `color` with field id `111` and it has three categories: red, green, blue
|
||||
// expression `color == "red"`, yields `111 -> (red)`
|
||||
// expression `color == "red" && color == "green"`, yields `111 -> (red, green)`
|
||||
std::unordered_map<int64_t,
|
||||
std::unordered_set<proto::plan::GenericValue,
|
||||
GenericValueHasher,
|
||||
GenericValueEqual>>
|
||||
field_id_to_values;
|
||||
// whether the search exression has AND (&&) logical operator only
|
||||
bool is_pure_and = true;
|
||||
// whether the search expression has NOT (!) logical unary operator
|
||||
bool has_not = false;
|
||||
};
|
||||
|
||||
inline bool
|
||||
IsMaterializedViewSupported(const DataType& data_type) {
|
||||
return data_type == DataType::BOOL || data_type == DataType::INT8 ||
|
||||
data_type == DataType::INT16 || data_type == DataType::INT32 ||
|
||||
data_type == DataType::INT64 || data_type == DataType::FLOAT ||
|
||||
data_type == DataType::DOUBLE || data_type == DataType::VARCHAR ||
|
||||
data_type == DataType::STRING;
|
||||
}
|
||||
|
||||
struct ColumnInfo {
|
||||
FieldId field_id_;
|
||||
DataType data_type_;
|
||||
|
@ -111,6 +192,9 @@ class ITypeExpr {
|
|||
return inputs_;
|
||||
}
|
||||
|
||||
virtual void
|
||||
GatherInfo(ExprInfo& info) const {};
|
||||
|
||||
protected:
|
||||
DataType type_;
|
||||
std::vector<std::shared_ptr<const ITypeExpr>> inputs_;
|
||||
|
@ -235,6 +319,31 @@ class UnaryRangeFilterExpr : public ITypeFilterExpr {
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
void
|
||||
GatherInfo(ExprInfo& info) const override {
|
||||
if (IsMaterializedViewSupported(column_.data_type_)) {
|
||||
info.field_id_to_values[column_.field_id_.get()].insert(val_);
|
||||
|
||||
// for expression `Field == Value`, we do nothing else
|
||||
if (op_type_ == proto::plan::OpType::Equal) {
|
||||
return;
|
||||
}
|
||||
|
||||
// for expression `Field != Value`, we consider it equivalent
|
||||
// as `not (Field == Value)`, so we set `has_not` to true
|
||||
if (op_type_ == proto::plan::OpType::NotEqual) {
|
||||
info.has_not = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// for other unary range filter <, >, <=, >=
|
||||
// we add a dummy value to indicate multiple values
|
||||
// this double insertion is intentional and the default GenericValue
|
||||
// will be considered as equal in the unordered_set
|
||||
info.field_id_to_values[column_.field_id_.get()].emplace();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
const ColumnInfo column_;
|
||||
const proto::plan::OpType op_type_;
|
||||
|
@ -293,6 +402,15 @@ class LogicalUnaryExpr : public ITypeFilterExpr {
|
|||
inputs_[0]->ToString());
|
||||
}
|
||||
|
||||
void
|
||||
GatherInfo(ExprInfo& info) const override {
|
||||
if (op_type_ == OpType::LogicalNot) {
|
||||
info.has_not = true;
|
||||
}
|
||||
assert(inputs_.size() == 1);
|
||||
inputs_[0]->GatherInfo(info);
|
||||
}
|
||||
|
||||
const OpType op_type_;
|
||||
};
|
||||
|
||||
|
@ -323,6 +441,14 @@ class TermFilterExpr : public ITypeFilterExpr {
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
void
|
||||
GatherInfo(ExprInfo& info) const override {
|
||||
if (IsMaterializedViewSupported(column_.data_type_)) {
|
||||
info.field_id_to_values[column_.field_id_.get()].insert(
|
||||
vals_.begin(), vals_.end());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
const ColumnInfo column_;
|
||||
const std::vector<proto::plan::GenericValue> vals_;
|
||||
|
@ -368,6 +494,16 @@ class LogicalBinaryExpr : public ITypeFilterExpr {
|
|||
return GetOpTypeString();
|
||||
}
|
||||
|
||||
void
|
||||
GatherInfo(ExprInfo& info) const override {
|
||||
if (op_type_ == OpType::Or) {
|
||||
info.is_pure_and = false;
|
||||
}
|
||||
assert(inputs_.size() == 2);
|
||||
inputs_[0]->GatherInfo(info);
|
||||
inputs_[1]->GatherInfo(info);
|
||||
}
|
||||
|
||||
public:
|
||||
const OpType op_type_;
|
||||
};
|
||||
|
@ -400,6 +536,14 @@ class BinaryRangeFilterExpr : public ITypeFilterExpr {
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
void
|
||||
GatherInfo(ExprInfo& info) const override {
|
||||
if (IsMaterializedViewSupported(column_.data_type_)) {
|
||||
info.field_id_to_values[column_.field_id_.get()].insert(lower_val_);
|
||||
info.field_id_to_values[column_.field_id_.get()].insert(upper_val_);
|
||||
}
|
||||
}
|
||||
|
||||
const ColumnInfo column_;
|
||||
const proto::plan::GenericValue lower_val_;
|
||||
const proto::plan::GenericValue upper_val_;
|
||||
|
|
|
@ -63,6 +63,11 @@ class PlanNode {
|
|||
virtual std::string_view
|
||||
name() const = 0;
|
||||
|
||||
virtual expr::ExprInfo
|
||||
GatherInfo() const {
|
||||
return {};
|
||||
};
|
||||
|
||||
private:
|
||||
PlanNodeId id_;
|
||||
};
|
||||
|
@ -243,6 +248,13 @@ class FilterBitsNode : public PlanNode {
|
|||
filter_->ToString());
|
||||
}
|
||||
|
||||
expr::ExprInfo
|
||||
GatherInfo() const override {
|
||||
expr::ExprInfo info;
|
||||
filter_->GatherInfo(info);
|
||||
return info;
|
||||
}
|
||||
|
||||
private:
|
||||
const std::vector<PlanNodePtr> sources_;
|
||||
const expr::TypedExprPtr filter_;
|
||||
|
|
|
@ -202,6 +202,8 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
|
|||
search_info.round_decimal_ = query_info_proto.round_decimal();
|
||||
search_info.search_params_ =
|
||||
nlohmann::json::parse(query_info_proto.search_params());
|
||||
search_info.materialized_view_involved =
|
||||
query_info_proto.materialized_view_involved();
|
||||
|
||||
if (query_info_proto.group_by_field_id() > 0) {
|
||||
auto group_by_field_id = FieldId(query_info_proto.group_by_field_id());
|
||||
|
|
|
@ -11,8 +11,10 @@
|
|||
|
||||
#include "query/generated/ExecPlanNodeVisitor.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "expr/ITypeExpr.h"
|
||||
#include "query/PlanImpl.h"
|
||||
#include "query/SubSearchResult.h"
|
||||
#include "query/generated/ExecExprVisitor.h"
|
||||
|
@ -24,7 +26,7 @@
|
|||
#include "exec/Task.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "query/GroupByOperator.h"
|
||||
|
||||
#include "knowhere/comp/materialized_view.h"
|
||||
namespace milvus::query {
|
||||
|
||||
namespace impl {
|
||||
|
@ -139,6 +141,11 @@ ExecPlanNodeVisitor::ExecuteExprNodeInternal(
|
|||
// std::cout << bitset_holder->size() << " . " << s << std::endl;
|
||||
}
|
||||
|
||||
expr::ExprInfo
|
||||
GatherInfoBasedOnExpr(const std::shared_ptr<milvus::plan::PlanNode>& node) {
|
||||
return node->GatherInfo();
|
||||
}
|
||||
|
||||
template <typename VectorType>
|
||||
void
|
||||
ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
||||
|
@ -165,6 +172,22 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
|||
|
||||
std::unique_ptr<BitsetType> bitset_holder;
|
||||
if (node.filter_plannode_.has_value()) {
|
||||
if (node.search_info_.materialized_view_involved) {
|
||||
knowhere::MaterializedViewSearchInfo materialized_view_search_info;
|
||||
const auto expr_info =
|
||||
GatherInfoBasedOnExpr(node.filter_plannode_.value());
|
||||
for (const auto& [field_id, vals] : expr_info.field_id_to_values) {
|
||||
materialized_view_search_info
|
||||
.field_id_to_touched_categories_cnt[field_id] = vals.size();
|
||||
}
|
||||
materialized_view_search_info.is_pure_and = expr_info.is_pure_and;
|
||||
materialized_view_search_info.has_not = expr_info.has_not;
|
||||
|
||||
node.search_info_
|
||||
.search_params_[knowhere::meta::MATERIALIZED_VIEW_SEARCH_INFO] =
|
||||
materialized_view_search_info;
|
||||
}
|
||||
|
||||
BitsetType expr_res;
|
||||
ExecuteExprNode(
|
||||
node.filter_plannode_.value(), segment, active_count, expr_res);
|
||||
|
|
|
@ -25,6 +25,7 @@ set(MILVUS_TEST_FILES
|
|||
test_common.cpp
|
||||
test_concurrent_vector.cpp
|
||||
test_c_api.cpp
|
||||
test_expr_materialized_view.cpp
|
||||
test_expr.cpp
|
||||
test_float16.cpp
|
||||
test_growing.cpp
|
||||
|
|
|
@ -0,0 +1,975 @@
|
|||
// Copyright (C) 2019-2024 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <fmt/core.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "common/FieldDataInterface.h"
|
||||
#include "common/Schema.h"
|
||||
#include "common/Types.h"
|
||||
#include "expr/ITypeExpr.h"
|
||||
#include "knowhere/comp/index_param.h"
|
||||
#include "knowhere/comp/materialized_view.h"
|
||||
#include "knowhere/config.h"
|
||||
#include "query/Plan.h"
|
||||
#include "query/PlanImpl.h"
|
||||
#include "query/generated/ExecPlanNodeVisitor.h"
|
||||
#include "plan/PlanNode.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
|
||||
using DataType = milvus::DataType;
|
||||
using Schema = milvus::Schema;
|
||||
using FieldName = milvus::FieldName;
|
||||
using FieldId = milvus::FieldId;
|
||||
|
||||
namespace {
|
||||
|
||||
// DataType::String is not supported in this test
|
||||
const std::unordered_map<DataType, std::string> kDataTypeFieldName = {
|
||||
{DataType::VECTOR_FLOAT, "VectorFloatField"},
|
||||
{DataType::BOOL, "BoolField"},
|
||||
{DataType::INT8, "Int8Field"},
|
||||
{DataType::INT16, "Int16Field"},
|
||||
{DataType::INT32, "Int32Field"},
|
||||
{DataType::INT64, "Int64Field"},
|
||||
{DataType::FLOAT, "FloatField"},
|
||||
{DataType::DOUBLE, "DoubleField"},
|
||||
{DataType::VARCHAR, "VarCharField"},
|
||||
{DataType::JSON, "JSONField"},
|
||||
};
|
||||
|
||||
// use field name to get schema pb string
|
||||
std::string
|
||||
GetDataTypeSchemapbStr(const DataType& data_type) {
|
||||
if (kDataTypeFieldName.find(data_type) == kDataTypeFieldName.end()) {
|
||||
throw std::runtime_error("GetDataTypeSchemapbStr: Invalid data type " +
|
||||
std::to_string(static_cast<int>(data_type)));
|
||||
}
|
||||
|
||||
std::string str = kDataTypeFieldName.at(data_type);
|
||||
str.erase(str.find("Field"), 5);
|
||||
return str;
|
||||
}
|
||||
|
||||
constexpr size_t kFieldIdToTouchedCategoriesCntDefault = 0;
|
||||
constexpr bool kIsPureAndDefault = true;
|
||||
constexpr bool kHasNotDefault = false;
|
||||
|
||||
const std::string kFieldIdPlaceholder = "FID";
|
||||
const std::string kVecFieldIdPlaceholder = "VEC_FID";
|
||||
const std::string kDataTypePlaceholder = "DT";
|
||||
const std::string kValPlaceholder = "VAL";
|
||||
const std::string kPredicatePlaceholder = "PREDICATE_PLACEHOLDER";
|
||||
} // namespace
|
||||
|
||||
class ExprMaterializedViewTest : public testing::Test {
|
||||
public:
|
||||
// NOTE: If your test fixture defines SetUpTestSuite() or TearDownTestSuite()
|
||||
// they must be declared public rather than protected in order to use TEST_P.
|
||||
// https://google.github.io/googletest/advanced.html#value-parameterized-tests
|
||||
static void
|
||||
SetUpTestSuite() {
|
||||
// create schema and assign field_id
|
||||
schema = std::make_shared<Schema>();
|
||||
for (const auto& [data_type, field_name] : kDataTypeFieldName) {
|
||||
if (data_type == DataType::VECTOR_FLOAT) {
|
||||
schema->AddDebugField(
|
||||
field_name, data_type, kDim, knowhere::metric::L2);
|
||||
} else {
|
||||
schema->AddDebugField(field_name, data_type);
|
||||
}
|
||||
data_field_info[data_type].field_id =
|
||||
schema->get_field_id(FieldName(field_name)).get();
|
||||
std::cout << field_name << " with id "
|
||||
<< data_field_info[data_type].field_id << std::endl;
|
||||
}
|
||||
|
||||
// generate data and prepare for search
|
||||
gen_data = std::make_unique<milvus::segcore::GeneratedData>(
|
||||
milvus::segcore::DataGen(schema, N));
|
||||
segment = milvus::segcore::CreateSealedSegment(schema);
|
||||
auto fields = schema->get_fields();
|
||||
milvus::segcore::SealedLoadFieldData(*gen_data, *segment);
|
||||
exec_plan_node_visitor =
|
||||
std::make_unique<milvus::query::ExecPlanNodeVisitor>(
|
||||
*segment, milvus::MAX_TIMESTAMP);
|
||||
|
||||
// prepare plan template
|
||||
plan_template = R"(vector_anns: <
|
||||
field_id: VEC_FID
|
||||
predicates: <
|
||||
PREDICATE_PLACEHOLDER
|
||||
>
|
||||
query_info: <
|
||||
topk: 1
|
||||
round_decimal: 3
|
||||
metric_type: "L2"
|
||||
search_params: "{\"nprobe\": 1}"
|
||||
>
|
||||
placeholder_tag: "$0">)";
|
||||
const int64_t vec_field_id =
|
||||
data_field_info[DataType::VECTOR_FLOAT].field_id;
|
||||
ReplaceAllOccurrence(plan_template,
|
||||
kVecFieldIdPlaceholder,
|
||||
std::to_string(vec_field_id));
|
||||
|
||||
// collect mv supported data type
|
||||
numeric_str_scalar_data_types.clear();
|
||||
for (const auto& e : kDataTypeFieldName) {
|
||||
if (e.first != DataType::VECTOR_FLOAT &&
|
||||
e.first != DataType::JSON) {
|
||||
numeric_str_scalar_data_types.insert(e.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
TearDownTestSuite() {
|
||||
}
|
||||
|
||||
protected:
|
||||
// this function takes an predicate string in schemapb format
|
||||
// and return a vector search plan
|
||||
std::unique_ptr<milvus::query::Plan>
|
||||
CreatePlan(const std::string& predicate_str) {
|
||||
auto plan_str = InterpolateTemplate(predicate_str);
|
||||
auto binary_plan = milvus::segcore::translate_text_plan_to_binary_plan(
|
||||
plan_str.c_str());
|
||||
return milvus::query::CreateSearchPlanByExpr(
|
||||
*schema, binary_plan.data(), binary_plan.size());
|
||||
}
|
||||
|
||||
knowhere::MaterializedViewSearchInfo
|
||||
ExecutePlan(const std::unique_ptr<milvus::query::Plan>& plan) {
|
||||
auto ph_group_raw = milvus::segcore::CreatePlaceholderGroup(1, kDim);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
segment->Search(plan.get(), ph_group.get(), milvus::MAX_TIMESTAMP);
|
||||
return plan->plan_node_->search_info_
|
||||
.search_params_[knowhere::meta::MATERIALIZED_VIEW_SEARCH_INFO];
|
||||
}
|
||||
|
||||
// replace field id, data type and scalar value in a single expr schemapb plan
|
||||
std::string
|
||||
InterpolateSingleExpr(const std::string& expr_in,
|
||||
const DataType& data_type) {
|
||||
std::string expr = expr_in;
|
||||
const int64_t field_id = data_field_info[data_type].field_id;
|
||||
ReplaceAllOccurrence(
|
||||
expr, kFieldIdPlaceholder, std::to_string(field_id));
|
||||
ReplaceAllOccurrence(
|
||||
expr, kDataTypePlaceholder, GetDataTypeSchemapbStr(data_type));
|
||||
|
||||
// The user can use value placeholder and numeric values after it to distinguish different values
|
||||
// eg. VAL1, VAL2, VAL3 should be replaced with different values of the same data type
|
||||
std::regex pattern("VAL(\\d+)");
|
||||
std::string replacement = "";
|
||||
while (std::regex_search(expr, pattern)) {
|
||||
switch (data_type) {
|
||||
case DataType::BOOL:
|
||||
ReplaceAllOccurrence(expr, "VAL0", "bool_val:false");
|
||||
ReplaceAllOccurrence(expr, "VAL1", "bool_val:true");
|
||||
break;
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32:
|
||||
case DataType::INT64:
|
||||
replacement = "int64_val:$1";
|
||||
expr = std::regex_replace(expr, pattern, replacement);
|
||||
break;
|
||||
case DataType::FLOAT:
|
||||
case DataType::DOUBLE:
|
||||
replacement = "float_val:$1";
|
||||
expr = std::regex_replace(expr, pattern, replacement);
|
||||
break;
|
||||
case DataType::VARCHAR:
|
||||
replacement = "string_val:\"str$1\"";
|
||||
expr = std::regex_replace(expr, pattern, replacement);
|
||||
break;
|
||||
case DataType::JSON:
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error(
|
||||
"InterpolateSingleExpr: Invalid data type " +
|
||||
fmt::format("{}", data_type));
|
||||
}
|
||||
|
||||
// fmt::print("expr {} data_type {}\n", expr, data_type);
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
knowhere::MaterializedViewSearchInfo
|
||||
TranslateThenExecuteWhenMvInolved(const std::string& predicate_str) {
|
||||
auto plan = CreatePlan(predicate_str);
|
||||
plan->plan_node_->search_info_.materialized_view_involved = true;
|
||||
return ExecutePlan(plan);
|
||||
}
|
||||
|
||||
knowhere::MaterializedViewSearchInfo
|
||||
TranslateThenExecuteWhenMvNotInolved(const std::string& predicate_str) {
|
||||
auto plan = CreatePlan(predicate_str);
|
||||
plan->plan_node_->search_info_.materialized_view_involved = false;
|
||||
return ExecutePlan(plan);
|
||||
}
|
||||
|
||||
static const std::unordered_set<DataType>&
|
||||
GetNumericAndVarcharScalarDataTypes() {
|
||||
return numeric_str_scalar_data_types;
|
||||
}
|
||||
|
||||
int64_t
|
||||
GetFieldID(const DataType& data_type) {
|
||||
if (data_field_info.find(data_type) == data_field_info.end()) {
|
||||
throw std::runtime_error("Invalid data type " +
|
||||
fmt::format("{}", data_type));
|
||||
}
|
||||
|
||||
return data_field_info[data_type].field_id;
|
||||
}
|
||||
|
||||
void
|
||||
TestMvExpectDefault(knowhere::MaterializedViewSearchInfo& mv) {
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt.size(),
|
||||
kFieldIdToTouchedCategoriesCntDefault);
|
||||
EXPECT_EQ(mv.is_pure_and, kIsPureAndDefault);
|
||||
EXPECT_EQ(mv.has_not, kHasNotDefault);
|
||||
}
|
||||
|
||||
static void
|
||||
ReplaceAllOccurrence(std::string& str,
|
||||
const std::string& occ,
|
||||
const std::string& replace) {
|
||||
str = std::regex_replace(str, std::regex(occ), replace);
|
||||
}
|
||||
|
||||
private:
|
||||
std::string
|
||||
InterpolateTemplate(const std::string& predicate_str) {
|
||||
std::string plan_str = plan_template;
|
||||
ReplaceAllOccurrence(plan_str, kPredicatePlaceholder, predicate_str);
|
||||
return plan_str;
|
||||
}
|
||||
|
||||
protected:
|
||||
struct DataFieldInfo {
|
||||
std::string field_name;
|
||||
int64_t field_id;
|
||||
};
|
||||
|
||||
static std::shared_ptr<Schema> schema;
|
||||
static std::unordered_map<DataType, DataFieldInfo> data_field_info;
|
||||
|
||||
private:
|
||||
static std::unique_ptr<milvus::segcore::GeneratedData> gen_data;
|
||||
static milvus::segcore::SegmentSealedUPtr segment;
|
||||
static std::unique_ptr<milvus::query::ExecPlanNodeVisitor>
|
||||
exec_plan_node_visitor;
|
||||
static std::unordered_set<DataType> numeric_str_scalar_data_types;
|
||||
static std::string plan_template;
|
||||
|
||||
constexpr static size_t N = 1000;
|
||||
constexpr static size_t kDim = 16;
|
||||
};
|
||||
|
||||
std::unordered_map<DataType, ExprMaterializedViewTest::DataFieldInfo>
|
||||
ExprMaterializedViewTest::data_field_info = {};
|
||||
std::shared_ptr<Schema> ExprMaterializedViewTest::schema = nullptr;
|
||||
std::unique_ptr<milvus::segcore::GeneratedData>
|
||||
ExprMaterializedViewTest::gen_data = nullptr;
|
||||
milvus::segcore::SegmentSealedUPtr ExprMaterializedViewTest::segment = nullptr;
|
||||
std::unique_ptr<milvus::query::ExecPlanNodeVisitor>
|
||||
ExprMaterializedViewTest::exec_plan_node_visitor = nullptr;
|
||||
std::unordered_set<DataType>
|
||||
ExprMaterializedViewTest::numeric_str_scalar_data_types = {};
|
||||
std::string ExprMaterializedViewTest::plan_template = "";
|
||||
|
||||
/*************** Test Cases Start ***************/
|
||||
|
||||
// Test plan without expr
|
||||
// Should return default values
|
||||
TEST_F(ExprMaterializedViewTest, TestMvNoExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
for (const auto& mv_involved : {true, false}) {
|
||||
std::string plan_str = R"(vector_anns: <
|
||||
field_id: VEC_FID
|
||||
query_info: <
|
||||
topk: 1
|
||||
round_decimal: 3
|
||||
metric_type: "L2"
|
||||
search_params: "{\"nprobe\": 1}"
|
||||
>
|
||||
placeholder_tag: "$0">)";
|
||||
const int64_t vec_field_id =
|
||||
data_field_info[DataType::VECTOR_FLOAT].field_id;
|
||||
ReplaceAllOccurrence(
|
||||
plan_str, kVecFieldIdPlaceholder, std::to_string(vec_field_id));
|
||||
auto binary_plan =
|
||||
milvus::segcore::translate_text_plan_to_binary_plan(
|
||||
plan_str.c_str());
|
||||
auto plan = milvus::query::CreateSearchPlanByExpr(
|
||||
*schema, binary_plan.data(), binary_plan.size());
|
||||
plan->plan_node_->search_info_.materialized_view_involved =
|
||||
mv_involved;
|
||||
auto mv = ExecutePlan(plan);
|
||||
TestMvExpectDefault(mv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ExprMaterializedViewTest, TestMvNotInvolvedExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
term_expr: <
|
||||
column_info: <
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values: < VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto plan = CreatePlan(predicate);
|
||||
plan->plan_node_->search_info_.materialized_view_involved = false;
|
||||
auto mv = ExecutePlan(plan);
|
||||
TestMvExpectDefault(mv);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ExprMaterializedViewTest, TestMvNotInvolvedJsonExpr) {
|
||||
std::string predicate =
|
||||
InterpolateSingleExpr(
|
||||
R"( json_contains_expr:<column_info:<field_id:FID data_type:DT nested_path:"A" > )",
|
||||
DataType::JSON) +
|
||||
InterpolateSingleExpr(
|
||||
R"( elements:<VAL1> op:Contains elements_same_type:true>)",
|
||||
DataType::INT64);
|
||||
auto plan = CreatePlan(predicate);
|
||||
plan->plan_node_->search_info_.materialized_view_involved = false;
|
||||
auto mv = ExecutePlan(plan);
|
||||
TestMvExpectDefault(mv);
|
||||
}
|
||||
|
||||
// Test json_contains
|
||||
TEST_F(ExprMaterializedViewTest, TestJsonContainsExpr) {
|
||||
std::string predicate =
|
||||
InterpolateSingleExpr(
|
||||
R"( json_contains_expr:<column_info:<field_id:FID data_type:DT nested_path:"A" > )",
|
||||
DataType::JSON) +
|
||||
InterpolateSingleExpr(
|
||||
R"( elements:<VAL1> op:Contains elements_same_type:true>)",
|
||||
DataType::INT64);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
TestMvExpectDefault(mv);
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 in [A]
|
||||
TEST_F(ExprMaterializedViewTest, TestInExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
// fmt::print("Predicate: {}\n", predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 in [A, A, A]
|
||||
TEST_F(ExprMaterializedViewTest, TestInDuplicatesExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL1 >
|
||||
values:< VAL1 >
|
||||
values:< VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
// fmt::print("Predicate: {}\n", predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 not in [A]
|
||||
TEST_F(ExprMaterializedViewTest, TestUnaryLogicalNotInExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
unary_expr:<
|
||||
op:Not
|
||||
child: <
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL1 >
|
||||
>
|
||||
>
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1);
|
||||
EXPECT_EQ(mv.has_not, true);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 == A
|
||||
TEST_F(ExprMaterializedViewTest, TestUnaryRangeEqualExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 != A
|
||||
TEST_F(ExprMaterializedViewTest, TestUnaryRangeNotEqualExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op: NotEqual
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1);
|
||||
EXPECT_EQ(mv.has_not, true);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 < A, F0 <= A, F0 > A, F0 >= A
|
||||
TEST_F(ExprMaterializedViewTest, TestUnaryRangeCompareExpr) {
|
||||
const std::vector<std::string> ops = {
|
||||
"LessThan", "LessEqual", "GreaterThan", "GreaterEqual"};
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
for (const auto& ops_str : ops) {
|
||||
std::string predicate = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op: )" + ops_str +
|
||||
R"(
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F in [A, B, C]
|
||||
TEST_F(ExprMaterializedViewTest, TestInMultipleExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL0 >
|
||||
values:< VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test numeric and varchar expr: F0 not in [A]
|
||||
TEST_F(ExprMaterializedViewTest, TestUnaryLogicalNotInMultipleExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
unary_expr:<
|
||||
op:Not
|
||||
child: <
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL0 >
|
||||
values:< VAL1 >
|
||||
>
|
||||
>
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2);
|
||||
EXPECT_EQ(mv.has_not, true);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Test expr: F0 == A && F1 == B
|
||||
TEST_F(ExprMaterializedViewTest, TestEqualAndEqualExpr) {
|
||||
const DataType c0_data_type = DataType::VARCHAR;
|
||||
const DataType c1_data_type = DataType::INT32;
|
||||
std::string c0 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c0 = InterpolateSingleExpr(c0, c0_data_type);
|
||||
std::string c1 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL2 >
|
||||
>
|
||||
)";
|
||||
c1 = InterpolateSingleExpr(c1, c1_data_type);
|
||||
std::string predicate = R"(
|
||||
binary_expr:<
|
||||
op:LogicalAnd
|
||||
left: <)" + c0 +
|
||||
R"(>
|
||||
right: <)" + c1 +
|
||||
R"(>
|
||||
>
|
||||
)";
|
||||
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
|
||||
// Test expr: F0 == A && F1 in [A, B]
|
||||
TEST_F(ExprMaterializedViewTest, TestEqualAndInExpr) {
|
||||
const DataType c0_data_type = DataType::VARCHAR;
|
||||
const DataType c1_data_type = DataType::INT32;
|
||||
|
||||
std::string c0 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c0 = InterpolateSingleExpr(c0, c0_data_type);
|
||||
std::string c1 = R"(
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL1 >
|
||||
values:< VAL2 >
|
||||
>
|
||||
)";
|
||||
c1 = InterpolateSingleExpr(c1, c1_data_type);
|
||||
std::string predicate = R"(
|
||||
binary_expr:<
|
||||
op:LogicalAnd
|
||||
left: <)" + c0 +
|
||||
R"(>
|
||||
right: <)" + c1 +
|
||||
R"(>
|
||||
>
|
||||
)";
|
||||
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)],
|
||||
2);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
|
||||
// Test expr: F0 == A && F1 not in [A, B]
|
||||
TEST_F(ExprMaterializedViewTest, TestEqualAndNotInExpr) {
|
||||
const DataType c0_data_type = DataType::VARCHAR;
|
||||
const DataType c1_data_type = DataType::INT32;
|
||||
|
||||
std::string c0 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c0 = InterpolateSingleExpr(c0, c0_data_type);
|
||||
std::string c1 = R"(
|
||||
unary_expr:<
|
||||
op:Not
|
||||
child: <
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL1 >
|
||||
values:< VAL2 >
|
||||
>
|
||||
>
|
||||
>
|
||||
)";
|
||||
c1 = InterpolateSingleExpr(c1, c1_data_type);
|
||||
std::string predicate = R"(
|
||||
binary_expr:<
|
||||
op:LogicalAnd
|
||||
left: <)" + c0 +
|
||||
R"(>
|
||||
right: <)" + c1 +
|
||||
R"(>
|
||||
>
|
||||
)";
|
||||
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)],
|
||||
2);
|
||||
EXPECT_EQ(mv.has_not, true);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
|
||||
// Test expr: F0 == A || F1 == B
|
||||
TEST_F(ExprMaterializedViewTest, TestEqualOrEqualExpr) {
|
||||
const DataType c0_data_type = DataType::VARCHAR;
|
||||
const DataType c1_data_type = DataType::INT32;
|
||||
|
||||
std::string c0 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c0 = InterpolateSingleExpr(c0, c0_data_type);
|
||||
std::string c1 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL2 >
|
||||
>
|
||||
)";
|
||||
c1 = InterpolateSingleExpr(c1, c1_data_type);
|
||||
std::string predicate = R"(
|
||||
binary_expr:<
|
||||
op:LogicalOr
|
||||
left: <)" + c0 +
|
||||
R"(>
|
||||
right: <)" + c1 +
|
||||
R"(>
|
||||
>
|
||||
)";
|
||||
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, false);
|
||||
}
|
||||
|
||||
// Test expr: F0 == A && F1 in [A, B] || F2 == A
|
||||
TEST_F(ExprMaterializedViewTest, TestEqualAndInOrEqualExpr) {
|
||||
const DataType c0_data_type = DataType::VARCHAR;
|
||||
const DataType c1_data_type = DataType::INT32;
|
||||
const DataType c2_data_type = DataType::INT16;
|
||||
|
||||
std::string c0 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c0 = InterpolateSingleExpr(c0, c0_data_type);
|
||||
std::string c1 = R"(
|
||||
term_expr:<
|
||||
column_info:<
|
||||
field_id: FID
|
||||
data_type: DT
|
||||
>
|
||||
values:< VAL1 >
|
||||
values:< VAL2 >
|
||||
>
|
||||
)";
|
||||
c1 = InterpolateSingleExpr(c1, c1_data_type);
|
||||
std::string c2 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL3 >
|
||||
>
|
||||
)";
|
||||
c2 = InterpolateSingleExpr(c2, c2_data_type);
|
||||
|
||||
std::string predicate = R"(
|
||||
binary_expr:<
|
||||
op:LogicalAnd
|
||||
left: <)" + c0 +
|
||||
R"(>
|
||||
right: <
|
||||
binary_expr:<
|
||||
op:LogicalOr
|
||||
left: <)" +
|
||||
c1 +
|
||||
R"(>
|
||||
right: <)" +
|
||||
c2 +
|
||||
R"(>
|
||||
>
|
||||
>
|
||||
>
|
||||
)";
|
||||
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 3);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)],
|
||||
2);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c2_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, false);
|
||||
}
|
||||
|
||||
// Test expr: F0 == A && not (F1 == B) || F2 == A
|
||||
TEST_F(ExprMaterializedViewTest, TestEqualAndNotEqualOrEqualExpr) {
|
||||
const DataType c0_data_type = DataType::VARCHAR;
|
||||
const DataType c1_data_type = DataType::INT32;
|
||||
const DataType c2_data_type = DataType::INT16;
|
||||
|
||||
std::string c0 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c0 = InterpolateSingleExpr(c0, c0_data_type);
|
||||
std::string c1 = R"(
|
||||
unary_expr:<
|
||||
op:Not
|
||||
child: <
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL2 >
|
||||
>
|
||||
>
|
||||
>
|
||||
)";
|
||||
c1 = InterpolateSingleExpr(c1, c1_data_type);
|
||||
std::string c2 = R"(
|
||||
unary_range_expr:<
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
op:Equal
|
||||
value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
c2 = InterpolateSingleExpr(c2, c2_data_type);
|
||||
|
||||
std::string predicate = R"(
|
||||
binary_expr:<
|
||||
op:LogicalAnd
|
||||
left: <)" + c0 +
|
||||
R"(>
|
||||
right: <
|
||||
binary_expr:<
|
||||
op:LogicalOr
|
||||
left: <)" +
|
||||
c1 +
|
||||
R"(>
|
||||
right: <)" +
|
||||
c2 +
|
||||
R"(>
|
||||
>
|
||||
>
|
||||
>
|
||||
)";
|
||||
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 3);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c2_data_type)],
|
||||
1);
|
||||
EXPECT_EQ(mv.has_not, true);
|
||||
EXPECT_EQ(mv.is_pure_and, false);
|
||||
}
|
||||
|
||||
// Test expr: A < F0 < B
|
||||
TEST_F(ExprMaterializedViewTest, TestBinaryRangeExpr) {
|
||||
for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) {
|
||||
std::string predicate = R"(
|
||||
binary_range_expr: <
|
||||
column_info:<
|
||||
field_id:FID
|
||||
data_type: DT
|
||||
>
|
||||
lower_value: < VAL0 >
|
||||
upper_value: < VAL1 >
|
||||
>
|
||||
)";
|
||||
predicate = InterpolateSingleExpr(predicate, data_type);
|
||||
auto mv = TranslateThenExecuteWhenMvInolved(predicate);
|
||||
|
||||
ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1);
|
||||
auto field_id = GetFieldID(data_type);
|
||||
ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) !=
|
||||
mv.field_id_to_touched_categories_cnt.end());
|
||||
EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2);
|
||||
EXPECT_EQ(mv.has_not, false);
|
||||
EXPECT_EQ(mv.is_pure_and, true);
|
||||
}
|
||||
}
|
|
@ -287,7 +287,7 @@ func (ib *indexBuilder) process(buildID UniqueID) bool {
|
|||
|
||||
// vector index build needs information of optional scalar fields data
|
||||
optionalFields := make([]*indexpb.OptionalFieldInfo, 0)
|
||||
if Params.CommonCfg.EnableNodeFilteringOnPartitionKey.GetAsBool() && isOptionalScalarFieldSupported(indexType) {
|
||||
if Params.CommonCfg.EnableMaterializedView.GetAsBool() && isOptionalScalarFieldSupported(indexType) {
|
||||
colSchema := ib.meta.GetCollection(meta.CollectionID).Schema
|
||||
hasPartitionKey := typeutil.HasPartitionKey(colSchema)
|
||||
if hasPartitionKey {
|
||||
|
|
|
@ -1413,8 +1413,8 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) {
|
|||
mt.collections[collID].Schema.Fields[1].IsPartitionKey = true
|
||||
}
|
||||
|
||||
paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("true")
|
||||
defer paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("false")
|
||||
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
|
||||
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
|
||||
ib := newIndexBuilder(ctx, &mt, nodeManager, cm, newIndexEngineVersionManager(), nil)
|
||||
|
||||
t.Run("success to get opt field on startup", func(t *testing.T) {
|
||||
|
@ -1464,7 +1464,7 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) {
|
|||
|
||||
// should still be able to build vec index when opt field is not set
|
||||
t.Run("enqueue returns empty optional field when cfg disable", func(t *testing.T) {
|
||||
paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("false")
|
||||
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
|
||||
ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
||||
func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
||||
assert.Zero(t, len(in.OptionalScalarFields), "optional scalar field should be set")
|
||||
|
@ -1478,7 +1478,7 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) {
|
|||
})
|
||||
|
||||
t.Run("enqueue returns empty optional field when index is not HNSW", func(t *testing.T) {
|
||||
paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("true")
|
||||
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
|
||||
mt.indexMeta.indexes[collID][indexID].IndexParams[1].Value = indexparamcheck.IndexDISKANN
|
||||
ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
||||
func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
||||
|
@ -1493,7 +1493,7 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) {
|
|||
})
|
||||
|
||||
t.Run("enqueue returns empty optional field when no partition key", func(t *testing.T) {
|
||||
paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("true")
|
||||
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
|
||||
mt.collections[collID].Schema.Fields[1].IsPartitionKey = false
|
||||
ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
||||
func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
|
||||
|
|
|
@ -60,6 +60,7 @@ message QueryInfo {
|
|||
string search_params = 4;
|
||||
int64 round_decimal = 5;
|
||||
int64 group_by_field_id = 6;
|
||||
bool materialized_view_involved = 7;
|
||||
}
|
||||
|
||||
message ColumnInfo {
|
||||
|
|
|
@ -2653,11 +2653,12 @@ func (node *Proxy) Search(ctx context.Context, request *milvuspb.SearchRequest)
|
|||
),
|
||||
ReqID: paramtable.GetNodeID(),
|
||||
},
|
||||
request: request,
|
||||
tr: timerecord.NewTimeRecorder("search"),
|
||||
qc: node.queryCoord,
|
||||
node: node,
|
||||
lb: node.lbPolicy,
|
||||
request: request,
|
||||
tr: timerecord.NewTimeRecorder("search"),
|
||||
qc: node.queryCoord,
|
||||
node: node,
|
||||
lb: node.lbPolicy,
|
||||
enableMaterializedView: node.enableMaterializedView,
|
||||
}
|
||||
|
||||
guaranteeTs := request.GuaranteeTimestamp
|
||||
|
|
|
@ -123,6 +123,9 @@ type Proxy struct {
|
|||
// resource manager
|
||||
resourceManager resource.Manager
|
||||
replicateStreamManager *ReplicateStreamManager
|
||||
|
||||
// materialized view
|
||||
enableMaterializedView bool
|
||||
}
|
||||
|
||||
// NewProxy returns a Proxy struct.
|
||||
|
@ -290,6 +293,8 @@ func (node *Proxy) Init() error {
|
|||
}
|
||||
log.Debug("init meta cache done", zap.String("role", typeutil.ProxyRole))
|
||||
|
||||
node.enableMaterializedView = Params.CommonCfg.EnableMaterializedView.GetAsBool()
|
||||
|
||||
log.Info("init proxy done", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("Address", node.address))
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -111,6 +111,12 @@ func initSearchRequest(ctx context.Context, t *searchTask) error {
|
|||
log.Warn("failed to get partition ids", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
if t.enableMaterializedView {
|
||||
if planPtr := plan.GetVectorAnns(); planPtr != nil {
|
||||
planPtr.QueryInfo.MaterializedViewInvolved = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -56,11 +56,12 @@ type searchTask struct {
|
|||
result *milvuspb.SearchResults
|
||||
request *milvuspb.SearchRequest
|
||||
|
||||
tr *timerecord.TimeRecorder
|
||||
collectionName string
|
||||
schema *schemaInfo
|
||||
requery bool
|
||||
partitionKeyMode bool
|
||||
tr *timerecord.TimeRecorder
|
||||
collectionName string
|
||||
schema *schemaInfo
|
||||
requery bool
|
||||
partitionKeyMode bool
|
||||
enableMaterializedView bool
|
||||
|
||||
userOutputFields []string
|
||||
|
||||
|
|
|
@ -201,7 +201,7 @@ type commonConfig struct {
|
|||
HighPriorityThreadCoreCoefficient ParamItem `refreshable:"false"`
|
||||
MiddlePriorityThreadCoreCoefficient ParamItem `refreshable:"false"`
|
||||
LowPriorityThreadCoreCoefficient ParamItem `refreshable:"false"`
|
||||
EnableNodeFilteringOnPartitionKey ParamItem `refreshable:"false"`
|
||||
EnableMaterializedView ParamItem `refreshable:"false"`
|
||||
BuildIndexThreadPoolRatio ParamItem `refreshable:"false"`
|
||||
MaxDegree ParamItem `refreshable:"true"`
|
||||
SearchListSize ParamItem `refreshable:"true"`
|
||||
|
@ -435,12 +435,12 @@ This configuration is only used by querynode and indexnode, it selects CPU instr
|
|||
}
|
||||
p.IndexSliceSize.Init(base.mgr)
|
||||
|
||||
p.EnableNodeFilteringOnPartitionKey = ParamItem{
|
||||
Key: "common.nodeFiltering.enableOnPartitionKey",
|
||||
p.EnableMaterializedView = ParamItem{
|
||||
Key: "common.materializedView.enabled",
|
||||
Version: "2.5.0",
|
||||
DefaultValue: "false",
|
||||
}
|
||||
p.EnableNodeFilteringOnPartitionKey.Init(base.mgr)
|
||||
p.EnableMaterializedView.Init(base.mgr)
|
||||
|
||||
p.MaxDegree = ParamItem{
|
||||
Key: "common.DiskIndex.MaxDegree",
|
||||
|
|
Loading…
Reference in New Issue