mirror of https://github.com/milvus-io/milvus.git
enhance: add optimize for like expr (#41222)
pr: #41066 (cherry-pick from master). Signed-off-by: luzhang <luzhang@zilliz.com>. Co-authored-by: luzhang <luzhang@zilliz.com>. hotfix-2.5.9
parent
00b6df86e5
commit
7d74f8b5dd
|
@ -138,6 +138,14 @@ PostfixMatch(const std::string_view str, const std::string_view postfix) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Substring test: returns true when `pattern` occurs anywhere inside `str`.
// An empty pattern is contained in every string, including the empty string.
inline bool
InnerMatch(const std::string_view str, const std::string_view pattern) {
    // A pattern longer than the haystack can never occur in it, so the
    // search is only attempted when the pattern fits.
    const bool pattern_fits = pattern.length() <= str.length();
    return pattern_fits && str.find(pattern) != std::string_view::npos;
}
|
||||
|
||||
inline int64_t
|
||||
upper_align(int64_t value, int64_t align) {
|
||||
Assert(align > 0);
|
||||
|
|
|
@ -320,6 +320,7 @@ IsLikeExpr(std::shared_ptr<Expr> input) {
|
|||
switch (optype) {
|
||||
case proto::plan::PrefixMatch:
|
||||
case proto::plan::PostfixMatch:
|
||||
case proto::plan::InnerMatch:
|
||||
case proto::plan::Match:
|
||||
return true;
|
||||
default:
|
||||
|
|
|
@ -432,6 +432,40 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
|
|||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::PostfixMatch: {
|
||||
UnaryElementFuncForArray<ValueType,
|
||||
proto::plan::PostfixMatch,
|
||||
filter_type>
|
||||
func;
|
||||
func(data,
|
||||
valid_data,
|
||||
size,
|
||||
val,
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::InnerMatch: {
|
||||
UnaryElementFuncForArray<ValueType,
|
||||
proto::plan::InnerMatch,
|
||||
filter_type>
|
||||
func;
|
||||
func(data,
|
||||
valid_data,
|
||||
size,
|
||||
val,
|
||||
index,
|
||||
res,
|
||||
valid_res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::PrefixMatch: {
|
||||
UnaryElementFuncForArray<ValueType,
|
||||
proto::plan::PrefixMatch,
|
||||
|
@ -829,6 +863,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::InnerMatch:
|
||||
case proto::plan::PostfixMatch:
|
||||
case proto::plan::PrefixMatch: {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto offset = i;
|
||||
|
@ -1515,6 +1551,16 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
|||
res = std::move(func(index_ptr, val));
|
||||
break;
|
||||
}
|
||||
case proto::plan::PostfixMatch: {
|
||||
UnaryIndexFunc<T, proto::plan::PostfixMatch> func;
|
||||
res = std::move(func(index_ptr, val));
|
||||
break;
|
||||
}
|
||||
case proto::plan::InnerMatch: {
|
||||
UnaryIndexFunc<T, proto::plan::InnerMatch> func;
|
||||
res = std::move(func(index_ptr, val));
|
||||
break;
|
||||
}
|
||||
case proto::plan::Match: {
|
||||
UnaryIndexFunc<T, proto::plan::Match> func;
|
||||
res = std::move(func(index_ptr, val));
|
||||
|
@ -1722,6 +1768,29 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
|||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::PostfixMatch: {
|
||||
UnaryElementFunc<T, proto::plan::PostfixMatch, filter_type>
|
||||
func;
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::InnerMatch: {
|
||||
UnaryElementFunc<T, proto::plan::InnerMatch, filter_type> func;
|
||||
func(data,
|
||||
size,
|
||||
val,
|
||||
res,
|
||||
bitmap_input,
|
||||
processed_cursor,
|
||||
offsets);
|
||||
break;
|
||||
}
|
||||
case proto::plan::Match: {
|
||||
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
|
||||
func(data,
|
||||
|
|
|
@ -106,9 +106,10 @@ struct UnaryElementFunc {
|
|||
res[i] = src[offset] >= val;
|
||||
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||
res[i] = src[offset] <= val;
|
||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
||||
res[i] = milvus::query::Match(
|
||||
src[offset], val, proto::plan::OpType::PrefixMatch);
|
||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch ||
|
||||
op == proto::plan::OpType::PostfixMatch ||
|
||||
op == proto::plan::OpType::InnerMatch) {
|
||||
res[i] = milvus::query::Match(src[offset], val, op);
|
||||
} else {
|
||||
PanicInfo(
|
||||
OpTypeInvalid,
|
||||
|
@ -119,12 +120,7 @@ struct UnaryElementFunc {
|
|||
return;
|
||||
}
|
||||
|
||||
if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
res[i] = milvus::query::Match(
|
||||
src[i], val, proto::plan::OpType::PrefixMatch);
|
||||
}
|
||||
} else if constexpr (op == proto::plan::OpType::Equal) {
|
||||
if constexpr (op == proto::plan::OpType::Equal) {
|
||||
res.inplace_compare_val<T, milvus::bitset::CompareOpType::EQ>(
|
||||
src, size, val);
|
||||
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
||||
|
@ -225,7 +221,9 @@ struct UnaryElementFuncForArray {
|
|||
UnaryArrayCompare(array_data >= val);
|
||||
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||
UnaryArrayCompare(array_data <= val);
|
||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch ||
|
||||
op == proto::plan::OpType::PostfixMatch ||
|
||||
op == proto::plan::OpType::InnerMatch) {
|
||||
UnaryArrayCompare(milvus::query::Match(array_data, val, op));
|
||||
} else if constexpr (op == proto::plan::OpType::Match) {
|
||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||
|
@ -258,36 +256,57 @@ struct UnaryIndexFuncForMatch {
|
|||
std::conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
|
||||
using Index = index::ScalarIndex<IndexInnerType>;
|
||||
TargetBitmap
|
||||
operator()(Index* index, IndexInnerType val) {
|
||||
if constexpr (!std::is_same_v<T, std::string_view> &&
|
||||
!std::is_same_v<T, std::string>) {
|
||||
PanicInfo(Unsupported, "regex query is only supported on string");
|
||||
} else {
|
||||
if (index->SupportRegexQuery()) {
|
||||
return index->PatternMatch(val);
|
||||
operator()(Index* index, IndexInnerType val, proto::plan::OpType op) {
|
||||
AssertInfo(op == proto::plan::OpType::Match ||
|
||||
op == proto::plan::OpType::PostfixMatch ||
|
||||
op == proto::plan::OpType::InnerMatch ||
|
||||
op == proto::plan::OpType::PrefixMatch,
|
||||
"op must be one of the following: Match, PrefixMatch, "
|
||||
"PostfixMatch, InnerMatch");
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string> ||
|
||||
std::is_same_v<T, std::string_view>) {
|
||||
if (index->SupportPatternMatch()) {
|
||||
return index->PatternMatch(val, op);
|
||||
}
|
||||
|
||||
if (!index->HasRawData()) {
|
||||
PanicInfo(Unsupported,
|
||||
"index don't support regex query and don't have "
|
||||
"raw data");
|
||||
}
|
||||
|
||||
// retrieve raw data to do brute force query, may be very slow.
|
||||
auto cnt = index->Count();
|
||||
TargetBitmap res(cnt);
|
||||
PatternMatchTranslator translator;
|
||||
auto regex_pattern = translator(val);
|
||||
RegexMatcher matcher(regex_pattern);
|
||||
for (int64_t i = 0; i < cnt; i++) {
|
||||
auto raw = index->Reverse_Lookup(i);
|
||||
if (!raw.has_value()) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
if (op == proto::plan::OpType::InnerMatch ||
|
||||
op == proto::plan::OpType::PostfixMatch ||
|
||||
op == proto::plan::OpType::PrefixMatch) {
|
||||
for (int64_t i = 0; i < cnt; i++) {
|
||||
auto raw = index->Reverse_Lookup(i);
|
||||
if (!raw.has_value()) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
}
|
||||
res[i] = milvus::query::Match(raw.value(), val, op);
|
||||
}
|
||||
res[i] = matcher(raw.value());
|
||||
return res;
|
||||
} else {
|
||||
PatternMatchTranslator translator;
|
||||
auto regex_pattern = translator(val);
|
||||
RegexMatcher matcher(regex_pattern);
|
||||
for (int64_t i = 0; i < cnt; i++) {
|
||||
auto raw = index->Reverse_Lookup(i);
|
||||
if (!raw.has_value()) {
|
||||
res[i] = false;
|
||||
continue;
|
||||
}
|
||||
res[i] = matcher(raw.value());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"UnaryIndexFuncForMatch is only supported on string types");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -310,15 +329,12 @@ struct UnaryIndexFunc {
|
|||
return index->Range(val, OpType::GreaterEqual);
|
||||
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||
return index->Range(val, OpType::LessEqual);
|
||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
||||
auto dataset = std::make_unique<Dataset>();
|
||||
dataset->Set(milvus::index::OPERATOR_TYPE,
|
||||
proto::plan::OpType::PrefixMatch);
|
||||
dataset->Set(milvus::index::PREFIX_VALUE, val);
|
||||
return index->Query(std::move(dataset));
|
||||
} else if constexpr (op == proto::plan::OpType::Match) {
|
||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch ||
|
||||
op == proto::plan::OpType::Match ||
|
||||
op == proto::plan::OpType::PostfixMatch ||
|
||||
op == proto::plan::OpType::InnerMatch) {
|
||||
UnaryIndexFuncForMatch<T> func;
|
||||
return func(index, val);
|
||||
return func(index, val, op);
|
||||
} else {
|
||||
PanicInfo(
|
||||
OpTypeInvalid,
|
||||
|
|
|
@ -1228,45 +1228,39 @@ BitmapIndex<std::string>::Query(const DatasetPtr& dataset) {
|
|||
AssertInfo(is_built_, "index has not been built");
|
||||
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
if (op == OpType::PrefixMatch) {
|
||||
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
if (is_mmap_) {
|
||||
for (auto it = bitmap_info_map_.begin();
|
||||
it != bitmap_info_map_.end();
|
||||
++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, prefix, op)) {
|
||||
for (const auto& v : it->second) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, prefix, op)) {
|
||||
for (const auto& v : it->second) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); ++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, prefix, op)) {
|
||||
res |= it->second;
|
||||
auto val = dataset->Get<std::string>(MATCH_VALUE);
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
if (is_mmap_) {
|
||||
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
|
||||
++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, val, op)) {
|
||||
for (const auto& v : it->second) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
} else {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("unsupported op_type:{} for bitmap query", op));
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, val, op)) {
|
||||
for (const auto& v : it->second) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); ++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, val, op)) {
|
||||
res |= it->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -138,10 +138,26 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
}
|
||||
|
||||
// Evaluates a LIKE-style predicate against this bitmap index.
//
// pattern: the operand. For PrefixMatch / PostfixMatch / InnerMatch it is
//          forwarded unchanged to Query() under MATCH_VALUE; for Match it is
//          first run through PatternMatchTranslator and then RegexQuery().
// op:      which kind of match to perform.
//
// Any other op type is rejected through PanicInfo(OpTypeInvalid).
const TargetBitmap
PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
    switch (op) {
        case proto::plan::OpType::PrefixMatch:
        case proto::plan::OpType::PostfixMatch:
        case proto::plan::OpType::InnerMatch: {
            // These three share the dataset-driven Query() path; the
            // operator type tells Query() how to compare each key.
            auto dataset = std::make_unique<Dataset>();
            dataset->Set(milvus::index::OPERATOR_TYPE, op);
            dataset->Set(milvus::index::MATCH_VALUE, pattern);
            return Query(std::move(dataset));
        }
        case proto::plan::OpType::Match: {
            PatternMatchTranslator translator;
            auto regex_pattern = translator(pattern);
            return RegexQuery(regex_pattern);
        }
        default:
            PanicInfo(ErrorCode::OpTypeInvalid,
                      "not supported op type: {} for index PatternMatch",
                      op);
    }
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -104,11 +104,14 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
return internal_index_->Query(dataset);
|
||||
}
|
||||
|
||||
bool
|
||||
SupportPatternMatch() const override {
|
||||
return internal_index_->SupportPatternMatch();
|
||||
}
|
||||
|
||||
// Forwards the pattern-match request (operand and operator type) to the
// wrapped internal index and returns its result unchanged.
const TargetBitmap
PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
    return internal_index_->PatternMatch(pattern, op);
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -395,7 +395,7 @@ const TargetBitmap
|
|||
InvertedIndexTantivy<std::string>::Query(const DatasetPtr& dataset) {
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
if (op == OpType::PrefixMatch) {
|
||||
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
|
||||
auto prefix = dataset->Get<std::string>(MATCH_VALUE);
|
||||
return PrefixMatch(prefix);
|
||||
}
|
||||
return ScalarIndex<std::string>::Query(dataset);
|
||||
|
|
|
@ -188,10 +188,32 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
Query(const DatasetPtr& dataset) override;
|
||||
|
||||
// Evaluates a LIKE-style predicate against the inverted index.
//
// pattern: the operand. For Match it is a full LIKE pattern; for
//          PrefixMatch / PostfixMatch / InnerMatch it is a literal string.
// op:      which kind of match to perform.
//
// PrefixMatch uses the index's native PrefixMatch(). PostfixMatch and
// InnerMatch are expressed as LIKE patterns ("%x" / "%x%"), translated to a
// regex and run through RegexQuery(). Any other op type is rejected through
// PanicInfo(OpTypeInvalid).
const TargetBitmap
PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
    // The Postfix/Inner operand is a literal, so LIKE metacharacters inside
    // it must be escaped before it is embedded in a LIKE pattern; otherwise
    // an operand such as "a%b" would be re-read as "a, anything, b".
    // NOTE(review): assumes PatternMatchTranslator treats '\' as the escape
    // character and '%' / '_' as wildcards, as the planner does - confirm.
    const auto escape_like_literal = [](const std::string& literal) {
        std::string escaped;
        escaped.reserve(literal.size() * 2);
        for (const char c : literal) {
            if (c == '\\' || c == '%' || c == '_') {
                escaped.push_back('\\');
            }
            escaped.push_back(c);
        }
        return escaped;
    };

    switch (op) {
        case proto::plan::OpType::PrefixMatch: {
            return PrefixMatch(pattern);
        }
        case proto::plan::OpType::PostfixMatch: {
            PatternMatchTranslator translator;
            auto regex_pattern =
                translator(fmt::format("%{}", escape_like_literal(pattern)));
            return RegexQuery(regex_pattern);
        }
        case proto::plan::OpType::InnerMatch: {
            PatternMatchTranslator translator;
            auto regex_pattern =
                translator(fmt::format("%{}%", escape_like_literal(pattern)));
            return RegexQuery(regex_pattern);
        }
        case proto::plan::OpType::Match: {
            // Here the operand already is a LIKE pattern; pass it as-is.
            PatternMatchTranslator translator;
            auto regex_pattern = translator(pattern);
            return RegexQuery(regex_pattern);
        }
        default:
            PanicInfo(
                ErrorCode::OpTypeInvalid,
                "not supported op type: {} for inverted index PatternMatch",
                op);
    }
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -25,7 +25,7 @@ constexpr const char* LOWER_BOUND_VALUE = "lower_bound_value";
|
|||
constexpr const char* LOWER_BOUND_INCLUSIVE = "lower_bound_inclusive";
|
||||
constexpr const char* UPPER_BOUND_VALUE = "upper_bound_value";
|
||||
constexpr const char* UPPER_BOUND_INCLUSIVE = "upper_bound_inclusive";
|
||||
constexpr const char* PREFIX_VALUE = "prefix_value";
|
||||
constexpr const char* MATCH_VALUE = "match_value";
|
||||
// below configurations will be persistent, do not edit them.
|
||||
constexpr const char* MARISA_TRIE_INDEX = "marisa_trie_index";
|
||||
constexpr const char* MARISA_STR_IDS = "marisa_trie_str_ids";
|
||||
|
|
|
@ -130,7 +130,7 @@ class ScalarIndex : public IndexBase {
|
|||
}
|
||||
|
||||
// Default pattern-match entry point for scalar indexes: it always raises
// Unsupported via PanicInfo. Index types that can answer pattern queries
// override it. Both parameters are unused in this base version.
virtual const TargetBitmap
PatternMatch(const std::string& pattern, proto::plan::OpType op) {
    PanicInfo(Unsupported, "pattern match is not supported");
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ class StringIndex : public ScalarIndex<std::string> {
|
|||
Query(const DatasetPtr& dataset) override {
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
if (op == OpType::PrefixMatch) {
|
||||
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
|
||||
auto prefix = dataset->Get<std::string>(MATCH_VALUE);
|
||||
return PrefixMatch(prefix);
|
||||
}
|
||||
return ScalarIndex<std::string>::Query(dataset);
|
||||
|
|
|
@ -31,7 +31,7 @@ class StringIndexSort : public ScalarIndexSort<std::string> {
|
|||
Query(const DatasetPtr& dataset) override {
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
if (op == OpType::PrefixMatch) {
|
||||
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
|
||||
auto prefix = dataset->Get<std::string>(MATCH_VALUE);
|
||||
return PrefixMatch(prefix);
|
||||
}
|
||||
return ScalarIndex<std::string>::Query(dataset);
|
||||
|
|
|
@ -32,6 +32,8 @@ Match<std::string>(const std::string& str, const std::string& val, OpType op) {
|
|||
return PrefixMatch(str, val);
|
||||
case OpType::PostfixMatch:
|
||||
return PostfixMatch(str, val);
|
||||
case OpType::InnerMatch:
|
||||
return InnerMatch(str, val);
|
||||
default:
|
||||
PanicInfo(OpTypeInvalid, "not supported");
|
||||
}
|
||||
|
@ -47,6 +49,8 @@ Match<std::string_view>(const std::string_view& str,
|
|||
return PrefixMatch(str, val);
|
||||
case OpType::PostfixMatch:
|
||||
return PostfixMatch(str, val);
|
||||
case OpType::InnerMatch:
|
||||
return InnerMatch(str, val);
|
||||
default:
|
||||
PanicInfo(OpTypeInvalid, "not supported");
|
||||
}
|
||||
|
|
|
@ -5650,6 +5650,59 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that a string BitmapIndex answers prefix / inner / postfix matches
// correctly through Query(), and that PatternMatch() returns the same bitmaps.
TEST(BitmapIndexTest, PatternMatchTest) {
    using namespace milvus::index;

    // Row layout: 0 = "apple", 1 = "banana", 2 = "orange", 3 = "pear".
    BitmapIndex<std::string> bitmap_index;
    std::vector<std::string> rows = {"apple", "banana", "orange", "pear"};
    bitmap_index.Build(rows.size(), rows.data(), nullptr);

    // Dataset form of each query: operator type plus operand.
    auto prefix_query = std::make_shared<Dataset>();
    prefix_query->Set(OPERATOR_TYPE, OpType::PrefixMatch);
    prefix_query->Set(MATCH_VALUE, std::string("a"));

    auto inner_query = std::make_shared<Dataset>();
    inner_query->Set(OPERATOR_TYPE, OpType::InnerMatch);
    inner_query->Set(MATCH_VALUE, std::string("an"));

    auto postfix_query = std::make_shared<Dataset>();
    postfix_query->Set(OPERATOR_TYPE, OpType::PostfixMatch);
    postfix_query->Set(MATCH_VALUE, std::string("a"));

    auto by_prefix = bitmap_index.Query(prefix_query);
    auto by_inner = bitmap_index.Query(inner_query);
    auto by_postfix = bitmap_index.Query(postfix_query);

    // Prefix "a": "apple" matches, "orange" does not.
    EXPECT_TRUE(by_prefix[0]);
    EXPECT_FALSE(by_prefix[2]);

    // Inner "an": "banana" and "orange" contain it, "apple" does not.
    EXPECT_FALSE(by_inner[0]);
    EXPECT_TRUE(by_inner[1]);
    EXPECT_TRUE(by_inner[2]);

    // Postfix "a": only "banana" ends with it among rows 0..2.
    EXPECT_FALSE(by_postfix[0]);
    EXPECT_TRUE(by_postfix[1]);
    EXPECT_FALSE(by_postfix[2]);

    // The PatternMatch() entry point must agree with the Query() results.
    auto by_prefix_pm =
        bitmap_index.PatternMatch(std::string("a"), OpType::PrefixMatch);
    auto by_inner_pm =
        bitmap_index.PatternMatch(std::string("an"), OpType::InnerMatch);
    auto by_postfix_pm =
        bitmap_index.PatternMatch(std::string("a"), OpType::PostfixMatch);

    EXPECT_TRUE(by_prefix == by_prefix_pm);
    EXPECT_TRUE(by_inner == by_inner_pm);
    EXPECT_TRUE(by_postfix == by_postfix_pm);
}
|
||||
|
||||
TEST(Expr, TestExprNull) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL, true);
|
||||
|
|
|
@ -620,7 +620,7 @@ test_string() {
|
|||
auto dataset = std::make_shared<Dataset>();
|
||||
auto prefix = data[0];
|
||||
dataset->Set(index::OPERATOR_TYPE, OpType::PrefixMatch);
|
||||
dataset->Set(index::PREFIX_VALUE, prefix);
|
||||
dataset->Set(index::MATCH_VALUE, prefix);
|
||||
auto bitset = real_index->Query(dataset);
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
|
|
|
@ -465,6 +465,96 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnInvertedIndexStringField) {
|
|||
ASSERT_TRUE(final[4]);
|
||||
}
|
||||
|
||||
// Runs a PrefixMatch("a") filter on the "str" field after loading the
// inverted index, and checks the first five rows of the result bitmap.
TEST_F(SealedSegmentRegexQueryTest, PrefixMatchOnInvertedIndexStringField) {
    std::string operand = "a";

    // Build the unary-range expression proto targeting the "str" column.
    const auto& str_field = (*schema)[FieldName("str")];
    auto str_column = test::GenColumnInfo(str_field.get_id().get(),
                                          proto::schema::DataType::VarChar,
                                          false,
                                          false);
    auto range_expr = test::GenUnaryRangeExpr(OpType::PrefixMatch, operand);
    range_expr->set_allocated_column_info(str_column);
    auto expr_proto = test::GenExpr();
    expr_proto->set_allocated_unary_range_expr(range_expr);

    // Parse the proto into a filter plan node.
    ProtoParser proto_parser(*schema);
    auto typed_expr = proto_parser.ParseExprs(*expr_proto);
    auto filter_node =
        std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);

    LoadInvertedIndex();

    auto sealed = dynamic_cast<SegmentSealedImpl*>(seg.get());
    BitsetType matched;
    matched = ExecuteQueryExpr(filter_node, sealed, N, MAX_TIMESTAMP);

    // Row 0 must not match; rows 1..4 must.
    ASSERT_FALSE(matched[0]);
    for (int row = 1; row <= 4; ++row) {
        ASSERT_TRUE(matched[row]);
    }
}
|
||||
|
||||
// Runs a PostfixMatch("a") filter on the "str" field after loading the
// inverted index, and checks that none of the first five rows match.
TEST_F(SealedSegmentRegexQueryTest, PostfixMatchOnInvertedIndexStringField) {
    std::string operand = "a";

    // Build the unary-range expression proto targeting the "str" column.
    const auto& str_field = (*schema)[FieldName("str")];
    auto str_column = test::GenColumnInfo(str_field.get_id().get(),
                                          proto::schema::DataType::VarChar,
                                          false,
                                          false);
    auto range_expr = test::GenUnaryRangeExpr(OpType::PostfixMatch, operand);
    range_expr->set_allocated_column_info(str_column);
    auto expr_proto = test::GenExpr();
    expr_proto->set_allocated_unary_range_expr(range_expr);

    // Parse the proto into a filter plan node.
    ProtoParser proto_parser(*schema);
    auto typed_expr = proto_parser.ParseExprs(*expr_proto);
    auto filter_node =
        std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);

    LoadInvertedIndex();

    auto sealed = dynamic_cast<SegmentSealedImpl*>(seg.get());
    BitsetType matched;
    matched = ExecuteQueryExpr(filter_node, sealed, N, MAX_TIMESTAMP);

    // Rows 0..4 are all expected to be rejected.
    for (int row = 0; row <= 4; ++row) {
        ASSERT_FALSE(matched[row]);
    }
}
|
||||
|
||||
// Runs an InnerMatch("a") filter on the "str" field after loading the
// inverted index, and checks the first five rows of the result bitmap.
TEST_F(SealedSegmentRegexQueryTest, InnerMatchOnInvertedIndexStringField) {
    std::string operand = "a";

    // Build the unary-range expression proto targeting the "str" column.
    const auto& str_field = (*schema)[FieldName("str")];
    auto str_column = test::GenColumnInfo(str_field.get_id().get(),
                                          proto::schema::DataType::VarChar,
                                          false,
                                          false);
    auto range_expr = test::GenUnaryRangeExpr(OpType::InnerMatch, operand);
    range_expr->set_allocated_column_info(str_column);
    auto expr_proto = test::GenExpr();
    expr_proto->set_allocated_unary_range_expr(range_expr);

    // Parse the proto into a filter plan node.
    ProtoParser proto_parser(*schema);
    auto typed_expr = proto_parser.ParseExprs(*expr_proto);
    auto filter_node =
        std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);

    LoadInvertedIndex();

    auto sealed = dynamic_cast<SegmentSealedImpl*>(seg.get());
    BitsetType matched;
    matched = ExecuteQueryExpr(filter_node, sealed, N, MAX_TIMESTAMP);

    // Row 0 must not match; rows 1..4 must.
    ASSERT_FALSE(matched[0]);
    for (int row = 1; row <= 4; ++row) {
        ASSERT_TRUE(matched[row]);
    }
}
|
||||
|
||||
TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnUnsupportedIndex) {
|
||||
std::string operand = "a%";
|
||||
const auto& str_meta = schema->operator[](FieldName("str"));
|
||||
|
|
|
@ -221,7 +221,7 @@ TEST_F(StringIndexMarisaTest, Query) {
|
|||
auto ds = std::make_shared<knowhere::DataSet>();
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
|
||||
milvus::OpType::PrefixMatch);
|
||||
ds->Set<std::string>(milvus::index::PREFIX_VALUE,
|
||||
ds->Set<std::string>(milvus::index::MATCH_VALUE,
|
||||
std::move(strs[i]));
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_EQ(bitset.size(), strs.size());
|
||||
|
|
|
@ -39,16 +39,39 @@ TEST(Util, StringMatch) {
|
|||
|
||||
ASSERT_TRUE(PrefixMatch("prefix1", "prefix"));
|
||||
ASSERT_TRUE(PostfixMatch("1postfix", "postfix"));
|
||||
ASSERT_TRUE(InnerMatch("xxinner1xx", "inner"));
|
||||
ASSERT_TRUE(Match(
|
||||
std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
|
||||
ASSERT_TRUE(Match(
|
||||
std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
|
||||
ASSERT_TRUE(Match(std::string("xxpostfixxx"),
|
||||
std::string("postfix"),
|
||||
OpType::InnerMatch));
|
||||
|
||||
ASSERT_FALSE(PrefixMatch("", "longer"));
|
||||
ASSERT_FALSE(PostfixMatch("", "longer"));
|
||||
ASSERT_FALSE(InnerMatch("", "longer"));
|
||||
|
||||
ASSERT_FALSE(PrefixMatch("dontmatch", "prefix"));
|
||||
ASSERT_FALSE(PostfixMatch("dontmatch", "postfix"));
|
||||
ASSERT_FALSE(InnerMatch("dontmatch", "postfix"));
|
||||
|
||||
ASSERT_TRUE(Match(std::string_view("prefix1"),
|
||||
std::string("prefix"),
|
||||
OpType::PrefixMatch));
|
||||
|
||||
ASSERT_TRUE(Match(std::string_view("1postfix"),
|
||||
std::string("postfix"),
|
||||
OpType::PostfixMatch));
|
||||
|
||||
ASSERT_TRUE(Match(std::string_view("xxpostfixxx"),
|
||||
std::string("postfix"),
|
||||
OpType::InnerMatch));
|
||||
ASSERT_TRUE(
|
||||
Match(std::string_view("x"), std::string("x"), OpType::PrefixMatch));
|
||||
ASSERT_FALSE(
|
||||
Match(std::string_view(""), std::string("x"), OpType::InnerMatch));
|
||||
ASSERT_TRUE(
|
||||
Match(std::string_view("x"), std::string(""), OpType::InnerMatch));
|
||||
}
|
||||
|
||||
TEST(Util, GetDeleteBitmap) {
|
||||
|
|
|
@ -13,65 +13,74 @@ var wildcards = map[byte]struct{}{
|
|||
|
||||
var escapeCharacter byte = '\\'
|
||||
|
||||
// hasWildcards returns true if pattern contains any wildcard.
|
||||
func hasWildcards(pattern string) (string, bool) {
|
||||
var result strings.Builder
|
||||
hasWildcard := false
|
||||
|
||||
for i := 0; i < len(pattern); i++ {
|
||||
if pattern[i] == '\\' && i+1 < len(pattern) {
|
||||
next := pattern[i+1]
|
||||
if next == '_' || next == '%' {
|
||||
result.WriteByte(next)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if pattern[i] == '_' || pattern[i] == '%' {
|
||||
hasWildcard = true
|
||||
}
|
||||
result.WriteByte(pattern[i])
|
||||
func optimizeLikePattern(pattern string) (planpb.OpType, string, bool) {
|
||||
if len(pattern) == 0 {
|
||||
return planpb.OpType_Equal, "", true
|
||||
}
|
||||
|
||||
return result.String(), hasWildcard
|
||||
}
|
||||
if pattern == "%" || pattern == "%%" {
|
||||
return planpb.OpType_PrefixMatch, "", true
|
||||
}
|
||||
|
||||
// findLastNotOfWildcards find the last location not of last wildcard.
|
||||
func findLastNotOfWildcards(pattern string) int {
|
||||
loc := len(pattern) - 1
|
||||
for ; loc >= 0; loc-- {
|
||||
_, ok := wildcards[pattern[loc]]
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
if ok {
|
||||
if loc > 0 && pattern[loc-1] == escapeCharacter {
|
||||
break
|
||||
process := func(s string) (string, bool) {
|
||||
var buf strings.Builder
|
||||
for i := 0; i < len(s); i++ {
|
||||
c := s[i]
|
||||
if c == escapeCharacter && i+1 < len(s) {
|
||||
next := s[i+1]
|
||||
if _, ok := wildcards[next]; ok {
|
||||
buf.WriteByte(next)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
}
|
||||
if _, ok := wildcards[c]; ok {
|
||||
return "", false
|
||||
}
|
||||
buf.WriteByte(c)
|
||||
}
|
||||
return buf.String(), true
|
||||
}
|
||||
|
||||
leading := pattern[0] == '%'
|
||||
trailing := pattern[len(pattern)-1] == '%'
|
||||
|
||||
switch {
|
||||
case leading && trailing:
|
||||
inner := pattern[1 : len(pattern)-1]
|
||||
trimmed := strings.TrimLeft(inner, "%")
|
||||
trimmed = strings.TrimRight(trimmed, "%")
|
||||
if subStr, valid := process(trimmed); valid {
|
||||
// if subStr is empty, it means the pattern is all %,
|
||||
// return prefix match and empty operand, means all match
|
||||
if len(subStr) == 0 {
|
||||
return planpb.OpType_PrefixMatch, "", true
|
||||
}
|
||||
return planpb.OpType_InnerMatch, subStr, true
|
||||
}
|
||||
case leading:
|
||||
trimmed := strings.TrimLeft(pattern[1:], "%")
|
||||
if subStr, valid := process(trimmed); valid {
|
||||
return planpb.OpType_PostfixMatch, subStr, true
|
||||
}
|
||||
case trailing:
|
||||
trimmed := strings.TrimRight(pattern[:len(pattern)-1], "%")
|
||||
if subStr, valid := process(trimmed); valid {
|
||||
return planpb.OpType_PrefixMatch, subStr, true
|
||||
}
|
||||
default:
|
||||
if subStr, valid := process(pattern); valid {
|
||||
return planpb.OpType_Equal, subStr, true
|
||||
}
|
||||
}
|
||||
return loc
|
||||
return planpb.OpType_Invalid, "", false
|
||||
}
|
||||
|
||||
// translatePatternMatch translates pattern to related op type and operand.
|
||||
func translatePatternMatch(pattern string) (op planpb.OpType, operand string, err error) {
|
||||
l := len(pattern)
|
||||
loc := findLastNotOfWildcards(pattern)
|
||||
|
||||
if loc < 0 {
|
||||
// always match.
|
||||
return planpb.OpType_PrefixMatch, "", nil
|
||||
}
|
||||
|
||||
newPattern, exist := hasWildcards(pattern[:loc+1])
|
||||
if loc >= l-1 && !exist {
|
||||
// equal match.
|
||||
return planpb.OpType_Equal, newPattern, nil
|
||||
}
|
||||
if !exist {
|
||||
// prefix match.
|
||||
return planpb.OpType_PrefixMatch, newPattern, nil
|
||||
op, operand, ok := optimizeLikePattern(pattern)
|
||||
if ok {
|
||||
return op, operand, nil
|
||||
}
|
||||
|
||||
return planpb.OpType_Match, pattern, nil
|
||||
|
|
|
@ -6,92 +6,6 @@ import (
|
|||
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
||||
)
|
||||
|
||||
func Test_hasWildcards(t *testing.T) {
|
||||
type args struct {
|
||||
pattern string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want bool
|
||||
target string
|
||||
}{
|
||||
{
|
||||
args: args{
|
||||
pattern: "no-wildcards",
|
||||
},
|
||||
want: false,
|
||||
target: "no-wildcards",
|
||||
},
|
||||
{
|
||||
args: args{
|
||||
pattern: "has\\%",
|
||||
},
|
||||
want: false,
|
||||
target: "has%",
|
||||
},
|
||||
{
|
||||
args: args{
|
||||
pattern: "%",
|
||||
},
|
||||
want: true,
|
||||
target: "%",
|
||||
},
|
||||
{
|
||||
args: args{
|
||||
pattern: "has%",
|
||||
},
|
||||
want: true,
|
||||
target: "has%",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
patten, got := hasWildcards(tt.args.pattern)
|
||||
if got != tt.want || patten != tt.target {
|
||||
t.Errorf("hasWildcards(%s) = %v, want %v", tt.args.pattern, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_findLocOfLastWildcard(t *testing.T) {
|
||||
type args struct {
|
||||
pattern string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want int
|
||||
}{
|
||||
{
|
||||
args: args{
|
||||
pattern: "no-wildcards",
|
||||
},
|
||||
want: 11,
|
||||
},
|
||||
{
|
||||
args: args{
|
||||
pattern: "only\\%",
|
||||
},
|
||||
want: 5,
|
||||
},
|
||||
{
|
||||
args: args{
|
||||
pattern: "prefix%%",
|
||||
},
|
||||
want: 5,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := findLastNotOfWildcards(tt.args.pattern); got != tt.want {
|
||||
t.Errorf("findLastNotOfWildcards(%s) = %v, want %v", tt.args.pattern, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_translatePatternMatch(t *testing.T) {
|
||||
type args struct {
|
||||
pattern string
|
||||
|
@ -150,3 +64,60 @@ func Test_translatePatternMatch(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOptimizeLikePattern(t *testing.T) {
|
||||
tests := []struct {
|
||||
pattern string
|
||||
expectedType planpb.OpType
|
||||
expectedStr string
|
||||
expectedOk bool
|
||||
}{
|
||||
// inner match
|
||||
{"%abc%", planpb.OpType_InnerMatch, "abc", true},
|
||||
{"%a\\%b%", planpb.OpType_InnerMatch, "a%b", true},
|
||||
{"%a\\_b%", planpb.OpType_InnerMatch, "a_b", true},
|
||||
{"%a\\\\%", planpb.OpType_InnerMatch, "a\\\\", true},
|
||||
{"%a\t%", planpb.OpType_InnerMatch, "a\t", true},
|
||||
{"%", planpb.OpType_PrefixMatch, "", true},
|
||||
{"%%", planpb.OpType_PrefixMatch, "", true},
|
||||
{"%a%b%", planpb.OpType_Invalid, "", false},
|
||||
{"%a_b%", planpb.OpType_Invalid, "", false},
|
||||
{"%abc\\", planpb.OpType_PostfixMatch, "abc\\", true},
|
||||
{"%核心%", planpb.OpType_InnerMatch, "核心", true},
|
||||
{"%核%", planpb.OpType_InnerMatch, "核", true},
|
||||
{"%\u6838%", planpb.OpType_InnerMatch, "核", true},
|
||||
{"%\u6838%", planpb.OpType_InnerMatch, "\u6838", true},
|
||||
|
||||
// prefix match
|
||||
{"abc%", planpb.OpType_PrefixMatch, "abc", true},
|
||||
{"a\\%bc%", planpb.OpType_PrefixMatch, "a%bc", true},
|
||||
{"a\\_bc%", planpb.OpType_PrefixMatch, "a_bc", true},
|
||||
{"_abc%", planpb.OpType_Invalid, "", false},
|
||||
|
||||
// posix match
|
||||
{"%abc", planpb.OpType_PostfixMatch, "abc", true},
|
||||
{"%a\\_bc", planpb.OpType_PostfixMatch, "a_bc", true},
|
||||
{"%abc_", planpb.OpType_Invalid, "", false},
|
||||
{"%臥蜜", planpb.OpType_PostfixMatch, "臥蜜", true},
|
||||
{"%%臥蜜", planpb.OpType_PostfixMatch, "臥蜜", true},
|
||||
{"%\u81e5\u871c", planpb.OpType_PostfixMatch, "臥蜜", true},
|
||||
|
||||
// equal match
|
||||
{"abc", planpb.OpType_Equal, "abc", true},
|
||||
{"a\\%bc", planpb.OpType_Equal, "a%bc", true},
|
||||
{"a\\_bc", planpb.OpType_Equal, "a_bc", true},
|
||||
{"abc_", planpb.OpType_Invalid, "", false},
|
||||
|
||||
// null pattern
|
||||
{"", planpb.OpType_Equal, "", true},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
actualType, actualStr, actualOk := optimizeLikePattern(test.pattern)
|
||||
if actualType != test.expectedType || actualStr != test.expectedStr || actualOk != test.expectedOk {
|
||||
t.Errorf("optimizeLikePattern(%q) = (%q, %q, %v), expected (%q, %q, %v)",
|
||||
test.pattern, actualType, actualStr, actualOk,
|
||||
test.expectedType, test.expectedStr, test.expectedOk)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@ enum OpType {
|
|||
In = 11; // TODO:: used for term expr
|
||||
NotIn = 12;
|
||||
TextMatch = 13; // text match
|
||||
PhraseMatch = 14; // phrase match
|
||||
InnerMatch = 15; // substring (e.g., "%value%")
|
||||
};
|
||||
|
||||
enum ArithOpType {
|
||||
|
|
|
@ -38,6 +38,8 @@ const (
|
|||
OpType_In OpType = 11 // TODO:: used for term expr
|
||||
OpType_NotIn OpType = 12
|
||||
OpType_TextMatch OpType = 13 // text match
|
||||
OpType_PhraseMatch OpType = 14 // phrase match
|
||||
OpType_InnerMatch OpType = 15 // substring (e.g., "%value%")
|
||||
)
|
||||
|
||||
// Enum value maps for OpType.
|
||||
|
@ -57,6 +59,8 @@ var (
|
|||
11: "In",
|
||||
12: "NotIn",
|
||||
13: "TextMatch",
|
||||
14: "PhraseMatch",
|
||||
15: "InnerMatch",
|
||||
}
|
||||
OpType_value = map[string]int32{
|
||||
"Invalid": 0,
|
||||
|
@ -73,6 +77,8 @@ var (
|
|||
"In": 11,
|
||||
"NotIn": 12,
|
||||
"TextMatch": 13,
|
||||
"PhraseMatch": 14,
|
||||
"InnerMatch": 15,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -2863,7 +2869,7 @@ var file_plan_proto_rawDesc = []byte{
|
|||
0x64, 0x49, 0x64, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x79, 0x6e, 0x61, 0x6d, 0x69, 0x63, 0x5f,
|
||||
0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x79,
|
||||
0x6e, 0x61, 0x6d, 0x69, 0x63, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x42, 0x06, 0x0a, 0x04, 0x6e,
|
||||
0x6f, 0x64, 0x65, 0x2a, 0xc9, 0x01, 0x0a, 0x06, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
|
||||
0x6f, 0x64, 0x65, 0x2a, 0xea, 0x01, 0x0a, 0x06, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
|
||||
0x0a, 0x07, 0x49, 0x6e, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x47,
|
||||
0x72, 0x65, 0x61, 0x74, 0x65, 0x72, 0x54, 0x68, 0x61, 0x6e, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c,
|
||||
0x47, 0x72, 0x65, 0x61, 0x74, 0x65, 0x72, 0x45, 0x71, 0x75, 0x61, 0x6c, 0x10, 0x02, 0x12, 0x0c,
|
||||
|
@ -2875,24 +2881,26 @@ var file_plan_proto_rawDesc = []byte{
|
|||
0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x08, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x61, 0x74, 0x63, 0x68,
|
||||
0x10, 0x09, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x10, 0x0a, 0x12, 0x06, 0x0a,
|
||||
0x02, 0x49, 0x6e, 0x10, 0x0b, 0x12, 0x09, 0x0a, 0x05, 0x4e, 0x6f, 0x74, 0x49, 0x6e, 0x10, 0x0c,
|
||||
0x12, 0x0d, 0x0a, 0x09, 0x54, 0x65, 0x78, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0d, 0x2a,
|
||||
0x58, 0x0a, 0x0b, 0x41, 0x72, 0x69, 0x74, 0x68, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
|
||||
0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x41,
|
||||
0x64, 0x64, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x75, 0x62, 0x10, 0x02, 0x12, 0x07, 0x0a,
|
||||
0x03, 0x4d, 0x75, 0x6c, 0x10, 0x03, 0x12, 0x07, 0x0a, 0x03, 0x44, 0x69, 0x76, 0x10, 0x04, 0x12,
|
||||
0x07, 0x0a, 0x03, 0x4d, 0x6f, 0x64, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x41, 0x72, 0x72, 0x61,
|
||||
0x79, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x10, 0x06, 0x2a, 0x6d, 0x0a, 0x0a, 0x56, 0x65, 0x63,
|
||||
0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x42, 0x69, 0x6e, 0x61, 0x72,
|
||||
0x79, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x6c, 0x6f,
|
||||
0x61, 0x74, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x46, 0x6c,
|
||||
0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x02, 0x12, 0x12, 0x0a,
|
||||
0x0e, 0x42, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10,
|
||||
0x03, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f, 0x61, 0x74,
|
||||
0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x04, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68,
|
||||
0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69, 0x6f,
|
||||
0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x76, 0x32, 0x2f, 0x70,
|
||||
0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x70, 0x6c, 0x61, 0x6e, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f,
|
||||
0x74, 0x6f, 0x33,
|
||||
0x12, 0x0d, 0x0a, 0x09, 0x54, 0x65, 0x78, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0d, 0x12,
|
||||
0x0f, 0x0a, 0x0b, 0x50, 0x68, 0x72, 0x61, 0x73, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0e,
|
||||
0x12, 0x0e, 0x0a, 0x0a, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0f,
|
||||
0x2a, 0x58, 0x0a, 0x0b, 0x41, 0x72, 0x69, 0x74, 0x68, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12,
|
||||
0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03,
|
||||
0x41, 0x64, 0x64, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x75, 0x62, 0x10, 0x02, 0x12, 0x07,
|
||||
0x0a, 0x03, 0x4d, 0x75, 0x6c, 0x10, 0x03, 0x12, 0x07, 0x0a, 0x03, 0x44, 0x69, 0x76, 0x10, 0x04,
|
||||
0x12, 0x07, 0x0a, 0x03, 0x4d, 0x6f, 0x64, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x41, 0x72, 0x72,
|
||||
0x61, 0x79, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x10, 0x06, 0x2a, 0x6d, 0x0a, 0x0a, 0x56, 0x65,
|
||||
0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x42, 0x69, 0x6e, 0x61,
|
||||
0x72, 0x79, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x6c,
|
||||
0x6f, 0x61, 0x74, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x46,
|
||||
0x6c, 0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x02, 0x12, 0x12,
|
||||
0x0a, 0x0e, 0x42, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72,
|
||||
0x10, 0x03, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f, 0x61,
|
||||
0x74, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x04, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74,
|
||||
0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69,
|
||||
0x6f, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x76, 0x32, 0x2f,
|
||||
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x70, 0x6c, 0x61, 0x6e, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72,
|
||||
0x6f, 0x74, 0x6f, 0x33,
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
|
@ -288,7 +288,7 @@ if [[ ${RUN_CPPLINT} == "ON" ]]; then
|
|||
echo "clang-format check passed!"
|
||||
else
|
||||
# compile and build
|
||||
make -j 7 install || exit 1
|
||||
make -j ${jobs} install || exit 1
|
||||
fi
|
||||
|
||||
if command -v ccache &> /dev/null
|
||||
|
|
|
@ -2762,6 +2762,38 @@ class TestQueryString(TestcaseBase):
|
|||
output_fields = [default_int_field_name, default_float_field_name, default_string_field_name]
|
||||
collection_w.query(expression, output_fields=output_fields,
|
||||
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_suffix(self):
    """
    target: test query with a suffix (postfix) LIKE string expression
    method: make the string field the primary field and query with 'varchar like "%0"'
    expected: the query result is checked against the inserted rows whose
              string field value ends with '0'
    """
    general = self.init_collection_general(prefix, insert_data=True,
                                           primary_field=ct.default_string_field_name)
    collection_w, vectors = general[0:2]

    # Expected rows are computed client-side from the inserted dataframe.
    # NOTE(review): the expression names "varchar" while the filter uses
    # default_string_field_name; presumably these are the same field - confirm.
    inserted = vectors[0]
    ends_with_zero = inserted[default_string_field_name].str.endswith('0')
    expected_rows = inserted[ends_with_zero].iloc[:, :3].to_dict('records')

    collection_w.query('varchar like "%0"',
                       output_fields=[default_int_field_name,
                                      default_float_field_name,
                                      default_string_field_name],
                       check_task=CheckTasks.check_query_results,
                       check_items={exp_res: expected_rows})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_inner_match(self):
    """
    target: test query with an inner-match (substring) LIKE string expression
    method: make the string field the primary field and query with 'varchar like "%0%"'
    expected: the query result is checked against the inserted rows whose
              string field value contains '0'
    """
    general = self.init_collection_general(prefix, insert_data=True,
                                           primary_field=ct.default_string_field_name)
    collection_w, vectors = general[0:2]

    # Expected rows are computed client-side from the inserted dataframe.
    # NOTE(review): the expression names "varchar" while the filter uses
    # default_string_field_name; presumably these are the same field - confirm.
    inserted = vectors[0]
    contains_zero = inserted[default_string_field_name].str.contains('0')
    expected_rows = inserted[contains_zero].iloc[:, :3].to_dict('records')

    collection_w.query('varchar like "%0%"',
                       output_fields=[default_int_field_name,
                                      default_float_field_name,
                                      default_string_field_name],
                       check_task=CheckTasks.check_query_results,
                       check_items={exp_res: expected_rows})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_bitmap_alter_offset_cache_param(self):
|
||||
|
@ -2798,9 +2830,10 @@ class TestQueryString(TestcaseBase):
|
|||
collection_w.release()
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_query_string_expr_with_prefixes_auto_index(self):
|
||||
@pytest.mark.parametrize("expression", ['varchar like "0%"', 'varchar like "%0"','varchar like "%0%"'])
|
||||
def test_query_string_expr_with_like_auto_index(self, expression):
|
||||
"""
|
||||
target: test query with prefix string expression and indexed with auto index
|
||||
target: test query with like string expression and indexed with auto index
|
||||
expected: verify query successfully
|
||||
"""
|
||||
collection_w, vectors = self.init_collection_general(prefix, insert_data=True, is_index=False,
|
||||
|
@ -2810,8 +2843,7 @@ class TestQueryString(TestcaseBase):
|
|||
index_name="query_expr_pre_index")
|
||||
collection_w.create_index("varchar", index_name="varchar_auto_index")
|
||||
time.sleep(1)
|
||||
collection_w.load()
|
||||
expression = 'varchar like "0%"'
|
||||
collection_w.load()
|
||||
result, _ = collection_w.query(expression, output_fields=['varchar'])
|
||||
res_len = len(result)
|
||||
collection_w.release()
|
||||
|
@ -2822,7 +2854,8 @@ class TestQueryString(TestcaseBase):
|
|||
assert res_len_1 == res_len
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_query_string_expr_with_prefixes_bitmap(self):
|
||||
@pytest.mark.parametrize("expression", ['varchar like "0%"', 'varchar like "%0"','varchar like "%0%"'])
|
||||
def test_query_string_expr_with_prefixes_bitmap(self, expression):
|
||||
"""
|
||||
target: test query with prefix string expression and indexed with bitmap
|
||||
expected: verify query successfully
|
||||
|
@ -2835,7 +2868,6 @@ class TestQueryString(TestcaseBase):
|
|||
collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"})
|
||||
time.sleep(1)
|
||||
collection_w.load()
|
||||
expression = 'varchar like "0%"'
|
||||
result, _ = collection_w.query(expression, output_fields=['varchar'])
|
||||
res_len = len(result)
|
||||
collection_w.release()
|
||||
|
@ -2846,7 +2878,8 @@ class TestQueryString(TestcaseBase):
|
|||
assert res_len_1 == res_len
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_query_string_expr_with_match_auto_index(self):
|
||||
@pytest.mark.parametrize("expression", ['varchar like "0%"', 'varchar like "%0"','varchar like "%0%"'])
|
||||
def test_query_string_expr_with_match_auto_index(self, expression):
|
||||
"""
|
||||
target: test query with match string expression and indexed with auto index
|
||||
expected: verify query successfully
|
||||
|
@ -2859,7 +2892,6 @@ class TestQueryString(TestcaseBase):
|
|||
collection_w.create_index("varchar", index_name="varchar_auto_index")
|
||||
time.sleep(1)
|
||||
collection_w.load()
|
||||
expression = 'varchar like "%0%"'
|
||||
result, _ = collection_w.query(expression, output_fields=['varchar'])
|
||||
res_len = len(result)
|
||||
collection_w.release()
|
||||
|
@ -3163,6 +3195,80 @@ class TestQueryArray(TestcaseBase):
|
|||
for i in range(len(res)):
|
||||
assert res[i]["id"] == ground_truth[i]
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("use_index", [True, False])
@pytest.mark.parametrize("index_type", ["INVERTED", "BITMAP"])
def test_query_array_with_prefix_like(self, use_index, index_type):
    """
    target: test a prefix LIKE expression on one element of a string array field
    method: insert rows whose string array is [str(i), str(i + 1), str(i + 2)],
            optionally index the array field with index_type, then query with
            'string_array[0] like "0%"'
    expected: the number of returned rows equals the number of inserted rows
              whose first array element starts with '0'
    """
    # Collection built from the generic array schema.
    collection_w = self.init_collection_wrap(schema=cf.gen_array_collection_schema())

    # Prepare the rows, overriding the string array column with known values.
    string_field_value = []
    for start in range(ct.default_nb):
        string_field_value.append([str(v) for v in range(start, start + 3)])
    data = cf.gen_array_dataframe_data()
    data[ct.default_string_array_field_name] = string_field_value
    collection_w.insert(data)

    # The vector field index is always created; the array field index is optional.
    collection_w.create_index(ct.default_float_vec_field_name, {})
    if use_index:
        collection_w.create_index(ct.default_string_array_field_name, {"index_type": index_type})

    # Load, query, and compare the hit count with a client-side filter.
    collection_w.load()
    res = collection_w.query(limit=ct.default_nb, expr='string_array[0] like "0%"')[0]
    log.info(res)
    expected_rows = [row for row in string_field_value if row[0].startswith('0')]
    assert len(res) == len(expected_rows)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("use_index", [True, False])
@pytest.mark.parametrize("index_type", ["INVERTED", "BITMAP"])
def test_query_array_with_suffix_like(self, use_index, index_type):
    """
    target: test a suffix LIKE expression on one element of a string array field
    method: insert rows whose string array is [str(i), str(i + 1), str(i + 2)],
            optionally index the array field with index_type, then query with
            'string_array[0] like "%0"'
    expected: the number of returned rows equals the number of inserted rows
              whose first array element ends with '0'
    """
    # Collection built from the generic array schema.
    collection_w = self.init_collection_wrap(schema=cf.gen_array_collection_schema())

    # Prepare the rows, overriding the string array column with known values.
    string_field_value = []
    for start in range(ct.default_nb):
        string_field_value.append([str(v) for v in range(start, start + 3)])
    data = cf.gen_array_dataframe_data()
    data[ct.default_string_array_field_name] = string_field_value
    collection_w.insert(data)

    # The vector field index is always created; the array field index is optional.
    collection_w.create_index(ct.default_float_vec_field_name, {})
    if use_index:
        collection_w.create_index(ct.default_string_array_field_name, {"index_type": index_type})

    # Load, query, and compare the hit count with a client-side filter.
    collection_w.load()
    res = collection_w.query(limit=ct.default_nb, expr='string_array[0] like "%0"')[0]
    log.info(res)
    expected_rows = [row for row in string_field_value if row[0].endswith('0')]
    assert len(res) == len(expected_rows)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("use_index", [True, False])
@pytest.mark.parametrize("index_type", ["INVERTED", "BITMAP"])
def test_query_array_with_inner_like(self, use_index, index_type):
    """
    target: test an inner-match LIKE expression on one element of a string array field
    method: insert rows whose string array is [str(i), str(i + 1), str(i + 2)],
            optionally index the array field with index_type, then query with
            'string_array[0] like "%0%"'
    expected: the number of returned rows equals the number of inserted rows
              whose first array element contains '0'
    """
    # Collection built from the generic array schema.
    collection_w = self.init_collection_wrap(schema=cf.gen_array_collection_schema())

    # Prepare the rows, overriding the string array column with known values.
    string_field_value = []
    for start in range(ct.default_nb):
        string_field_value.append([str(v) for v in range(start, start + 3)])
    data = cf.gen_array_dataframe_data()
    data[ct.default_string_array_field_name] = string_field_value
    collection_w.insert(data)

    # The vector field index is always created; the array field index is optional.
    collection_w.create_index(ct.default_float_vec_field_name, {})
    if use_index:
        collection_w.create_index(ct.default_string_array_field_name, {"index_type": index_type})

    # Load, query, and compare the hit count with a client-side filter.
    collection_w.load()
    res = collection_w.query(limit=ct.default_nb, expr='string_array[0] like "%0%"')[0]
    log.info(res)
    expected_rows = [row for row in string_field_value if '0' in row[0]]
    assert len(res) == len(expected_rows)
|
||||
|
||||
class TestQueryCount(TestcaseBase):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue