enhance: add optimize for like expr (#41222)

pr: #41066  cherry-pick from master

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
hotfix-2.5.9
zhagnlu 2025-04-14 21:06:32 +08:00 committed by Li Liu
parent 00b6df86e5
commit 7d74f8b5dd
25 changed files with 648 additions and 253 deletions

View File

@ -138,6 +138,14 @@ PostfixMatch(const std::string_view str, const std::string_view postfix) {
return true;
}
// Substring containment check used for "%value%"-style LIKE patterns.
// Returns true when `pattern` occurs anywhere inside `str`; an empty
// pattern is contained in every string (including the empty one).
inline bool
InnerMatch(const std::string_view str, const std::string_view pattern) {
    // A pattern longer than the haystack can never be contained in it.
    const bool pattern_fits = pattern.length() <= str.length();
    return pattern_fits && str.find(pattern) != std::string::npos;
}
inline int64_t
upper_align(int64_t value, int64_t align) {
Assert(align > 0);

View File

@ -320,6 +320,7 @@ IsLikeExpr(std::shared_ptr<Expr> input) {
switch (optype) {
case proto::plan::PrefixMatch:
case proto::plan::PostfixMatch:
case proto::plan::InnerMatch:
case proto::plan::Match:
return true;
default:

View File

@ -432,6 +432,40 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
offsets);
break;
}
case proto::plan::PostfixMatch: {
UnaryElementFuncForArray<ValueType,
proto::plan::PostfixMatch,
filter_type>
func;
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
bitmap_input,
processed_cursor,
offsets);
break;
}
case proto::plan::InnerMatch: {
UnaryElementFuncForArray<ValueType,
proto::plan::InnerMatch,
filter_type>
func;
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
bitmap_input,
processed_cursor,
offsets);
break;
}
case proto::plan::PrefixMatch: {
UnaryElementFuncForArray<ValueType,
proto::plan::PrefixMatch,
@ -829,6 +863,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
}
break;
}
case proto::plan::InnerMatch:
case proto::plan::PostfixMatch:
case proto::plan::PrefixMatch: {
for (size_t i = 0; i < size; ++i) {
auto offset = i;
@ -1515,6 +1551,16 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
res = std::move(func(index_ptr, val));
break;
}
case proto::plan::PostfixMatch: {
UnaryIndexFunc<T, proto::plan::PostfixMatch> func;
res = std::move(func(index_ptr, val));
break;
}
case proto::plan::InnerMatch: {
UnaryIndexFunc<T, proto::plan::InnerMatch> func;
res = std::move(func(index_ptr, val));
break;
}
case proto::plan::Match: {
UnaryIndexFunc<T, proto::plan::Match> func;
res = std::move(func(index_ptr, val));
@ -1722,6 +1768,29 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
offsets);
break;
}
case proto::plan::PostfixMatch: {
UnaryElementFunc<T, proto::plan::PostfixMatch, filter_type>
func;
func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break;
}
case proto::plan::InnerMatch: {
UnaryElementFunc<T, proto::plan::InnerMatch, filter_type> func;
func(data,
size,
val,
res,
bitmap_input,
processed_cursor,
offsets);
break;
}
case proto::plan::Match: {
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
func(data,

View File

@ -106,9 +106,10 @@ struct UnaryElementFunc {
res[i] = src[offset] >= val;
} else if constexpr (op == proto::plan::OpType::LessEqual) {
res[i] = src[offset] <= val;
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
res[i] = milvus::query::Match(
src[offset], val, proto::plan::OpType::PrefixMatch);
} else if constexpr (op == proto::plan::OpType::PrefixMatch ||
op == proto::plan::OpType::PostfixMatch ||
op == proto::plan::OpType::InnerMatch) {
res[i] = milvus::query::Match(src[offset], val, op);
} else {
PanicInfo(
OpTypeInvalid,
@ -119,12 +120,7 @@ struct UnaryElementFunc {
return;
}
if constexpr (op == proto::plan::OpType::PrefixMatch) {
for (int i = 0; i < size; ++i) {
res[i] = milvus::query::Match(
src[i], val, proto::plan::OpType::PrefixMatch);
}
} else if constexpr (op == proto::plan::OpType::Equal) {
if constexpr (op == proto::plan::OpType::Equal) {
res.inplace_compare_val<T, milvus::bitset::CompareOpType::EQ>(
src, size, val);
} else if constexpr (op == proto::plan::OpType::NotEqual) {
@ -225,7 +221,9 @@ struct UnaryElementFuncForArray {
UnaryArrayCompare(array_data >= val);
} else if constexpr (op == proto::plan::OpType::LessEqual) {
UnaryArrayCompare(array_data <= val);
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
} else if constexpr (op == proto::plan::OpType::PrefixMatch ||
op == proto::plan::OpType::PostfixMatch ||
op == proto::plan::OpType::InnerMatch) {
UnaryArrayCompare(milvus::query::Match(array_data, val, op));
} else if constexpr (op == proto::plan::OpType::Match) {
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
@ -258,36 +256,57 @@ struct UnaryIndexFuncForMatch {
std::conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
using Index = index::ScalarIndex<IndexInnerType>;
TargetBitmap
operator()(Index* index, IndexInnerType val) {
if constexpr (!std::is_same_v<T, std::string_view> &&
!std::is_same_v<T, std::string>) {
PanicInfo(Unsupported, "regex query is only supported on string");
} else {
if (index->SupportRegexQuery()) {
return index->PatternMatch(val);
operator()(Index* index, IndexInnerType val, proto::plan::OpType op) {
AssertInfo(op == proto::plan::OpType::Match ||
op == proto::plan::OpType::PostfixMatch ||
op == proto::plan::OpType::InnerMatch ||
op == proto::plan::OpType::PrefixMatch,
"op must be one of the following: Match, PrefixMatch, "
"PostfixMatch, InnerMatch");
if constexpr (std::is_same_v<T, std::string> ||
std::is_same_v<T, std::string_view>) {
if (index->SupportPatternMatch()) {
return index->PatternMatch(val, op);
}
if (!index->HasRawData()) {
PanicInfo(Unsupported,
"index don't support regex query and don't have "
"raw data");
}
// retrieve raw data to do brute force query, may be very slow.
auto cnt = index->Count();
TargetBitmap res(cnt);
PatternMatchTranslator translator;
auto regex_pattern = translator(val);
RegexMatcher matcher(regex_pattern);
for (int64_t i = 0; i < cnt; i++) {
auto raw = index->Reverse_Lookup(i);
if (!raw.has_value()) {
res[i] = false;
continue;
if (op == proto::plan::OpType::InnerMatch ||
op == proto::plan::OpType::PostfixMatch ||
op == proto::plan::OpType::PrefixMatch) {
for (int64_t i = 0; i < cnt; i++) {
auto raw = index->Reverse_Lookup(i);
if (!raw.has_value()) {
res[i] = false;
continue;
}
res[i] = milvus::query::Match(raw.value(), val, op);
}
res[i] = matcher(raw.value());
return res;
} else {
PatternMatchTranslator translator;
auto regex_pattern = translator(val);
RegexMatcher matcher(regex_pattern);
for (int64_t i = 0; i < cnt; i++) {
auto raw = index->Reverse_Lookup(i);
if (!raw.has_value()) {
res[i] = false;
continue;
}
res[i] = matcher(raw.value());
}
return res;
}
return res;
}
PanicInfo(ErrorCode::Unsupported,
"UnaryIndexFuncForMatch is only supported on string types");
}
};
@ -310,15 +329,12 @@ struct UnaryIndexFunc {
return index->Range(val, OpType::GreaterEqual);
} else if constexpr (op == proto::plan::OpType::LessEqual) {
return index->Range(val, OpType::LessEqual);
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
auto dataset = std::make_unique<Dataset>();
dataset->Set(milvus::index::OPERATOR_TYPE,
proto::plan::OpType::PrefixMatch);
dataset->Set(milvus::index::PREFIX_VALUE, val);
return index->Query(std::move(dataset));
} else if constexpr (op == proto::plan::OpType::Match) {
} else if constexpr (op == proto::plan::OpType::PrefixMatch ||
op == proto::plan::OpType::Match ||
op == proto::plan::OpType::PostfixMatch ||
op == proto::plan::OpType::InnerMatch) {
UnaryIndexFuncForMatch<T> func;
return func(index, val);
return func(index, val, op);
} else {
PanicInfo(
OpTypeInvalid,

View File

@ -1228,45 +1228,39 @@ BitmapIndex<std::string>::Query(const DatasetPtr& dataset) {
AssertInfo(is_built_, "index has not been built");
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
if (op == OpType::PrefixMatch) {
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
TargetBitmap res(total_num_rows_, false);
if (is_mmap_) {
for (auto it = bitmap_info_map_.begin();
it != bitmap_info_map_.end();
++it) {
const auto& key = it->first;
if (milvus::query::Match(key, prefix, op)) {
for (const auto& v : it->second) {
res.set(v);
}
}
}
return res;
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (auto it = data_.begin(); it != data_.end(); ++it) {
const auto& key = it->first;
if (milvus::query::Match(key, prefix, op)) {
for (const auto& v : it->second) {
res.set(v);
}
}
}
} else {
for (auto it = bitsets_.begin(); it != bitsets_.end(); ++it) {
const auto& key = it->first;
if (milvus::query::Match(key, prefix, op)) {
res |= it->second;
auto val = dataset->Get<std::string>(MATCH_VALUE);
TargetBitmap res(total_num_rows_, false);
if (is_mmap_) {
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
++it) {
const auto& key = it->first;
if (milvus::query::Match(key, val, op)) {
for (const auto& v : it->second) {
res.set(v);
}
}
}
return res;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported op_type:{} for bitmap query", op));
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (auto it = data_.begin(); it != data_.end(); ++it) {
const auto& key = it->first;
if (milvus::query::Match(key, val, op)) {
for (const auto& v : it->second) {
res.set(v);
}
}
}
} else {
for (auto it = bitsets_.begin(); it != bitsets_.end(); ++it) {
const auto& key = it->first;
if (milvus::query::Match(key, val, op)) {
res |= it->second;
}
}
}
return res;
}
template <typename T>

View File

@ -138,10 +138,26 @@ class BitmapIndex : public ScalarIndex<T> {
}
const TargetBitmap
PatternMatch(const std::string& pattern) override {
PatternMatchTranslator translator;
auto regex_pattern = translator(pattern);
return RegexQuery(regex_pattern);
PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
switch (op) {
case proto::plan::OpType::PrefixMatch:
case proto::plan::OpType::PostfixMatch:
case proto::plan::OpType::InnerMatch: {
auto dataset = std::make_unique<Dataset>();
dataset->Set(milvus::index::OPERATOR_TYPE, op);
dataset->Set(milvus::index::MATCH_VALUE, pattern);
return Query(std::move(dataset));
}
case proto::plan::OpType::Match: {
PatternMatchTranslator translator;
auto regex_pattern = translator(pattern);
return RegexQuery(regex_pattern);
}
default:
PanicInfo(ErrorCode::OpTypeInvalid,
"not supported op type: {} for index PatterMatch",
op);
}
}
bool

View File

@ -104,11 +104,14 @@ class HybridScalarIndex : public ScalarIndex<T> {
return internal_index_->Query(dataset);
}
// Reports whether pattern matching is supported by forwarding the question
// to the wrapped internal index; this class adds no logic of its own here.
bool
SupportPatternMatch() const override {
return internal_index_->SupportPatternMatch();
}
const TargetBitmap
PatternMatch(const std::string& pattern) override {
PatternMatchTranslator translator;
auto regex_pattern = translator(pattern);
return RegexQuery(regex_pattern);
PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
return internal_index_->PatternMatch(pattern, op);
}
bool

View File

@ -395,7 +395,7 @@ const TargetBitmap
InvertedIndexTantivy<std::string>::Query(const DatasetPtr& dataset) {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
if (op == OpType::PrefixMatch) {
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
auto prefix = dataset->Get<std::string>(MATCH_VALUE);
return PrefixMatch(prefix);
}
return ScalarIndex<std::string>::Query(dataset);

View File

@ -188,10 +188,32 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
Query(const DatasetPtr& dataset) override;
const TargetBitmap
PatternMatch(const std::string& pattern) override {
PatternMatchTranslator translator;
auto regex_pattern = translator(pattern);
return RegexQuery(regex_pattern);
PatternMatch(const std::string& pattern, proto::plan::OpType op) override {
switch (op) {
case proto::plan::OpType::PrefixMatch: {
return PrefixMatch(pattern);
}
case proto::plan::OpType::PostfixMatch: {
PatternMatchTranslator translator;
auto regex_pattern = translator(fmt::format("%{}", pattern));
return RegexQuery(regex_pattern);
}
case proto::plan::OpType::InnerMatch: {
PatternMatchTranslator translator;
auto regex_pattern = translator(fmt::format("%{}%", pattern));
return RegexQuery(regex_pattern);
}
case proto::plan::OpType::Match: {
PatternMatchTranslator translator;
auto regex_pattern = translator(pattern);
return RegexQuery(regex_pattern);
}
default:
PanicInfo(
ErrorCode::OpTypeInvalid,
"not supported op type: {} for inverted index PatternMatch",
op);
}
}
bool

View File

@ -25,7 +25,7 @@ constexpr const char* LOWER_BOUND_VALUE = "lower_bound_value";
constexpr const char* LOWER_BOUND_INCLUSIVE = "lower_bound_inclusive";
constexpr const char* UPPER_BOUND_VALUE = "upper_bound_value";
constexpr const char* UPPER_BOUND_INCLUSIVE = "upper_bound_inclusive";
constexpr const char* PREFIX_VALUE = "prefix_value";
constexpr const char* MATCH_VALUE = "match_value";
// below configurations will be persistent, do not edit them.
constexpr const char* MARISA_TRIE_INDEX = "marisa_trie_index";
constexpr const char* MARISA_STR_IDS = "marisa_trie_str_ids";

View File

@ -130,7 +130,7 @@ class ScalarIndex : public IndexBase {
}
virtual const TargetBitmap
PatternMatch(const std::string& pattern) {
PatternMatch(const std::string& pattern, proto::plan::OpType op) {
PanicInfo(Unsupported, "pattern match is not supported");
}

View File

@ -37,7 +37,7 @@ class StringIndex : public ScalarIndex<std::string> {
Query(const DatasetPtr& dataset) override {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
if (op == OpType::PrefixMatch) {
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
auto prefix = dataset->Get<std::string>(MATCH_VALUE);
return PrefixMatch(prefix);
}
return ScalarIndex<std::string>::Query(dataset);

View File

@ -31,7 +31,7 @@ class StringIndexSort : public ScalarIndexSort<std::string> {
Query(const DatasetPtr& dataset) override {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
if (op == OpType::PrefixMatch) {
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
auto prefix = dataset->Get<std::string>(MATCH_VALUE);
return PrefixMatch(prefix);
}
return ScalarIndex<std::string>::Query(dataset);

View File

@ -32,6 +32,8 @@ Match<std::string>(const std::string& str, const std::string& val, OpType op) {
return PrefixMatch(str, val);
case OpType::PostfixMatch:
return PostfixMatch(str, val);
case OpType::InnerMatch:
return InnerMatch(str, val);
default:
PanicInfo(OpTypeInvalid, "not supported");
}
@ -47,6 +49,8 @@ Match<std::string_view>(const std::string_view& str,
return PrefixMatch(str, val);
case OpType::PostfixMatch:
return PostfixMatch(str, val);
case OpType::InnerMatch:
return InnerMatch(str, val);
default:
PanicInfo(OpTypeInvalid, "not supported");
}

View File

@ -5650,6 +5650,59 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) {
}
}
// Verifies that BitmapIndex<std::string> answers prefix / inner / postfix
// match requests, and that the Dataset-based Query() entry point and the
// PatternMatch() entry point return identical bitmaps for the same request.
TEST(BitmapIndexTest, PatternMatchTest) {
    using namespace milvus::index;

    // Row layout: 0 = "apple", 1 = "banana", 2 = "orange", 3 = "pear".
    std::vector<std::string> rows = {"apple", "banana", "orange", "pear"};
    BitmapIndex<std::string> index;
    index.Build(rows.size(), rows.data(), nullptr);

    // Packs an operator and its operand into the Dataset that Query() reads.
    auto make_request = [](OpType op, const char* operand) {
        auto request = std::make_shared<Dataset>();
        request->Set(OPERATOR_TYPE, op);
        request->Set(MATCH_VALUE, std::string(operand));
        return request;
    };
    auto prefix_request = make_request(OpType::PrefixMatch, "a");
    auto inner_request = make_request(OpType::InnerMatch, "an");
    auto postfix_request = make_request(OpType::PostfixMatch, "a");

    // Run every request through Query().
    auto prefix_hits = index.Query(prefix_request);
    auto inner_hits = index.Query(inner_request);
    auto postfix_hits = index.Query(postfix_request);

    // Prefix "a": "apple" starts with it, "orange" does not.
    EXPECT_TRUE(prefix_hits[0]);
    EXPECT_FALSE(prefix_hits[2]);

    // Inner "an": contained in "banana" and "orange", not in "apple".
    EXPECT_FALSE(inner_hits[0]);
    EXPECT_TRUE(inner_hits[1]);
    EXPECT_TRUE(inner_hits[2]);

    // Postfix "a": only "banana" of the first three rows ends with it.
    EXPECT_FALSE(postfix_hits[0]);
    EXPECT_TRUE(postfix_hits[1]);
    EXPECT_FALSE(postfix_hits[2]);

    // The PatternMatch() entry point must agree with Query().
    auto prefix_hits_direct =
        index.PatternMatch(std::string("a"), OpType::PrefixMatch);
    auto inner_hits_direct =
        index.PatternMatch(std::string("an"), OpType::InnerMatch);
    auto postfix_hits_direct =
        index.PatternMatch(std::string("a"), OpType::PostfixMatch);

    EXPECT_TRUE(prefix_hits == prefix_hits_direct);
    EXPECT_TRUE(inner_hits == inner_hits_direct);
    EXPECT_TRUE(postfix_hits == postfix_hits_direct);
}
TEST(Expr, TestExprNull) {
auto schema = std::make_shared<Schema>();
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL, true);

View File

@ -620,7 +620,7 @@ test_string() {
auto dataset = std::make_shared<Dataset>();
auto prefix = data[0];
dataset->Set(index::OPERATOR_TYPE, OpType::PrefixMatch);
dataset->Set(index::PREFIX_VALUE, prefix);
dataset->Set(index::MATCH_VALUE, prefix);
auto bitset = real_index->Query(dataset);
ASSERT_EQ(cnt, bitset.size());
for (size_t i = 0; i < bitset.size(); i++) {

View File

@ -465,6 +465,96 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnInvertedIndexStringField) {
ASSERT_TRUE(final[4]);
}
// Runs a PrefixMatch filter with operand "a" against the "str" VarChar field
// after loading the inverted index, and checks the first five result bits.
TEST_F(SealedSegmentRegexQueryTest, PrefixMatchOnInvertedIndexStringField) {
    // Describe the filter: <str field> PrefixMatch "a".
    std::string needle = "a";
    const auto& field_meta = (*schema)[FieldName("str")];
    auto column = test::GenColumnInfo(field_meta.get_id().get(),
                                      proto::schema::DataType::VarChar,
                                      false,
                                      false);
    auto range_expr = test::GenUnaryRangeExpr(OpType::PrefixMatch, needle);
    range_expr->set_allocated_column_info(column);
    auto root_expr = test::GenExpr();
    root_expr->set_allocated_unary_range_expr(range_expr);

    // Turn the proto expression into an executable filter plan node.
    ProtoParser parser(*schema);
    auto typed_expr = parser.ParseExprs(*root_expr);
    auto filter_node =
        std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);

    // Execute against the sealed segment with the inverted index loaded.
    LoadInvertedIndex();
    auto* sealed_segment = dynamic_cast<SegmentSealedImpl*>(seg.get());
    BitsetType matches;
    matches = ExecuteQueryExpr(filter_node, sealed_segment, N, MAX_TIMESTAMP);

    // Row 0 is expected not to match; rows 1-4 are expected to match.
    ASSERT_FALSE(matches[0]);
    ASSERT_TRUE(matches[1]);
    ASSERT_TRUE(matches[2]);
    ASSERT_TRUE(matches[3]);
    ASSERT_TRUE(matches[4]);
}
// Runs a PostfixMatch filter with operand "a" against the "str" VarChar field
// after loading the inverted index, and checks the first five result bits.
TEST_F(SealedSegmentRegexQueryTest, PostfixMatchOnInvertedIndexStringField) {
    // Describe the filter: <str field> PostfixMatch "a".
    std::string needle = "a";
    const auto& field_meta = (*schema)[FieldName("str")];
    auto column = test::GenColumnInfo(field_meta.get_id().get(),
                                      proto::schema::DataType::VarChar,
                                      false,
                                      false);
    auto range_expr = test::GenUnaryRangeExpr(OpType::PostfixMatch, needle);
    range_expr->set_allocated_column_info(column);
    auto root_expr = test::GenExpr();
    root_expr->set_allocated_unary_range_expr(range_expr);

    // Turn the proto expression into an executable filter plan node.
    ProtoParser parser(*schema);
    auto typed_expr = parser.ParseExprs(*root_expr);
    auto filter_node =
        std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);

    // Execute against the sealed segment with the inverted index loaded.
    LoadInvertedIndex();
    auto* sealed_segment = dynamic_cast<SegmentSealedImpl*>(seg.get());
    BitsetType matches;
    matches = ExecuteQueryExpr(filter_node, sealed_segment, N, MAX_TIMESTAMP);

    // None of the first five rows is expected to match.
    ASSERT_FALSE(matches[0]);
    ASSERT_FALSE(matches[1]);
    ASSERT_FALSE(matches[2]);
    ASSERT_FALSE(matches[3]);
    ASSERT_FALSE(matches[4]);
}
// Runs an InnerMatch (substring) filter with operand "a" against the "str"
// VarChar field after loading the inverted index, and checks the first five
// result bits.
TEST_F(SealedSegmentRegexQueryTest, InnerMatchOnInvertedIndexStringField) {
    // Describe the filter: <str field> InnerMatch "a".
    std::string needle = "a";
    const auto& field_meta = (*schema)[FieldName("str")];
    auto column = test::GenColumnInfo(field_meta.get_id().get(),
                                      proto::schema::DataType::VarChar,
                                      false,
                                      false);
    auto range_expr = test::GenUnaryRangeExpr(OpType::InnerMatch, needle);
    range_expr->set_allocated_column_info(column);
    auto root_expr = test::GenExpr();
    root_expr->set_allocated_unary_range_expr(range_expr);

    // Turn the proto expression into an executable filter plan node.
    ProtoParser parser(*schema);
    auto typed_expr = parser.ParseExprs(*root_expr);
    auto filter_node =
        std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);

    // Execute against the sealed segment with the inverted index loaded.
    LoadInvertedIndex();
    auto* sealed_segment = dynamic_cast<SegmentSealedImpl*>(seg.get());
    BitsetType matches;
    matches = ExecuteQueryExpr(filter_node, sealed_segment, N, MAX_TIMESTAMP);

    // Row 0 is expected not to match; rows 1-4 are expected to match.
    ASSERT_FALSE(matches[0]);
    ASSERT_TRUE(matches[1]);
    ASSERT_TRUE(matches[2]);
    ASSERT_TRUE(matches[3]);
    ASSERT_TRUE(matches[4]);
}
TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnUnsupportedIndex) {
std::string operand = "a%";
const auto& str_meta = schema->operator[](FieldName("str"));

View File

@ -221,7 +221,7 @@ TEST_F(StringIndexMarisaTest, Query) {
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
milvus::OpType::PrefixMatch);
ds->Set<std::string>(milvus::index::PREFIX_VALUE,
ds->Set<std::string>(milvus::index::MATCH_VALUE,
std::move(strs[i]));
auto bitset = index->Query(ds);
ASSERT_EQ(bitset.size(), strs.size());

View File

@ -39,16 +39,39 @@ TEST(Util, StringMatch) {
ASSERT_TRUE(PrefixMatch("prefix1", "prefix"));
ASSERT_TRUE(PostfixMatch("1postfix", "postfix"));
ASSERT_TRUE(InnerMatch("xxinner1xx", "inner"));
ASSERT_TRUE(Match(
std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
ASSERT_TRUE(Match(
std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
ASSERT_TRUE(Match(std::string("xxpostfixxx"),
std::string("postfix"),
OpType::InnerMatch));
ASSERT_FALSE(PrefixMatch("", "longer"));
ASSERT_FALSE(PostfixMatch("", "longer"));
ASSERT_FALSE(InnerMatch("", "longer"));
ASSERT_FALSE(PrefixMatch("dontmatch", "prefix"));
ASSERT_FALSE(PostfixMatch("dontmatch", "postfix"));
ASSERT_FALSE(InnerMatch("dontmatch", "postfix"));
ASSERT_TRUE(Match(std::string_view("prefix1"),
std::string("prefix"),
OpType::PrefixMatch));
ASSERT_TRUE(Match(std::string_view("1postfix"),
std::string("postfix"),
OpType::PostfixMatch));
ASSERT_TRUE(Match(std::string_view("xxpostfixxx"),
std::string("postfix"),
OpType::InnerMatch));
ASSERT_TRUE(
Match(std::string_view("x"), std::string("x"), OpType::PrefixMatch));
ASSERT_FALSE(
Match(std::string_view(""), std::string("x"), OpType::InnerMatch));
ASSERT_TRUE(
Match(std::string_view("x"), std::string(""), OpType::InnerMatch));
}
TEST(Util, GetDeleteBitmap) {

View File

@ -13,65 +13,74 @@ var wildcards = map[byte]struct{}{
var escapeCharacter byte = '\\'
// hasWildcards returns true if pattern contains any wildcard.
func hasWildcards(pattern string) (string, bool) {
var result strings.Builder
hasWildcard := false
for i := 0; i < len(pattern); i++ {
if pattern[i] == '\\' && i+1 < len(pattern) {
next := pattern[i+1]
if next == '_' || next == '%' {
result.WriteByte(next)
i++
continue
}
}
if pattern[i] == '_' || pattern[i] == '%' {
hasWildcard = true
}
result.WriteByte(pattern[i])
func optimizeLikePattern(pattern string) (planpb.OpType, string, bool) {
if len(pattern) == 0 {
return planpb.OpType_Equal, "", true
}
return result.String(), hasWildcard
}
if pattern == "%" || pattern == "%%" {
return planpb.OpType_PrefixMatch, "", true
}
// findLastNotOfWildcards find the last location not of last wildcard.
func findLastNotOfWildcards(pattern string) int {
loc := len(pattern) - 1
for ; loc >= 0; loc-- {
_, ok := wildcards[pattern[loc]]
if !ok {
break
}
if ok {
if loc > 0 && pattern[loc-1] == escapeCharacter {
break
process := func(s string) (string, bool) {
var buf strings.Builder
for i := 0; i < len(s); i++ {
c := s[i]
if c == escapeCharacter && i+1 < len(s) {
next := s[i+1]
if _, ok := wildcards[next]; ok {
buf.WriteByte(next)
i++
continue
}
}
if _, ok := wildcards[c]; ok {
return "", false
}
buf.WriteByte(c)
}
return buf.String(), true
}
leading := pattern[0] == '%'
trailing := pattern[len(pattern)-1] == '%'
switch {
case leading && trailing:
inner := pattern[1 : len(pattern)-1]
trimmed := strings.TrimLeft(inner, "%")
trimmed = strings.TrimRight(trimmed, "%")
if subStr, valid := process(trimmed); valid {
// if subStr is empty, it means the pattern is all %,
// return prefix match and empty operand, means all match
if len(subStr) == 0 {
return planpb.OpType_PrefixMatch, "", true
}
return planpb.OpType_InnerMatch, subStr, true
}
case leading:
trimmed := strings.TrimLeft(pattern[1:], "%")
if subStr, valid := process(trimmed); valid {
return planpb.OpType_PostfixMatch, subStr, true
}
case trailing:
trimmed := strings.TrimRight(pattern[:len(pattern)-1], "%")
if subStr, valid := process(trimmed); valid {
return planpb.OpType_PrefixMatch, subStr, true
}
default:
if subStr, valid := process(pattern); valid {
return planpb.OpType_Equal, subStr, true
}
}
return loc
return planpb.OpType_Invalid, "", false
}
// translatePatternMatch translates pattern to related op type and operand.
func translatePatternMatch(pattern string) (op planpb.OpType, operand string, err error) {
l := len(pattern)
loc := findLastNotOfWildcards(pattern)
if loc < 0 {
// always match.
return planpb.OpType_PrefixMatch, "", nil
}
newPattern, exist := hasWildcards(pattern[:loc+1])
if loc >= l-1 && !exist {
// equal match.
return planpb.OpType_Equal, newPattern, nil
}
if !exist {
// prefix match.
return planpb.OpType_PrefixMatch, newPattern, nil
op, operand, ok := optimizeLikePattern(pattern)
if ok {
return op, operand, nil
}
return planpb.OpType_Match, pattern, nil

View File

@ -6,92 +6,6 @@ import (
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
)
func Test_hasWildcards(t *testing.T) {
type args struct {
pattern string
}
tests := []struct {
name string
args args
want bool
target string
}{
{
args: args{
pattern: "no-wildcards",
},
want: false,
target: "no-wildcards",
},
{
args: args{
pattern: "has\\%",
},
want: false,
target: "has%",
},
{
args: args{
pattern: "%",
},
want: true,
target: "%",
},
{
args: args{
pattern: "has%",
},
want: true,
target: "has%",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
patten, got := hasWildcards(tt.args.pattern)
if got != tt.want || patten != tt.target {
t.Errorf("hasWildcards(%s) = %v, want %v", tt.args.pattern, got, tt.want)
}
})
}
}
func Test_findLocOfLastWildcard(t *testing.T) {
type args struct {
pattern string
}
tests := []struct {
name string
args args
want int
}{
{
args: args{
pattern: "no-wildcards",
},
want: 11,
},
{
args: args{
pattern: "only\\%",
},
want: 5,
},
{
args: args{
pattern: "prefix%%",
},
want: 5,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := findLastNotOfWildcards(tt.args.pattern); got != tt.want {
t.Errorf("findLastNotOfWildcards(%s) = %v, want %v", tt.args.pattern, got, tt.want)
}
})
}
}
func Test_translatePatternMatch(t *testing.T) {
type args struct {
pattern string
@ -150,3 +64,60 @@ func Test_translatePatternMatch(t *testing.T) {
})
}
}
// TestOptimizeLikePattern is a table-driven check of optimizeLikePattern: each
// row gives a LIKE pattern and the (op type, operand, ok) triple it must yield.
// Rows expecting OpType_Invalid/false are patterns that cannot be reduced to a
// simple prefix / postfix / inner / equal match.
func TestOptimizeLikePattern(t *testing.T) {
	cases := []struct {
		pattern     string
		wantOp      planpb.OpType
		wantOperand string
		wantOk      bool
	}{
		// inner match
		{"%abc%", planpb.OpType_InnerMatch, "abc", true},
		{"%a\\%b%", planpb.OpType_InnerMatch, "a%b", true},
		{"%a\\_b%", planpb.OpType_InnerMatch, "a_b", true},
		{"%a\\\\%", planpb.OpType_InnerMatch, "a\\\\", true},
		{"%a\t%", planpb.OpType_InnerMatch, "a\t", true},
		{"%", planpb.OpType_PrefixMatch, "", true},
		{"%%", planpb.OpType_PrefixMatch, "", true},
		{"%a%b%", planpb.OpType_Invalid, "", false},
		{"%a_b%", planpb.OpType_Invalid, "", false},
		{"%abc\\", planpb.OpType_PostfixMatch, "abc\\", true},
		{"%核心%", planpb.OpType_InnerMatch, "核心", true},
		{"%核%", planpb.OpType_InnerMatch, "核", true},
		{"%\u6838%", planpb.OpType_InnerMatch, "核", true},
		{"%\u6838%", planpb.OpType_InnerMatch, "\u6838", true},

		// prefix match
		{"abc%", planpb.OpType_PrefixMatch, "abc", true},
		{"a\\%bc%", planpb.OpType_PrefixMatch, "a%bc", true},
		{"a\\_bc%", planpb.OpType_PrefixMatch, "a_bc", true},
		{"_abc%", planpb.OpType_Invalid, "", false},

		// postfix match
		{"%abc", planpb.OpType_PostfixMatch, "abc", true},
		{"%a\\_bc", planpb.OpType_PostfixMatch, "a_bc", true},
		{"%abc_", planpb.OpType_Invalid, "", false},
		{"%臥蜜", planpb.OpType_PostfixMatch, "臥蜜", true},
		{"%%臥蜜", planpb.OpType_PostfixMatch, "臥蜜", true},
		{"%\u81e5\u871c", planpb.OpType_PostfixMatch, "臥蜜", true},

		// equal match
		{"abc", planpb.OpType_Equal, "abc", true},
		{"a\\%bc", planpb.OpType_Equal, "a%bc", true},
		{"a\\_bc", planpb.OpType_Equal, "a_bc", true},
		{"abc_", planpb.OpType_Invalid, "", false},

		// null pattern
		{"", planpb.OpType_Equal, "", true},
	}

	for _, tc := range cases {
		gotOp, gotOperand, gotOk := optimizeLikePattern(tc.pattern)
		if gotOp == tc.wantOp && gotOperand == tc.wantOperand && gotOk == tc.wantOk {
			continue
		}
		// Report the mismatch but keep going so every bad row is listed.
		t.Errorf("optimizeLikePattern(%q) = (%q, %q, %v), expected (%q, %q, %v)",
			tc.pattern, gotOp, gotOperand, gotOk,
			tc.wantOp, tc.wantOperand, tc.wantOk)
	}
}

View File

@ -19,6 +19,8 @@ enum OpType {
In = 11; // TODO:: used for term expr
NotIn = 12;
TextMatch = 13; // text match
PhraseMatch = 14; // phrase match
InnerMatch = 15; // substring (e.g., "%value%")
};
enum ArithOpType {

View File

@ -38,6 +38,8 @@ const (
OpType_In OpType = 11 // TODO:: used for term expr
OpType_NotIn OpType = 12
OpType_TextMatch OpType = 13 // text match
OpType_PhraseMatch OpType = 14 // phrase match
OpType_InnerMatch OpType = 15 // substring (e.g., "%value%")
)
// Enum value maps for OpType.
@ -57,6 +59,8 @@ var (
11: "In",
12: "NotIn",
13: "TextMatch",
14: "PhraseMatch",
15: "InnerMatch",
}
OpType_value = map[string]int32{
"Invalid": 0,
@ -73,6 +77,8 @@ var (
"In": 11,
"NotIn": 12,
"TextMatch": 13,
"PhraseMatch": 14,
"InnerMatch": 15,
}
)
@ -2863,7 +2869,7 @@ var file_plan_proto_rawDesc = []byte{
0x64, 0x49, 0x64, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x79, 0x6e, 0x61, 0x6d, 0x69, 0x63, 0x5f,
0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x79,
0x6e, 0x61, 0x6d, 0x69, 0x63, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x42, 0x06, 0x0a, 0x04, 0x6e,
0x6f, 0x64, 0x65, 0x2a, 0xc9, 0x01, 0x0a, 0x06, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
0x6f, 0x64, 0x65, 0x2a, 0xea, 0x01, 0x0a, 0x06, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
0x0a, 0x07, 0x49, 0x6e, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x47,
0x72, 0x65, 0x61, 0x74, 0x65, 0x72, 0x54, 0x68, 0x61, 0x6e, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c,
0x47, 0x72, 0x65, 0x61, 0x74, 0x65, 0x72, 0x45, 0x71, 0x75, 0x61, 0x6c, 0x10, 0x02, 0x12, 0x0c,
@ -2875,24 +2881,26 @@ var file_plan_proto_rawDesc = []byte{
0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x08, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x61, 0x74, 0x63, 0x68,
0x10, 0x09, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x10, 0x0a, 0x12, 0x06, 0x0a,
0x02, 0x49, 0x6e, 0x10, 0x0b, 0x12, 0x09, 0x0a, 0x05, 0x4e, 0x6f, 0x74, 0x49, 0x6e, 0x10, 0x0c,
0x12, 0x0d, 0x0a, 0x09, 0x54, 0x65, 0x78, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0d, 0x2a,
0x58, 0x0a, 0x0b, 0x41, 0x72, 0x69, 0x74, 0x68, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x41,
0x64, 0x64, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x75, 0x62, 0x10, 0x02, 0x12, 0x07, 0x0a,
0x03, 0x4d, 0x75, 0x6c, 0x10, 0x03, 0x12, 0x07, 0x0a, 0x03, 0x44, 0x69, 0x76, 0x10, 0x04, 0x12,
0x07, 0x0a, 0x03, 0x4d, 0x6f, 0x64, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x41, 0x72, 0x72, 0x61,
0x79, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x10, 0x06, 0x2a, 0x6d, 0x0a, 0x0a, 0x56, 0x65, 0x63,
0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x42, 0x69, 0x6e, 0x61, 0x72,
0x79, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x6c, 0x6f,
0x61, 0x74, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x46, 0x6c,
0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x02, 0x12, 0x12, 0x0a,
0x0e, 0x42, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10,
0x03, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f, 0x61, 0x74,
0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x04, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68,
0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69, 0x6f,
0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x76, 0x32, 0x2f, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x70, 0x6c, 0x61, 0x6e, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x33,
0x12, 0x0d, 0x0a, 0x09, 0x54, 0x65, 0x78, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0d, 0x12,
0x0f, 0x0a, 0x0b, 0x50, 0x68, 0x72, 0x61, 0x73, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0e,
0x12, 0x0e, 0x0a, 0x0a, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x10, 0x0f,
0x2a, 0x58, 0x0a, 0x0b, 0x41, 0x72, 0x69, 0x74, 0x68, 0x4f, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12,
0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03,
0x41, 0x64, 0x64, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x75, 0x62, 0x10, 0x02, 0x12, 0x07,
0x0a, 0x03, 0x4d, 0x75, 0x6c, 0x10, 0x03, 0x12, 0x07, 0x0a, 0x03, 0x44, 0x69, 0x76, 0x10, 0x04,
0x12, 0x07, 0x0a, 0x03, 0x4d, 0x6f, 0x64, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x41, 0x72, 0x72,
0x61, 0x79, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x10, 0x06, 0x2a, 0x6d, 0x0a, 0x0a, 0x56, 0x65,
0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x42, 0x69, 0x6e, 0x61,
0x72, 0x79, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x6c,
0x6f, 0x61, 0x74, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x46,
0x6c, 0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x02, 0x12, 0x12,
0x0a, 0x0e, 0x42, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x31, 0x36, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72,
0x10, 0x03, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f, 0x61,
0x74, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x10, 0x04, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74,
0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69,
0x6f, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x76, 0x32, 0x2f,
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x70, 0x6c, 0x61, 0x6e, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x33,
}
var (

View File

@ -288,7 +288,7 @@ if [[ ${RUN_CPPLINT} == "ON" ]]; then
echo "clang-format check passed!"
else
# compile and build
make -j 7 install || exit 1
make -j ${jobs} install || exit 1
fi
if command -v ccache &> /dev/null

View File

@ -2762,6 +2762,38 @@ class TestQueryString(TestcaseBase):
output_fields = [default_int_field_name, default_float_field_name, default_string_field_name]
collection_w.query(expression, output_fields=output_fields,
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_suffix(self):
    """
    target: test query with suffix string expression
    method: specify string is primary field, use suffix string expr (like "%0")
    expected: verify query successfully
    """
    collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
                                                         primary_field=ct.default_string_field_name)[0:2]
    expression = 'varchar like "%0"'
    # Build the expected result locally: keep only inserted rows whose string
    # field ends with '0', mirroring the suffix semantics of the like expression.
    inserted = vectors[0]
    filtered_data = inserted[inserted[default_string_field_name].str.endswith('0')]
    # Only the first three columns are compared.
    # NOTE(review): presumably these are the int/float/varchar columns listed in
    # output_fields below - confirm against the data generator's column order.
    res = filtered_data.iloc[:, :3].to_dict('records')
    output_fields = [default_int_field_name, default_float_field_name, default_string_field_name]
    collection_w.query(expression, output_fields=output_fields,
                       check_task=CheckTasks.check_query_results, check_items={exp_res: res})
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_inner_match(self):
    """
    target: test query with inner-match (substring) string expression
    method: specify string is primary field, use inner-match string expr (like "%0%")
    expected: verify query successfully
    """
    collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
                                                         primary_field=ct.default_string_field_name)[0:2]
    expression = 'varchar like "%0%"'
    # Build the expected result locally: keep only inserted rows whose string
    # field contains '0'. regex=False makes this a literal substring test, which
    # is what the like expression with a leading and trailing % expresses;
    # pandas would otherwise interpret the pattern as a regular expression.
    inserted = vectors[0]
    filtered_data = inserted[inserted[default_string_field_name].str.contains('0', regex=False)]
    # Only the first three columns are compared.
    # NOTE(review): presumably these are the int/float/varchar columns listed in
    # output_fields below - confirm against the data generator's column order.
    res = filtered_data.iloc[:, :3].to_dict('records')
    output_fields = [default_int_field_name, default_float_field_name, default_string_field_name]
    collection_w.query(expression, output_fields=output_fields,
                       check_task=CheckTasks.check_query_results, check_items={exp_res: res})
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_alter_offset_cache_param(self):
@ -2798,9 +2830,10 @@ class TestQueryString(TestcaseBase):
collection_w.release()
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_prefixes_auto_index(self):
@pytest.mark.parametrize("expression", ['varchar like "0%"', 'varchar like "%0"','varchar like "%0%"'])
def test_query_string_expr_with_like_auto_index(self, expression):
"""
target: test query with prefix string expression and indexed with auto index
target: test query with like string expression and indexed with auto index
expected: verify query successfully
"""
collection_w, vectors = self.init_collection_general(prefix, insert_data=True, is_index=False,
@ -2810,8 +2843,7 @@ class TestQueryString(TestcaseBase):
index_name="query_expr_pre_index")
collection_w.create_index("varchar", index_name="varchar_auto_index")
time.sleep(1)
collection_w.load()
expression = 'varchar like "0%"'
collection_w.load()
result, _ = collection_w.query(expression, output_fields=['varchar'])
res_len = len(result)
collection_w.release()
@ -2822,7 +2854,8 @@ class TestQueryString(TestcaseBase):
assert res_len_1 == res_len
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_prefixes_bitmap(self):
@pytest.mark.parametrize("expression", ['varchar like "0%"', 'varchar like "%0"','varchar like "%0%"'])
def test_query_string_expr_with_prefixes_bitmap(self, expression):
"""
target: test query with prefix string expression and indexed with bitmap
expected: verify query successfully
@ -2835,7 +2868,6 @@ class TestQueryString(TestcaseBase):
collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"})
time.sleep(1)
collection_w.load()
expression = 'varchar like "0%"'
result, _ = collection_w.query(expression, output_fields=['varchar'])
res_len = len(result)
collection_w.release()
@ -2846,7 +2878,8 @@ class TestQueryString(TestcaseBase):
assert res_len_1 == res_len
@pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_match_auto_index(self):
@pytest.mark.parametrize("expression", ['varchar like "0%"', 'varchar like "%0"','varchar like "%0%"'])
def test_query_string_expr_with_match_auto_index(self, expression):
"""
target: test query with match string expression and indexed with auto index
expected: verify query successfully
@ -2859,7 +2892,6 @@ class TestQueryString(TestcaseBase):
collection_w.create_index("varchar", index_name="varchar_auto_index")
time.sleep(1)
collection_w.load()
expression = 'varchar like "%0%"'
result, _ = collection_w.query(expression, output_fields=['varchar'])
res_len = len(result)
collection_w.release()
@ -3163,6 +3195,80 @@ class TestQueryArray(TestcaseBase):
for i in range(len(res)):
assert res[i]["id"] == ground_truth[i]
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("use_index", [True, False])
@pytest.mark.parametrize("index_type", ["INVERTED", "BITMAP"])
def test_query_array_with_prefix_like(self, use_index, index_type):
    """
    target: test query with a prefix like expression on a string array element
    method: insert string arrays, optionally index the string array field with
            index_type, then query 'string_array[0] like "0%"'
    expected: hit count equals the number of rows whose first element starts with '0'
    """
    # 1. create a collection
    collection_w = self.init_collection_wrap(schema=cf.gen_array_collection_schema())

    # 2. insert data: row i holds the strings of i, i + 1 and i + 2
    string_field_value = [[str(i + step) for step in range(3)] for i in range(ct.default_nb)]
    df = cf.gen_array_dataframe_data()
    df[ct.default_string_array_field_name] = string_field_value
    collection_w.insert(df)
    collection_w.create_index(ct.default_float_vec_field_name, {})
    if use_index:
        # the scalar index on the array field is only built for the indexed variant
        scalar_index_params = {"index_type": index_type}
        collection_w.create_index(ct.default_string_array_field_name, scalar_index_params)

    # 3. query
    collection_w.load()
    expression = 'string_array[0] like "0%"'
    hits = collection_w.query(expr=expression, limit=ct.default_nb)[0]
    log.info(hits)

    # only the number of hits is verified against the locally computed count
    expected_count = sum(1 for row in string_field_value if row[0].startswith('0'))
    assert len(hits) == expected_count
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("use_index", [True, False])
@pytest.mark.parametrize("index_type", ["INVERTED", "BITMAP"])
def test_query_array_with_suffix_like(self, use_index, index_type):
    """
    target: test query with a suffix like expression on a string array element
    method: insert string arrays, optionally index the string array field with
            index_type, then query 'string_array[0] like "%0"'
    expected: hit count equals the number of rows whose first element ends with '0'
    """
    # 1. create a collection
    collection_w = self.init_collection_wrap(schema=cf.gen_array_collection_schema())

    # 2. insert data: row i holds the strings of i, i + 1 and i + 2
    string_field_value = [[str(i + step) for step in range(3)] for i in range(ct.default_nb)]
    df = cf.gen_array_dataframe_data()
    df[ct.default_string_array_field_name] = string_field_value
    collection_w.insert(df)
    collection_w.create_index(ct.default_float_vec_field_name, {})
    if use_index:
        # the scalar index on the array field is only built for the indexed variant
        scalar_index_params = {"index_type": index_type}
        collection_w.create_index(ct.default_string_array_field_name, scalar_index_params)

    # 3. query
    collection_w.load()
    expression = 'string_array[0] like "%0"'
    hits = collection_w.query(expr=expression, limit=ct.default_nb)[0]
    log.info(hits)

    # only the number of hits is verified against the locally computed count
    expected_count = sum(1 for row in string_field_value if row[0].endswith('0'))
    assert len(hits) == expected_count
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("use_index", [True, False])
@pytest.mark.parametrize("index_type", ["INVERTED", "BITMAP"])
def test_query_array_with_inner_like(self, use_index, index_type):
    """
    target: test query with an inner-match like expression on a string array element
    method: insert string arrays, optionally index the string array field with
            index_type, then query 'string_array[0] like "%0%"'
    expected: hit count equals the number of rows whose first element contains '0'
    """
    # 1. create a collection
    collection_w = self.init_collection_wrap(schema=cf.gen_array_collection_schema())

    # 2. insert data: row i holds the strings of i, i + 1 and i + 2
    string_field_value = [[str(i + step) for step in range(3)] for i in range(ct.default_nb)]
    df = cf.gen_array_dataframe_data()
    df[ct.default_string_array_field_name] = string_field_value
    collection_w.insert(df)
    collection_w.create_index(ct.default_float_vec_field_name, {})
    if use_index:
        # the scalar index on the array field is only built for the indexed variant
        scalar_index_params = {"index_type": index_type}
        collection_w.create_index(ct.default_string_array_field_name, scalar_index_params)

    # 3. query
    collection_w.load()
    expression = 'string_array[0] like "%0%"'
    hits = collection_w.query(expr=expression, limit=ct.default_nb)[0]
    log.info(hits)

    # only the number of hits is verified against the locally computed count
    expected_count = sum(1 for row in string_field_value if '0' in row[0])
    assert len(hits) == expected_count
class TestQueryCount(TestcaseBase):
"""