fix: expr add skip using index while index exists (#34202)

This PR cherry-picks parts of the following commits:

-  enhance: add skip using array index when some situation #33947 
-  fix: [ut] regex query under unsupported index  #34087

pr: #33947, #34087

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/34277/head
zhagnlu 2024-06-30 20:03:13 +08:00 committed by GitHub
parent 460815ceab
commit fc6bd387b8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 8 deletions

View File

@ -119,7 +119,9 @@ class SegmentExpr : public Expr {
is_index_mode_ = segment_->HasIndex(field_id_);
if (is_index_mode_) {
num_index_chunk_ = segment_->num_chunk_index(field_id_);
} else {
}
// If the index does not include raw data, the field data must be loaded as well.
if (segment_->HasFieldData(field_id_)) {
num_data_chunk_ = upper_div(active_count_, size_per_chunk_);
}
}
@ -166,6 +168,9 @@ class SegmentExpr : public Expr {
MoveCursor() override {
if (is_index_mode_) {
MoveCursorForIndex();
if (segment_->HasFieldData(field_id_)) {
MoveCursorForData();
}
} else {
MoveCursorForData();
}
@ -173,10 +178,11 @@ class SegmentExpr : public Expr {
int64_t
GetNextBatchSize() {
auto current_chunk =
is_index_mode_ ? current_index_chunk_ : current_data_chunk_;
auto current_chunk_pos =
is_index_mode_ ? current_index_chunk_pos_ : current_data_chunk_pos_;
auto current_chunk = is_index_mode_ && use_index_ ? current_index_chunk_
: current_data_chunk_;
auto current_chunk_pos = is_index_mode_ && use_index_
? current_index_chunk_pos_
: current_data_chunk_pos_;
auto current_rows = current_chunk * size_per_chunk_ + current_chunk_pos;
return current_rows + batch_size_ >= active_count_
? active_count_ - current_rows
@ -330,14 +336,17 @@ class SegmentExpr : public Expr {
DataType pk_type_;
int64_t batch_size_;
// State indicating the position the expr is currently computing at,
// because the expr may be called once for every batch.
bool is_index_mode_{false};
bool is_data_mode_{false};
// Sometimes the index needs to be skipped and raw data used instead.
// Defaults to true, which means the index is used whenever possible.
bool use_index_{true};
int64_t active_count_{0};
int64_t num_data_chunk_{0};
int64_t num_index_chunk_{0};
// State indicating the position the expr is currently computing at,
// because the expr may be called once for every batch.
int64_t current_data_chunk_{0};
int64_t current_data_chunk_pos_{0};
int64_t current_index_chunk_{0};

View File

@ -501,5 +501,11 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnUnsupportedIndex) {
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
query::ExecPlanNodeVisitor visitor(*segpromote, MAX_TIMESTAMP);
BitsetType final;
ASSERT_ANY_THROW(visitor.ExecuteExprNode(parsed, segpromote, N, final));
// regex query under this index will be executed using raw data (brute force).
visitor.ExecuteExprNode(parsed, segpromote, N, final);
ASSERT_FALSE(final[0]);
ASSERT_TRUE(final[1]);
ASSERT_TRUE(final[2]);
ASSERT_TRUE(final[3]);
ASSERT_TRUE(final[4]);
}