enhance: [2.5] Optimize retrieve performance utilizing bitset SIMD methods (#39041)

Cherry pick from master
pr: #39004 #39037
Related to #39003

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/39046/head
congqixia 2025-01-07 17:45:03 +08:00 committed by GitHub
parent b457c2f415
commit 0b62c1e692
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 27 additions and 17 deletions

View File

@ -223,9 +223,10 @@ PhyTermFilterExpr::ExecPkTermImpl() {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
for (size_t i = 0; i < real_batch_size; ++i) {
res[i] = cached_bits_[current_data_chunk_pos_++];
}
auto current_chunk_view =
cached_bits_.view(current_data_chunk_pos_, real_batch_size);
res |= current_chunk_view;
current_data_chunk_pos_ += real_batch_size;
return res_vec;
}

View File

@ -1255,20 +1255,24 @@ ChunkedSegmentSealedImpl::find_first(int64_t limit,
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
// Flip the bitset, since `find_first` and `find_next` search for bits set to true.
auto flipped = bitset.clone();
flipped.flip();
int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}
if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}
return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}
ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl(

View File

@ -1719,20 +1719,25 @@ SegmentSealedImpl::find_first(int64_t limit, const BitsetType& bitset) const {
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
// Flip the bitset, since `find_first` and `find_next` search for bits set to true.
// This could be optimized by supporting a find-false search in the bitset.
auto flipped = bitset.clone();
flipped.flip();
int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}
if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}
return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}
SegcoreError