enhance: Accelerate `find_first` by using the bitset's SIMD methods (#39004)

Related to #39003

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/39033/head
congqixia 2025-01-07 10:34:54 +08:00 committed by GitHub
parent ee9a2793da
commit 72f5b85c05
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 14 deletions

View File

@ -1255,20 +1255,24 @@ ChunkedSegmentSealedImpl::find_first(int64_t limit,
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
// Flip the bitset because `find_first` and `find_next` search for true bits.
auto flipped = bitset.clone();
flipped.flip();
int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}
if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}
return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}
ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl(

View File

@ -1714,20 +1714,25 @@ SegmentSealedImpl::find_first(int64_t limit, const BitsetType& bitset) const {
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
// Flip the bitset because `find_first` and `find_next` are used to find true bits.
// This could be optimized by supporting a find-false search in the bitset.
auto flipped = bitset.clone();
flipped.flip();
int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}
if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}
return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}
SegcoreError