Change segcore search_ids from iterating over all bits to visiting only the set bits (#24659) (#24800)

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/24921/head
zhagnlu 2023-06-16 16:48:44 +08:00 committed by GitHub
parent b62429070c
commit c5b1533fdc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 15 deletions

View File

@ -193,7 +193,7 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
GetExprUsePkIndex() && IsTermExpr(node.predicate_.value().get())
? segment->search_ids(
final_view, expr_cached_pk_id_offsets_, timestamp_)
: segment->search_ids(final_view, timestamp_);
: segment->search_ids(bitset_holder.flip(), timestamp_);
retrieve_result.result_offsets_.assign(
(int64_t*)seg_offsets.data(),
(int64_t*)seg_offsets.data() + seg_offsets.size());

View File

@ -392,13 +392,14 @@ std::vector<SegOffset>
SegmentGrowingImpl::search_ids(const BitsetType& bitset,
Timestamp timestamp) const {
std::vector<SegOffset> res_offsets;
for (int i = 0; i < bitset.size(); i++) {
if (bitset[i]) {
auto offset = SegOffset(i);
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
res_offsets.push_back(offset);
}
for (int i = bitset.find_first(); i < bitset.size();
i = bitset.find_next(i)) {
if (i == BitsetType::npos) {
return res_offsets;
}
auto offset = SegOffset(i);
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
res_offsets.push_back(offset);
}
}
return res_offsets;

View File

@ -891,12 +891,14 @@ std::vector<SegOffset>
SegmentSealedImpl::search_ids(const BitsetType& bitset,
Timestamp timestamp) const {
std::vector<SegOffset> dst_offset;
for (int i = 0; i < bitset.size(); i++) {
if (bitset[i]) {
auto offset = SegOffset(i);
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
for (int i = bitset.find_first(); i < bitset.size();
i = bitset.find_next(i)) {
if (i == BitsetType::npos) {
return dst_offset;
}
auto offset = SegOffset(i);
if (insert_record_.timestamps_[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
}
return dst_offset;

83
internal/core/unittest/test_c_api.cpp Normal file → Executable file
View File

@ -20,6 +20,7 @@
#include <unordered_set>
#include "common/LoadInfo.h"
#include "common/Types.h"
#include "index/IndexFactory.h"
#include "knowhere/comp/index_param.h"
#include "pb/plan.pb.h"
@ -4459,4 +4460,84 @@ TEST(CApiTest, AssembeChunkTest) {
for (size_t i = 0; i < 105; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
}
}
std::vector<SegOffset>
search_id(const BitsetType& bitset,
Timestamp* timestamps,
Timestamp timestamp,
bool use_find) {
std::vector<SegOffset> dst_offset;
if (use_find) {
for (int i = bitset.find_first(); i < bitset.size();
i = bitset.find_next(i)) {
if (i == BitsetType::npos) {
return dst_offset;
}
auto offset = SegOffset(i);
if (timestamps[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
}
} else {
for (int i = 0; i < bitset.size(); i++) {
if (bitset[i]) {
auto offset = SegOffset(i);
if (timestamps[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
}
}
}
return dst_offset;
}
// Verifies that the find_first()/find_next() scan in search_id() returns
// exactly the same offsets as the bit-by-bit scan on a sparse bitset, and
// prints rough timings for both scans and for flipping the whole bitset.
TEST(CApiTest, SearchIdTest) {
    using BitsetType = boost::dynamic_bitset<>;

    // Fixed seed so that a failing run can be reproduced.
    srand(42);

    // Microseconds elapsed since `since`.
    auto elapsed_us = [](std::chrono::steady_clock::time_point since) {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                   std::chrono::steady_clock::now() - since)
            .count();
    };

    // Runs one comparison with up to NT randomly chosen set bits.
    auto test = [&elapsed_us](int NT) {
        constexpr size_t kRowCount = 1000000;

        // dynamic_bitset starts with every bit cleared.
        BitsetType bitset(kRowCount);
        // Owned by a vector so the buffer is released even when an ASSERT
        // below returns early from this lambda.
        std::vector<Timestamp> timestamps(kRowCount);
        for (size_t i = 0; i < kRowCount; i++) {
            timestamps[i] = i;
        }
        for (int i = 0; i < NT; i++) {
            // Dividing by RAND_MAX + 1 keeps the ratio strictly below 1, so
            // the index can never reach kRowCount (out of bounds).
            const double ratio = static_cast<double>(rand()) /
                                 (static_cast<double>(RAND_MAX) + 1.0);
            bitset[static_cast<size_t>(kRowCount * ratio)] = true;
        }

        auto start = std::chrono::steady_clock::now();
        auto res1 = search_id(bitset, timestamps.data(), kRowCount, true);
        std::cout << "search id cost:" << elapsed_us(start) << "us"
                  << std::endl;

        start = std::chrono::steady_clock::now();
        auto res2 = search_id(bitset, timestamps.data(), kRowCount, false);
        std::cout << "search id origin cost:" << elapsed_us(start) << "us"
                  << std::endl;

        // Both scans must agree on the count and on every offset; a mismatch
        // fails the test instead of only being printed.
        ASSERT_EQ(res1.size(), res2.size());
        for (size_t i = 0; i < res1.size(); i++) {
            ASSERT_EQ(res1[i].get(), res2[i].get())
                << "offset mismatch at result index " << i;
        }

        start = std::chrono::steady_clock::now();
        bitset.flip();
        std::cout << "bit set flip cost:" << elapsed_us(start) << "us"
                  << std::endl;
    };

    int test_nt[] = {10, 50, 100};
    for (auto nt : test_nt) {
        test(nt);
    }
}