Add get final filtered bitmap in SegmentGrowing (#9780)

Signed-off-by: fishpenguin <kun.yu@zilliz.com>
pull/9804/head
yukun 2021-10-13 16:54:34 +08:00 committed by GitHub
parent cb32aeb577
commit 6c88774624
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 673 additions and 649 deletions

View File

@ -30,6 +30,10 @@ class ConcurrentBitset {
}
}
// Builds a bitset for `count` bits, seeded from the packed bytes at `data`.
// Storage is sized to ceil(count / 8) bytes and exactly that many bytes are
// copied from `data`, so the caller must supply at least that many readable bytes.
// NOTE(review): only bitset_ appears in this initializer list — confirm that any
// bit-count member of the class is initialized elsewhere.
explicit ConcurrentBitset(size_t count, const uint8_t* data) : bitset_((count + 7) >> 3) {
    // Same rounding as the storage size above: number of bytes covering `count` bits.
    const size_t byte_len = (count + 7) >> 3;
    memcpy(mutable_data(), data, byte_len);
}
ConcurrentBitset&
operator&=(const ConcurrentBitset& bitset) {
auto u8_1 = mutable_data();

View File

@ -115,6 +115,22 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
return current;
}
// Combines the segment's delete state with a caller-supplied filter bitset.
//
// Steps: resolve the delete barrier for `timestamp`, fetch the deleted bitmap for
// (del_barrier, timestamp, ins_barrier), copy `bitset` into a fresh
// faiss::ConcurrentBitset, OR the two together and wrap the result in a BitsetView.
//
// Fails via AssertInfo when no bitmap holder is obtained, or when
// deleted_bitmap->count() differs from bitset.u8size().
//
// NOTE(review): bitset.u8size() is passed as the `count` argument of the
// ConcurrentBitset(count, data) constructor, which sizes storage as
// ceil(count / 8) bytes. If u8size() is already a byte length, only part of
// `bitset` is copied — confirm the intended unit (bits vs. bytes), and the same
// for the count()/u8size() comparison in the assertion.
// NOTE(review): the returned view is built from the function-local `final_bitmap`;
// confirm BitsetView keeps the underlying storage alive after this function returns.
const BitsetView
SegmentGrowingImpl::get_filtered_bitmap(BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) {
    auto del_barrier = get_barrier(get_deleted_record(), timestamp);
    auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
    AssertInfo(bitmap_holder, "bitmap_holder is null");
    auto deleted_bitmap = bitmap_holder->bitmap_ptr;
    AssertInfo(deleted_bitmap->count() == bitset.u8size(), "Deleted bitmap count not equal to filtered bitmap count");
    // Construct the copy in place; building a temporary and handing it to
    // make_shared would create the bitset twice.
    auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.u8size(), bitset.data());
    auto final_bitmap = (*deleted_bitmap) | (*filtered_bitmap);
    return BitsetView(final_bitmap);
}
Status
SegmentGrowingImpl::Insert(int64_t reserved_begin,
int64_t size,
@ -271,6 +287,7 @@ SegmentGrowingImpl::vector_search(int64_t vec_count,
Timestamp timestamp,
const BitsetView& bitset,
SearchResult& output) const {
// TODO(yukun): get final filtered bitmap
auto& sealed_indexing = this->get_sealed_indexing_record();
if (sealed_indexing.is_ready(search_info.field_offset_)) {
query::SearchOnSealed(this->get_schema(), sealed_indexing, search_info, query_data, query_count, bitset,

View File

@ -179,6 +179,9 @@ class SegmentGrowingImpl : public SegmentGrowing {
// Returns a shared handle to a DeletedRecord::TmpBitmap for the given delete
// barrier, query timestamp and insert barrier. `force` defaults to false.
// NOTE(review): the effect of `force` is not visible from this declaration —
// confirm against the definition.
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap(int64_t del_barrier, Timestamp query_timestamp, int64_t insert_barrier, bool force = false);
// Returns a view over the OR of this segment's deleted bitmap (looked up for
// `timestamp` and `ins_barrier`) and the caller-supplied `bitset`.
const BitsetView
get_filtered_bitmap(BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp);
// Override taking an IdArray and a timestamp; returns an owned IdArray paired
// with a vector of SegOffset values. Does not modify the segment (const).
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
search_ids(const IdArray& id_array, Timestamp timestamp) const override;

File diff suppressed because it is too large Load Diff

View File

@ -233,8 +233,8 @@ TEST(Indexing, BinaryBruteForce) {
num_queries, //
topk, //
round_decimal,
dim, //
query_data //
dim, //
query_data //
};
auto sub_result = query::BinarySearchBruteForce(search_dataset, bin_vec.data(), N, nullptr);

View File

@ -118,7 +118,7 @@ TEST(Sealed, without_predicate) {
sr = sealed_segment->Search(plan.get(), *ph_group, time);
auto post_result = SearchResultToJson(sr);
std::cout << "ref_result"<< std::endl;
std::cout << "ref_result" << std::endl;
std::cout << ref_result.dump(1) << std::endl;
std::cout << "post_result" << std::endl;
std::cout << post_result.dump(1);