Update MemTable.cpp (#14224)

* Update MemTable.cpp

1. std::vector<segment::doc_id_t> ids_to_check 改成 std::set
2. deleted_docs 内容写入 faiss::ConcurrentBitset 中，用于查询

Signed-off-by: shu01.wang <shu01.wang@vipshop.com>

* Create MemTable.cpp

改 std::set 为 std::unordered_set

Signed-off-by: shu01.wang <shu01.wang@vipshop.com>

* Update MemTable.cpp

Conform to the coding specification

Signed-off-by: shu01.wang <shu01.wang@vipshop.com>
pull/14698/head
sunwsh 2021-12-28 18:15:52 +08:00 committed by GitHub
parent bb34765faf
commit ed01b3874b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 17 deletions

View File

@ -13,6 +13,7 @@
#include <chrono>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include "cache/CpuCacheMgr.h"
@ -226,7 +227,7 @@ MemTable::ApplyDeletes() {
segment::UidsPtr uids_ptr = nullptr;
segment::DeletedDocsPtr deleted_docs_ptr = nullptr;
std::vector<segment::doc_id_t> ids_to_check;
std::unordered_set<segment::doc_id_t> ids_to_check;
TimeRecorder rec("handle segment " + file.segment_id_);
@ -284,7 +285,7 @@ MemTable::ApplyDeletes() {
// check ids by bloom filter
for (auto& id : doc_ids_to_delete_) {
if (id_bloom_filter_ptr->Check(id)) {
ids_to_check.emplace_back(id);
ids_to_check.emplace(id);
}
}
@ -307,28 +308,21 @@ MemTable::ApplyDeletes() {
rec.RecordSection("load uids and deleted docs");
// sort ids_to_check
bool ids_sorted = false;
if (ids_to_check.size() >= 64) {
std::sort(ids_to_check.begin(), ids_to_check.end());
ids_sorted = true;
rec.RecordSection("Sorting " + std::to_string(ids_to_check.size()) + " ids");
// insert deleted docs to bitset
auto deleted_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(uids_ptr->size());
for (auto& offset : deleted_docs) {
deleted_bitset_ptr->set(offset);
}
// for each id
int64_t segment_deleted_count = 0;
for (size_t i = 0; i < uids_ptr->size(); ++i) {
if (std::find(deleted_docs.begin(), deleted_docs.end(), i) != deleted_docs.end()) {
if (deleted_bitset_ptr->test(i)) {
continue;
}
if (ids_sorted) {
if (!std::binary_search(ids_to_check.begin(), ids_to_check.end(), (*uids_ptr)[i])) {
continue;
}
} else {
if (std::find(ids_to_check.begin(), ids_to_check.end(), (*uids_ptr)[i]) == ids_to_check.end()) {
continue;
}
if (ids_to_check.find((*uids_ptr)[i]) == ids_to_check.end()) {
continue;
}
// delete