reduce uid copy during search (#3867)

Signed-off-by: groot <yihua.mo@zilliz.com>
Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
pull/3916/head
groot 2020-09-25 13:59:16 +08:00 committed by shengjun.li
parent 7bfcec642f
commit 6939c60bef
2 changed files with 43 additions and 8 deletions

View File

@ -244,6 +244,8 @@ SegmentReader::LoadUids(std::vector<engine::idx_t>& uids) {
return Status(DB_ERROR, err_msg);
}
TimeRecorderAuto recorder("SegmentReader::LoadUids");
uids.clear();
uids.resize(raw->data_.size() / sizeof(engine::idx_t));
memcpy(uids.data(), raw->data_.data(), raw->data_.size());
@ -269,12 +271,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
return Status(DB_ERROR, "Field is not vector type");
}
// load uids
std::vector<int64_t> uids;
STATUS_CHECK(LoadUids(uids));
// load deleted doc
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(uids.size());
int64_t row_count = GetRowCount();
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(row_count);
segment::DeletedDocsPtr deleted_docs_ptr;
LoadDeletedDocs(deleted_docs_ptr);
if (deleted_docs_ptr != nullptr) {
@ -307,7 +306,11 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
engine::BinaryDataPtr raw;
STATUS_CHECK(LoadField(field_name, raw, false));
auto dataset = knowhere::GenDataset(uids.size(), dimension, raw->data_.data());
// load uids
std::vector<int64_t> uids;
STATUS_CHECK(LoadUids(uids));
auto dataset = knowhere::GenDataset(row_count, dimension, raw->data_.data());
// construct IDMAP index
knowhere::VecIndexFactory& vec_index_factory = knowhere::VecIndexFactory::GetInstance();
@ -326,9 +329,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
segment_ptr_->SetVectorIndex(field_name, index_ptr);
cache::CpuCacheMgr::GetInstance().InsertItem(temp_index_path, index_ptr);
recorder.RecordSection("construct temp IDMAP index");
}
recorder.RecordSection("create temp IDMAP index");
return Status::OK();
}
@ -377,11 +380,16 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
STATUS_CHECK(ss_codec.GetVectorIndexFormat()->ConstructIndex(index_type, index_data, raw_data, compress_data,
index_ptr));
// load uids
std::vector<int64_t> uids;
STATUS_CHECK(LoadUids(uids));
index_ptr->SetUids(uids);
index_ptr->SetBlacklist(concurrent_bitset_ptr);
segment_ptr_->SetVectorIndex(field_name, index_ptr);
cache::CpuCacheMgr::GetInstance().InsertItem(index_file_path, index_ptr); // put into cache
recorder.RecordSection("construct index");
} catch (std::exception& e) {
std::string err_msg = "Failed to load vector index: " + std::string(e.what());
LOG_ENGINE_ERROR_ << err_msg;
@ -506,7 +514,7 @@ SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
Status
SegmentReader::LoadDeletedDocs(segment::DeletedDocsPtr& deleted_docs_ptr) {
try {
TimeRecorder recorder("SegmentReader::LoadDeletedDocs");
TimeRecorderAuto recorder("SegmentReader::LoadDeletedDocs");
deleted_docs_ptr = segment_ptr_->GetDeletedDocs();
if (deleted_docs_ptr != nullptr) {
@ -611,6 +619,30 @@ SegmentReader::GetTempIndexPath(const std::string& field_name, std::string& path
return Status::OK();
}
int64_t
SegmentReader::GetRowCount() {
engine::BinaryDataPtr raw;
auto status = LoadField(engine::FIELD_UID, raw);
if (!status.ok()) {
LOG_ENGINE_ERROR_ << status.message();
return 0;
}
if (raw == nullptr) {
LOG_ENGINE_ERROR_ << "Failed to load id field";
return 0;
}
if (raw->data_.size() % sizeof(engine::idx_t) != 0) {
std::string err_msg = "Failed to load uids: illegal file size";
LOG_ENGINE_ERROR_ << err_msg;
return 0;
}
int64_t count = raw->data_.size() / sizeof(engine::idx_t);
return count;
}
Status
SegmentReader::ClearCache() {
TimeRecorderAuto recorder("SegmentReader::ClearCache");

View File

@ -95,6 +95,9 @@ class SegmentReader {
return segment_visitor_;
}
// get the number of entries in the uid field (byte size / sizeof(engine::idx_t));
// returns 0 and logs an error if the uid field cannot be loaded or has an illegal size
int64_t
GetRowCount();
// clear cache from cache manager, use this method for segment merge/compact and collection/partition drop
Status
ClearCache();