mirror of https://github.com/milvus-io/milvus.git
reduce uid copy during search (#3867)
Signed-off-by: groot <yihua.mo@zilliz.com>
Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
pull/3916/head
parent
7bfcec642f
commit
6939c60bef
|
@ -244,6 +244,8 @@ SegmentReader::LoadUids(std::vector<engine::idx_t>& uids) {
|
|||
return Status(DB_ERROR, err_msg);
|
||||
}
|
||||
|
||||
TimeRecorderAuto recorder("SegmentReader::LoadUids");
|
||||
|
||||
uids.clear();
|
||||
uids.resize(raw->data_.size() / sizeof(engine::idx_t));
|
||||
memcpy(uids.data(), raw->data_.data(), raw->data_.size());
|
||||
|
@ -269,12 +271,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
|
|||
return Status(DB_ERROR, "Field is not vector type");
|
||||
}
|
||||
|
||||
// load uids
|
||||
std::vector<int64_t> uids;
|
||||
STATUS_CHECK(LoadUids(uids));
|
||||
|
||||
// load deleted doc
|
||||
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(uids.size());
|
||||
int64_t row_count = GetRowCount();
|
||||
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(row_count);
|
||||
segment::DeletedDocsPtr deleted_docs_ptr;
|
||||
LoadDeletedDocs(deleted_docs_ptr);
|
||||
if (deleted_docs_ptr != nullptr) {
|
||||
|
@ -307,7 +306,11 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
|
|||
engine::BinaryDataPtr raw;
|
||||
STATUS_CHECK(LoadField(field_name, raw, false));
|
||||
|
||||
auto dataset = knowhere::GenDataset(uids.size(), dimension, raw->data_.data());
|
||||
// load uids
|
||||
std::vector<int64_t> uids;
|
||||
STATUS_CHECK(LoadUids(uids));
|
||||
|
||||
auto dataset = knowhere::GenDataset(row_count, dimension, raw->data_.data());
|
||||
|
||||
// construct IDMAP index
|
||||
knowhere::VecIndexFactory& vec_index_factory = knowhere::VecIndexFactory::GetInstance();
|
||||
|
@ -326,9 +329,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
|
|||
segment_ptr_->SetVectorIndex(field_name, index_ptr);
|
||||
|
||||
cache::CpuCacheMgr::GetInstance().InsertItem(temp_index_path, index_ptr);
|
||||
recorder.RecordSection("construct temp IDMAP index");
|
||||
}
|
||||
|
||||
recorder.RecordSection("create temp IDMAP index");
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -377,11 +380,16 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
|
|||
STATUS_CHECK(ss_codec.GetVectorIndexFormat()->ConstructIndex(index_type, index_data, raw_data, compress_data,
|
||||
index_ptr));
|
||||
|
||||
// load uids
|
||||
std::vector<int64_t> uids;
|
||||
STATUS_CHECK(LoadUids(uids));
|
||||
|
||||
index_ptr->SetUids(uids);
|
||||
index_ptr->SetBlacklist(concurrent_bitset_ptr);
|
||||
segment_ptr_->SetVectorIndex(field_name, index_ptr);
|
||||
|
||||
cache::CpuCacheMgr::GetInstance().InsertItem(index_file_path, index_ptr); // put into cache
|
||||
recorder.RecordSection("construct index");
|
||||
} catch (std::exception& e) {
|
||||
std::string err_msg = "Failed to load vector index: " + std::string(e.what());
|
||||
LOG_ENGINE_ERROR_ << err_msg;
|
||||
|
@ -506,7 +514,7 @@ SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
|
|||
Status
|
||||
SegmentReader::LoadDeletedDocs(segment::DeletedDocsPtr& deleted_docs_ptr) {
|
||||
try {
|
||||
TimeRecorder recorder("SegmentReader::LoadDeletedDocs");
|
||||
TimeRecorderAuto recorder("SegmentReader::LoadDeletedDocs");
|
||||
|
||||
deleted_docs_ptr = segment_ptr_->GetDeletedDocs();
|
||||
if (deleted_docs_ptr != nullptr) {
|
||||
|
@ -611,6 +619,30 @@ SegmentReader::GetTempIndexPath(const std::string& field_name, std::string& path
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
int64_t
|
||||
SegmentReader::GetRowCount() {
|
||||
engine::BinaryDataPtr raw;
|
||||
auto status = LoadField(engine::FIELD_UID, raw);
|
||||
if (!status.ok()) {
|
||||
LOG_ENGINE_ERROR_ << status.message();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (raw == nullptr) {
|
||||
LOG_ENGINE_ERROR_ << "Failed to load id field";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (raw->data_.size() % sizeof(engine::idx_t) != 0) {
|
||||
std::string err_msg = "Failed to load uids: illegal file size";
|
||||
LOG_ENGINE_ERROR_ << err_msg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t count = raw->data_.size() / sizeof(engine::idx_t);
|
||||
return count;
|
||||
}
|
||||
|
||||
Status
|
||||
SegmentReader::ClearCache() {
|
||||
TimeRecorderAuto recorder("SegmentReader::ClearCache");
|
||||
|
|
|
@ -95,6 +95,9 @@ class SegmentReader {
|
|||
return segment_visitor_;
|
||||
}
|
||||
|
||||
// Returns the segment's row count, computed from the UID field as
// (byte size of the field) / sizeof(engine::idx_t).
// Returns 0 after logging an error when the UID field cannot be loaded, is null,
// or has a byte size that is not a multiple of sizeof(engine::idx_t).
int64_t
|
||||
GetRowCount();
|
||||
|
||||
// Clear this segment's cache from the cache manager. Use this method for segment merge/compact and collection/partition drop.
|
||||
Status
|
||||
ClearCache();
|
||||
|
|
Loading…
Reference in New Issue