// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "segment/SegmentReader.h"

#include <experimental/filesystem>
#include <memory>
#include <utility>

#include "cache/CpuCacheMgr.h"
#include "codecs/Codec.h"
#include "db/SnapshotUtils.h"
#include "db/Types.h"
#include "db/snapshot/ResourceHelper.h"
#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/vector_index/VecIndexFactory.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "storage/disk/DiskIOReader.h"
#include "storage/disk/DiskIOWriter.h"
#include "storage/disk/DiskOperation.h"
#include "utils/Log.h"
namespace milvus {
namespace segment {
SegmentReader::SegmentReader(const std::string& dir_root, const engine::SegmentVisitorPtr& segment_visitor)
    : dir_root_(dir_root), segment_visitor_(segment_visitor) {
    Initialize();
}
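// Initialize: builds the disk-based FSHandler for this segment's directory and registers each
// field on an empty engine::Segment, recording the per-entity byte width for vector fields.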
Status
SegmentReader::Initialize() {
    dir_collections_ = dir_root_ + engine::COLLECTIONS_FOLDER;

    std::string directory =
        engine::snapshot::GetResPath<engine::snapshot::Segment>(dir_collections_, segment_visitor_->GetSegment());

    storage::IOReaderPtr reader_ptr = std::make_shared<storage::DiskIOReader>();
    storage::IOWriterPtr writer_ptr = std::make_shared<storage::DiskIOWriter>();
    storage::OperationPtr operation_ptr = std::make_shared<storage::DiskOperation>(directory);
    fs_ptr_ = std::make_shared<storage::FSHandler>(reader_ptr, writer_ptr, operation_ptr);

    segment_ptr_ = std::make_shared<engine::Segment>();

    const engine::SegmentVisitor::IdMapT& field_map = segment_visitor_->GetFieldVisitors();
    for (auto& iter : field_map) {
        const engine::snapshot::FieldPtr& field = iter.second->GetField();
        std::string name = field->GetName();
        engine::DataType ftype = static_cast<engine::DataType>(field->GetFtype());
        if (engine::IsVectorField(field)) {
            json params = field->GetParams();
            if (params.find(knowhere::meta::DIM) == params.end()) {
                std::string msg = "Vector field params must contain: dimension";
                LOG_SERVER_ERROR_ << msg;
                return Status(DB_ERROR, msg);
            }

            int64_t field_width = 0;
            int64_t dimension = params[knowhere::meta::DIM];
            if (ftype == engine::DataType::VECTOR_BINARY) {
                field_width = (dimension / 8);
            } else {
                field_width = (dimension * sizeof(float));
            }
            segment_ptr_->AddField(name, ftype, field_width);
        } else {
            segment_ptr_->AddField(name, ftype);
        }
    }

    return Status::OK();
}
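// Load: eagerly loads everything belonging to the segment -- raw field data, the id bloom filter,
// the deleted-docs list, and all vector/structured indices.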
Status
SegmentReader::Load() {
    STATUS_CHECK(LoadFields());

    segment::IdBloomFilterPtr id_bloom_filter_ptr;
    STATUS_CHECK(LoadBloomFilter(id_bloom_filter_ptr));

    segment::DeletedDocsPtr deleted_docs_ptr;
    STATUS_CHECK(LoadDeletedDocs(deleted_docs_ptr));

    STATUS_CHECK(LoadVectorIndice());

    return Status::OK();
}
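// LoadField: returns the raw (fixed-width) data of a single field, preferring the copy already
// held by the Segment, then the CPU cache, and finally the file on disk.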
Status
SegmentReader::LoadField(const std::string& field_name, engine::BinaryDataPtr& raw, bool to_cache) {
    try {
        segment_ptr_->GetFixedFieldData(field_name, raw);
        if (raw != nullptr) {
            return Status::OK();  // already exist
        }

        auto field_visitor = segment_visitor_->GetFieldVisitor(field_name);
        if (field_visitor == nullptr) {
            return Status(DB_ERROR, "Invalid field name");
        }

        auto raw_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_RAW);
        std::string file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, raw_visitor->GetFile());

        // if the data is in cache, no need to read file
        auto data_obj = cache::CpuCacheMgr::GetInstance().GetItem(file_path);
        if (data_obj == nullptr) {
            auto& ss_codec = codec::Codec::instance();
            ss_codec.GetBlockFormat()->Read(fs_ptr_, file_path, raw);

            if (to_cache) {
                cache::CpuCacheMgr::GetInstance().InsertItem(file_path, raw);  // put into cache
            }
        } else {
            raw = std::static_pointer_cast<engine::BinaryData>(data_obj);
        }

        segment_ptr_->SetFixedFieldData(field_name, raw);
    } catch (std::exception& e) {
        std::string err_msg = "Failed to load raw vectors: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }
    return Status::OK();
}
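// LoadFields: loads the raw data of every field that the Segment does not already hold.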
Status
SegmentReader::LoadFields() {
    auto& field_visitors_map = segment_visitor_->GetFieldVisitors();
    for (auto& iter : field_visitors_map) {
        const engine::snapshot::FieldPtr& field = iter.second->GetField();
        std::string name = field->GetName();
        engine::BinaryDataPtr raw_data;
        auto status = segment_ptr_->GetFixedFieldData(name, raw_data);

        if (!status.ok() || raw_data == nullptr) {
            STATUS_CHECK(LoadField(name, raw_data));
        }
    }

    return Status::OK();
}
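// LoadEntities: reads only the entities at the given offsets of one field, translating each
// offset into a byte range (offset * field_width) and issuing a ranged read on the raw file.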
Status
SegmentReader::LoadEntities(const std::string& field_name, const std::vector<int64_t>& offsets,
                            engine::BinaryDataPtr& raw) {
    try {
        auto field_visitor = segment_visitor_->GetFieldVisitor(field_name);
        if (field_visitor == nullptr) {
            return Status(DB_ERROR, "Invalid field_name");
        }
        auto raw_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_RAW);
        std::string file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, raw_visitor->GetFile());

        int64_t field_width = 0;
        STATUS_CHECK(segment_ptr_->GetFixedFieldWidth(field_name, field_width));
        if (field_width <= 0) {
            return Status(DB_ERROR, "Invalid field width");
        }

        codec::ReadRanges ranges;
        for (auto offset : offsets) {
            ranges.push_back(codec::ReadRange(offset * field_width, field_width));
        }
        auto& ss_codec = codec::Codec::instance();
        ss_codec.GetBlockFormat()->Read(fs_ptr_, file_path, ranges, raw);
    } catch (std::exception& e) {
        std::string err_msg = "Failed to load raw vectors: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }

    return Status::OK();
}
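// LoadFieldsEntities: fetches the entities at the given offsets for several fields and appends
// the results into one DataChunk, concatenating with any data the chunk already holds.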
Status
SegmentReader::LoadFieldsEntities(const std::vector<std::string>& fields_name, const std::vector<int64_t>& offsets,
                                  engine::DataChunkPtr& data_chunk) {
    if (data_chunk == nullptr) {
        data_chunk = std::make_shared<engine::DataChunk>();
    }
    data_chunk->count_ += offsets.size();
    for (auto& name : fields_name) {
        engine::BinaryDataPtr raw_data;
        auto status = LoadEntities(name, offsets, raw_data);
        if (!status.ok() || raw_data == nullptr) {
            return status;
        }

        auto& target_data = data_chunk->fixed_fields_[name];
        if (target_data != nullptr) {
            auto chunk_size = target_data->Size();
            auto raw_data_size = raw_data->Size();
            target_data->data_.resize(chunk_size + raw_data_size);
            memcpy(target_data->data_.data() + chunk_size, raw_data->data_.data(), raw_data_size);
        } else {
            data_chunk->fixed_fields_[name] = raw_data;
        }
    }
    return Status::OK();
}
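// LoadUids: loads the reserved uid field and reinterprets its raw bytes as a vector of entity
// ids, validating that the file size is a whole multiple of sizeof(engine::idx_t).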
Status
SegmentReader::LoadUids(std::vector<engine::idx_t>& uids) {
    engine::BinaryDataPtr raw;
    auto status = LoadField(engine::FIELD_UID, raw);
    if (!status.ok()) {
        LOG_ENGINE_ERROR_ << status.message();
        return status;
    }

    if (raw == nullptr) {
        return Status(DB_ERROR, "Failed to load id field");
    }

    if (raw->data_.size() % sizeof(engine::idx_t) != 0) {
        std::string err_msg = "Failed to load uids: illegal file size";
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }

    uids.clear();
    uids.resize(raw->data_.size() / sizeof(engine::idx_t));
    memcpy(uids.data(), raw->data_.data(), raw->data_.size());

    return Status::OK();
}
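// LoadVectorIndex: returns the vector index of a field. If no index file exists (or flat search
// is requested), a temporary IDMAP/BIN_IDMAP index is built from the raw data instead. Deleted
// documents are masked out through a ConcurrentBitset blacklist, and results are cached.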
Status
SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndexPtr& index_ptr, bool flat) {
    try {
        segment_ptr_->GetVectorIndex(field_name, index_ptr);
        if (index_ptr != nullptr) {
            return Status::OK();  // already exist
        }

        // check field type
        auto& ss_codec = codec::Codec::instance();
        auto field_visitor = segment_visitor_->GetFieldVisitor(field_name);
        const engine::snapshot::FieldPtr& field = field_visitor->GetField();
        if (!engine::IsVectorField(field)) {
            return Status(DB_ERROR, "Field is not vector type");
        }

        // load deleted doc
        auto& segment = segment_visitor_->GetSegment();
        auto& snapshot = segment_visitor_->GetSnapshot();
        auto segment_commit = snapshot->GetSegmentCommitBySegmentId(segment->GetID());
        faiss::ConcurrentBitsetPtr concurrent_bitset_ptr =
            std::make_shared<faiss::ConcurrentBitset>(segment_commit->GetRowCount());

        segment::DeletedDocsPtr deleted_docs_ptr;
        STATUS_CHECK(LoadDeletedDocs(deleted_docs_ptr));
        if (deleted_docs_ptr != nullptr) {
            auto& deleted_docs = deleted_docs_ptr->GetDeletedDocs();
            for (auto& offset : deleted_docs) {
                concurrent_bitset_ptr->set(offset);
            }
        }

        // load uids
        std::vector<int64_t> uids;
        STATUS_CHECK(LoadUids(uids));

        knowhere::BinarySet index_data;
        knowhere::BinaryPtr raw_data, compress_data;

        // if index not specified, or index file not created, return a temp index (IDMAP type)
        auto index_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_INDEX);
        if (flat || index_visitor == nullptr || index_visitor->GetFile() == nullptr) {
            auto temp_index_path = engine::snapshot::GetResPath<engine::snapshot::Segment>(dir_collections_, segment);
            temp_index_path += "/";
            std::string temp_index_name = field_name + ".idmap";
            temp_index_path += temp_index_name;

            // if the data is in cache, no need to read file
            auto data_obj = cache::CpuCacheMgr::GetInstance().GetItem(temp_index_path);
            if (data_obj != nullptr) {
                index_ptr = std::static_pointer_cast<knowhere::VecIndex>(data_obj);
                segment_ptr_->SetVectorIndex(field_name, index_ptr);
            } else {
                auto& json = field->GetParams();
                if (json.find(knowhere::meta::DIM) == json.end()) {
                    return Status(DB_ERROR, "Vector field dimension undefined");
                }
                int64_t dimension = json[knowhere::meta::DIM];
                engine::BinaryDataPtr raw;
                STATUS_CHECK(LoadField(field_name, raw, false));

                auto dataset = knowhere::GenDataset(segment_commit->GetRowCount(), dimension, raw->data_.data());

                // construct IDMAP index
                knowhere::VecIndexFactory& vec_index_factory = knowhere::VecIndexFactory::GetInstance();
                if (field->GetFtype() == engine::DataType::VECTOR_FLOAT) {
                    index_ptr = vec_index_factory.CreateVecIndex(knowhere::IndexEnum::INDEX_FAISS_IDMAP,
                                                                 knowhere::IndexMode::MODE_CPU);
                } else {
                    index_ptr = vec_index_factory.CreateVecIndex(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
                                                                 knowhere::IndexMode::MODE_CPU);
                }
                milvus::json conf{{knowhere::meta::DIM, dimension}};
                index_ptr->Train(knowhere::DatasetPtr(), conf);
                index_ptr->AddWithoutIds(dataset, conf);
                index_ptr->SetUids(uids);
                index_ptr->SetBlacklist(concurrent_bitset_ptr);
                segment_ptr_->SetVectorIndex(field_name, index_ptr);

                cache::CpuCacheMgr::GetInstance().InsertItem(temp_index_path, index_ptr);
            }

            return Status::OK();
        }

        // read index file
        std::string index_file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, index_visitor->GetFile());
        // if the data is in cache, no need to read file
        auto data_obj = cache::CpuCacheMgr::GetInstance().GetItem(index_file_path);
        if (data_obj != nullptr) {
            index_ptr = std::static_pointer_cast<knowhere::VecIndex>(data_obj);
            segment_ptr_->SetVectorIndex(field_name, index_ptr);

            return Status::OK();
        }

        ss_codec.GetVectorIndexFormat()->ReadIndex(fs_ptr_, index_file_path, index_data);

        // for some kinds of index (IVF), read raw file
        auto index_type = index_visitor->GetElement()->GetTypeName();
        if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT || index_type == knowhere::IndexEnum::INDEX_NSG ||
            index_type == knowhere::IndexEnum::INDEX_HNSW) {
            engine::BinaryDataPtr fixed_data;
            auto status = segment_ptr_->GetFixedFieldData(field_name, fixed_data);
            if (status.ok()) {
                ss_codec.GetVectorIndexFormat()->ConvertRaw(fixed_data, raw_data);
            } else if (auto visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_RAW)) {
                auto file_path =
                    engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, visitor->GetFile());
                ss_codec.GetVectorIndexFormat()->ReadRaw(fs_ptr_, file_path, raw_data);
            }
        }

        // for some kinds of index (SQ8), read compress file
        if (index_type == knowhere::IndexEnum::INDEX_RHNSWSQ) {
            if (auto visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_COMPRESS_SQ8)) {
                auto file_path =
                    engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, visitor->GetFile());
                ss_codec.GetVectorIndexFormat()->ReadCompress(fs_ptr_, file_path, compress_data);
            }
        }

        ss_codec.GetVectorIndexFormat()->ConstructIndex(index_type, index_data, raw_data, compress_data, index_ptr);

        index_ptr->SetUids(uids);
        index_ptr->SetBlacklist(concurrent_bitset_ptr);
        segment_ptr_->SetVectorIndex(field_name, index_ptr);

        cache::CpuCacheMgr::GetInstance().InsertItem(index_file_path, index_ptr);  // put into cache
    } catch (std::exception& e) {
        std::string err_msg = "Failed to load vector index: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }

    return Status::OK();
}
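// LoadStructuredIndex: loads the index of a scalar (non-vector) field from cache or disk and
// attaches it to the Segment; it is a no-op when no index file has been created yet.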
Status
SegmentReader::LoadStructuredIndex(const std::string& field_name, knowhere::IndexPtr& index_ptr) {
    try {
        segment_ptr_->GetStructuredIndex(field_name, index_ptr);
        if (index_ptr != nullptr) {
            return Status::OK();  // already exist
        }

        // check field type
        auto& ss_codec = codec::Codec::instance();
        auto field_visitor = segment_visitor_->GetFieldVisitor(field_name);
        if (!field_visitor) {
            return Status(DB_ERROR, "Field: " + field_name + " does not exist");
        }
        const engine::snapshot::FieldPtr& field = field_visitor->GetField();
        if (engine::IsVectorField(field)) {
            return Status(DB_ERROR, "Field is not structured type");
        }

        // read field index
        auto index_visitor = field_visitor->GetElementVisitor(engine::FieldElementType::FET_INDEX);
        if (index_visitor && index_visitor->GetFile() != nullptr) {
            std::string file_path =
                engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, index_visitor->GetFile());

            // if the data is in cache, no need to read file
            auto data_obj = cache::CpuCacheMgr::GetInstance().GetItem(file_path);
            if (data_obj == nullptr) {
                ss_codec.GetStructuredIndexFormat()->Read(fs_ptr_, file_path, index_ptr);
                cache::CpuCacheMgr::GetInstance().InsertItem(file_path, index_ptr);  // put into cache
            } else {
                index_ptr = std::static_pointer_cast<knowhere::Index>(data_obj);
            }

            segment_ptr_->SetStructuredIndex(field_name, index_ptr);
        }
    } catch (std::exception& e) {
        std::string err_msg = "Failed to load structured index: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }

    return Status::OK();
}
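// LoadVectorIndice: iterates over all fields that have an index element and loads either the
// vector index or the structured index, depending on the field type.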
Status
SegmentReader::LoadVectorIndice() {
    auto& field_visitors_map = segment_visitor_->GetFieldVisitors();
    for (auto& iter : field_visitors_map) {
        const engine::snapshot::FieldPtr& field = iter.second->GetField();
        std::string name = field->GetName();

        auto element_visitor = iter.second->GetElementVisitor(engine::FieldElementType::FET_INDEX);
        if (element_visitor == nullptr) {
            continue;
        }

        std::string file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, element_visitor->GetFile());
        if (engine::IsVectorField(field)) {
            knowhere::VecIndexPtr index_ptr;
            STATUS_CHECK(LoadVectorIndex(name, index_ptr));
        } else {
            knowhere::IndexPtr index_ptr;
            STATUS_CHECK(LoadStructuredIndex(name, index_ptr));
        }
    }

    return Status::OK();
}
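// LoadBloomFilter: loads the id bloom filter attached to the uid field, if its file exists.
// Caching of the filter object itself is currently disabled (see the TODO below).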
Status
SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
    try {
        id_bloom_filter_ptr = segment_ptr_->GetBloomFilter();
        if (id_bloom_filter_ptr != nullptr) {
            return Status::OK();  // already exist
        }

        auto uid_field_visitor = segment_visitor_->GetFieldVisitor(engine::FIELD_UID);
        auto visitor = uid_field_visitor->GetElementVisitor(engine::FieldElementType::FET_BLOOM_FILTER);
        std::string file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, visitor->GetFile());
        if (!std::experimental::filesystem::exists(file_path + codec::IdBloomFilterFormat::FilePostfix())) {
            return Status::OK();  // file doesn't exist
        }

        // if the data is in cache, no need to read file
        auto data_obj = cache::CpuCacheMgr::GetInstance().GetItem(file_path);
        if (data_obj == nullptr) {
            auto& ss_codec = codec::Codec::instance();
            ss_codec.GetIdBloomFilterFormat()->Read(fs_ptr_, file_path, id_bloom_filter_ptr);
        } else {
            id_bloom_filter_ptr = std::static_pointer_cast<segment::IdBloomFilter>(data_obj);
        }

        if (id_bloom_filter_ptr) {
            segment_ptr_->SetBloomFilter(id_bloom_filter_ptr);
            // TODO: disable cache for solving bloom filter ptr problem
            // cache::CpuCacheMgr::GetInstance().InsertItem(file_path, id_bloom_filter_ptr);  // put into cache
        }
    } catch (std::exception& e) {
        std::string err_msg = "Failed to load bloom filter: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }
    return Status::OK();
}
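// LoadDeletedDocs: loads the deleted-docs list attached to the uid field from cache or disk,
// if its file exists, and registers it on the Segment.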
Status
SegmentReader::LoadDeletedDocs(segment::DeletedDocsPtr& deleted_docs_ptr) {
    try {
        deleted_docs_ptr = segment_ptr_->GetDeletedDocs();
        if (deleted_docs_ptr != nullptr) {
            return Status::OK();  // already exist
        }

        auto uid_field_visitor = segment_visitor_->GetFieldVisitor(engine::FIELD_UID);
        auto visitor = uid_field_visitor->GetElementVisitor(engine::FieldElementType::FET_DELETED_DOCS);
        std::string file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, visitor->GetFile());
        if (!std::experimental::filesystem::exists(file_path + codec::DeletedDocsFormat::FilePostfix())) {
            return Status::OK();  // file doesn't exist
        }

        // if the data is in cache, no need to read file
        auto data_obj = cache::CpuCacheMgr::GetInstance().GetItem(file_path);
        if (data_obj == nullptr) {
            auto& ss_codec = codec::Codec::instance();
            ss_codec.GetDeletedDocsFormat()->Read(fs_ptr_, file_path, deleted_docs_ptr);
        } else {
            deleted_docs_ptr = std::static_pointer_cast<segment::DeletedDocs>(data_obj);
        }

        if (deleted_docs_ptr) {
            segment_ptr_->SetDeletedDocs(deleted_docs_ptr);
            cache::CpuCacheMgr::GetInstance().InsertItem(file_path, deleted_docs_ptr);  // put into cache
        }
    } catch (std::exception& e) {
        std::string err_msg = "Failed to load deleted docs: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }
    return Status::OK();
}
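// ReadDeletedDocsSize: returns the number of deleted documents without materializing the full
// deleted-docs list, reading only the size from the deleted-docs file when the list is not
// already loaded.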
Status
SegmentReader::ReadDeletedDocsSize(size_t& size) {
    try {
        size = 0;
        auto deleted_docs_ptr = segment_ptr_->GetDeletedDocs();
        if (deleted_docs_ptr != nullptr) {
            size = deleted_docs_ptr->GetCount();
            return Status::OK();  // already exist
        }

        auto uid_field_visitor = segment_visitor_->GetFieldVisitor(engine::FIELD_UID);
        auto visitor = uid_field_visitor->GetElementVisitor(engine::FieldElementType::FET_DELETED_DOCS);
        std::string file_path =
            engine::snapshot::GetResPath<engine::snapshot::SegmentFile>(dir_collections_, visitor->GetFile());
        if (!std::experimental::filesystem::exists(file_path + codec::DeletedDocsFormat::FilePostfix())) {
            return Status::OK();  // file doesn't exist
        }

        auto& ss_codec = codec::Codec::instance();
        ss_codec.GetDeletedDocsFormat()->ReadSize(fs_ptr_, file_path, size);
    } catch (std::exception& e) {
        std::string err_msg = "Failed to read deleted docs size: " + std::string(e.what());
        LOG_ENGINE_ERROR_ << err_msg;
        return Status(DB_ERROR, err_msg);
    }
    return Status::OK();
}
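// Accessors: expose the in-memory Segment object, the segment id from the snapshot, and the
// segment's directory path under the collections root.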
Status
SegmentReader::GetSegment(engine::SegmentPtr& segment_ptr) {
    segment_ptr = segment_ptr_;
    return Status::OK();
}

Status
SegmentReader::GetSegmentID(int64_t& id) {
    if (segment_visitor_) {
        auto segment = segment_visitor_->GetSegment();
        if (segment) {
            id = segment->GetID();
            return Status::OK();
        }
    }

    return Status(DB_ERROR, "SegmentReader::GetSegmentID: null pointer");
}

std::string
SegmentReader::GetSegmentPath() {
    std::string seg_path =
        engine::snapshot::GetResPath<engine::snapshot::Segment>(dir_collections_, segment_visitor_->GetSegment());
    return seg_path;
}

}  // namespace segment
}  // namespace milvus
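// Usage sketch (illustrative only; obtaining `dir_root` and a valid SegmentVisitorPtr is the
// responsibility of the snapshot layer, and the field name below is hypothetical):
//
//   milvus::segment::SegmentReader reader(dir_root, segment_visitor);
//   auto status = reader.Load();  // raw fields, bloom filter, deleted docs, indices
//
//   std::vector<milvus::engine::idx_t> uids;
//   status = reader.LoadUids(uids);  // entity ids stored in the uid field
//
//   knowhere::VecIndexPtr index;
//   status = reader.LoadVectorIndex("my_vector_field", index, /*flat=*/false);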