mirror of https://github.com/milvus-io/milvus.git
refactor(db): refactor all for crtp replacement
Former-commit-id: 3c5d3ddeec04d573ef3916f673a7504c51a0a3bfpull/191/head
parent
67960d5a55
commit
0e37089c6a
|
@ -113,7 +113,7 @@ Status DBImpl::search(const std::string& group_id, size_t k, size_t nq,
|
||||||
|
|
||||||
auto search_in_index = [&](meta::GroupFilesSchema& file_vec) -> void {
|
auto search_in_index = [&](meta::GroupFilesSchema& file_vec) -> void {
|
||||||
for (auto &file : file_vec) {
|
for (auto &file : file_vec) {
|
||||||
FaissExecutionEngineBase index(file.dimension, file.location);
|
FaissExecutionEngine index(file.dimension, file.location);
|
||||||
index.Load();
|
index.Load();
|
||||||
auto file_size = index.PhysicalSize()/(1024*1024);
|
auto file_size = index.PhysicalSize()/(1024*1024);
|
||||||
search_set_size += file_size;
|
search_set_size += file_size;
|
||||||
|
@ -213,7 +213,7 @@ Status DBImpl::merge_files(const std::string& group_id, const meta::DateT& date,
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
FaissExecutionEngineBase index(group_file.dimension, group_file.location);
|
FaissExecutionEngine index(group_file.dimension, group_file.location);
|
||||||
|
|
||||||
meta::GroupFilesSchema updated;
|
meta::GroupFilesSchema updated;
|
||||||
long index_size = 0;
|
long index_size = 0;
|
||||||
|
@ -286,7 +286,7 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
FaissExecutionEngineBase to_index(file.dimension, file.location);
|
FaissExecutionEngine to_index(file.dimension, file.location);
|
||||||
|
|
||||||
to_index.Load();
|
to_index.Load();
|
||||||
auto index = to_index.BuildIndex(group_file.location);
|
auto index = to_index.BuildIndex(group_file.location);
|
||||||
|
|
|
@ -5,7 +5,8 @@ namespace zilliz {
|
||||||
namespace vecwise {
|
namespace vecwise {
|
||||||
namespace engine {
|
namespace engine {
|
||||||
|
|
||||||
Status ExecutionEngine::AddWithIds(const std::vector<float>& vectors, const std::vector<long>& vector_ids) {
|
template<typename Derived>
|
||||||
|
Status ExecutionEngine<Derived>::AddWithIds(const std::vector<float>& vectors, const std::vector<long>& vector_ids) {
|
||||||
long n1 = (long)vectors.size();
|
long n1 = (long)vectors.size();
|
||||||
long n2 = (long)vector_ids.size();
|
long n2 = (long)vector_ids.size();
|
||||||
if (n1 != n2) {
|
if (n1 != n2) {
|
||||||
|
@ -16,53 +17,42 @@ Status ExecutionEngine::AddWithIds(const std::vector<float>& vectors, const std:
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
Status ExecutionEngineBase<Derived>::AddWithIds(const std::vector<float>& vectors, const std::vector<long>& vector_ids) {
|
Status ExecutionEngine<Derived>::AddWithIds(long n, const float *xdata, const long *xids) {
|
||||||
long n1 = (long)vectors.size();
|
|
||||||
long n2 = (long)vector_ids.size();
|
|
||||||
if (n1 != n2) {
|
|
||||||
LOG(ERROR) << "vectors size is not equal to the size of vector_ids: " << n1 << "!=" << n2;
|
|
||||||
return Status::Error("Error: AddWithIds");
|
|
||||||
}
|
|
||||||
return AddWithIds(n1, vectors.data(), vector_ids.data());
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename Derived>
|
|
||||||
Status ExecutionEngineBase<Derived>::AddWithIds(long n, const float *xdata, const long *xids) {
|
|
||||||
return static_cast<Derived*>(this)->AddWithIds(n, xdata, xids);
|
return static_cast<Derived*>(this)->AddWithIds(n, xdata, xids);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
size_t ExecutionEngineBase<Derived>::Count() const {
|
size_t ExecutionEngine<Derived>::Count() const {
|
||||||
return static_cast<Derived*>(this)->Count();
|
return static_cast<Derived*>(this)->Count();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
size_t ExecutionEngineBase<Derived>::Size() const {
|
size_t ExecutionEngine<Derived>::Size() const {
|
||||||
return static_cast<Derived*>(this)->Size();
|
return static_cast<Derived*>(this)->Size();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
size_t ExecutionEngineBase<Derived>::PhysicalSize() const {
|
size_t ExecutionEngine<Derived>::PhysicalSize() const {
|
||||||
return static_cast<Derived*>(this)->PhysicalSize();
|
return static_cast<Derived*>(this)->PhysicalSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
Status ExecutionEngineBase<Derived>::Serialize() {
|
Status ExecutionEngine<Derived>::Serialize() {
|
||||||
return static_cast<Derived*>(this)->Serialize();
|
return static_cast<Derived*>(this)->Serialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
Status ExecutionEngineBase<Derived>::Load() {
|
Status ExecutionEngine<Derived>::Load() {
|
||||||
return static_cast<Derived*>(this)->Load();
|
return static_cast<Derived*>(this)->Load();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
Status ExecutionEngineBase<Derived>::Merge(const std::string& location) {
|
Status ExecutionEngine<Derived>::Merge(const std::string& location) {
|
||||||
return static_cast<Derived*>(this)->Merge(location);
|
return static_cast<Derived*>(this)->Merge(location);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
Status ExecutionEngineBase<Derived>::Search(long n,
|
Status ExecutionEngine<Derived>::Search(long n,
|
||||||
const float *data,
|
const float *data,
|
||||||
long k,
|
long k,
|
||||||
float *distances,
|
float *distances,
|
||||||
|
@ -71,12 +61,12 @@ Status ExecutionEngineBase<Derived>::Search(long n,
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
Status ExecutionEngineBase<Derived>::Cache() {
|
Status ExecutionEngine<Derived>::Cache() {
|
||||||
return static_cast<Derived*>(this)->Cache();
|
return static_cast<Derived*>(this)->Cache();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
std::shared_ptr<Derived> ExecutionEngineBase<Derived>::BuildIndex(const std::string& location) {
|
std::shared_ptr<Derived> ExecutionEngine<Derived>::BuildIndex(const std::string& location) {
|
||||||
return static_cast<Derived*>(this)->BuildIndex(location);
|
return static_cast<Derived*>(this)->BuildIndex(location);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,43 +9,8 @@ namespace zilliz {
|
||||||
namespace vecwise {
|
namespace vecwise {
|
||||||
namespace engine {
|
namespace engine {
|
||||||
|
|
||||||
class ExecutionEngine;
|
|
||||||
|
|
||||||
class ExecutionEngine {
|
|
||||||
public:
|
|
||||||
|
|
||||||
Status AddWithIds(const std::vector<float>& vectors,
|
|
||||||
const std::vector<long>& vector_ids);
|
|
||||||
|
|
||||||
virtual Status AddWithIds(long n, const float *xdata, const long *xids) = 0;
|
|
||||||
|
|
||||||
virtual size_t Count() const = 0;
|
|
||||||
|
|
||||||
virtual size_t Size() const = 0;
|
|
||||||
|
|
||||||
virtual size_t PhysicalSize() const = 0;
|
|
||||||
|
|
||||||
virtual Status Serialize() = 0;
|
|
||||||
|
|
||||||
virtual Status Load() = 0;
|
|
||||||
|
|
||||||
virtual Status Merge(const std::string& location) = 0;
|
|
||||||
|
|
||||||
virtual Status Search(long n,
|
|
||||||
const float *data,
|
|
||||||
long k,
|
|
||||||
float *distances,
|
|
||||||
long *labels) const = 0;
|
|
||||||
|
|
||||||
virtual std::shared_ptr<ExecutionEngine> BuildIndex(const std::string&) = 0;
|
|
||||||
|
|
||||||
virtual Status Cache() = 0;
|
|
||||||
|
|
||||||
virtual ~ExecutionEngine() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Derived>
|
template <typename Derived>
|
||||||
class ExecutionEngineBase {
|
class ExecutionEngine {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
Status AddWithIds(const std::vector<float>& vectors,
|
Status AddWithIds(const std::vector<float>& vectors,
|
||||||
|
|
|
@ -16,6 +16,7 @@ namespace engine {
|
||||||
const std::string RawIndexType = "IDMap,Flat";
|
const std::string RawIndexType = "IDMap,Flat";
|
||||||
const std::string BuildIndexType = "IDMap,Flat";
|
const std::string BuildIndexType = "IDMap,Flat";
|
||||||
|
|
||||||
|
|
||||||
FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension, const std::string& location)
|
FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension, const std::string& location)
|
||||||
: pIndex_(faiss::index_factory(dimension, RawIndexType.c_str())),
|
: pIndex_(faiss::index_factory(dimension, RawIndexType.c_str())),
|
||||||
location_(location) {
|
location_(location) {
|
||||||
|
@ -74,7 +75,7 @@ Status FaissExecutionEngine::Merge(const std::string& location) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<ExecutionEngine> FaissExecutionEngine::BuildIndex(const std::string& location) {
|
std::shared_ptr<FaissExecutionEngine> FaissExecutionEngine::BuildIndex(const std::string& location) {
|
||||||
auto opd = std::make_shared<Operand>();
|
auto opd = std::make_shared<Operand>();
|
||||||
opd->d = pIndex_->d;
|
opd->d = pIndex_->d;
|
||||||
opd->index_type = BuildIndexType;
|
opd->index_type = BuildIndexType;
|
||||||
|
@ -86,7 +87,7 @@ std::shared_ptr<ExecutionEngine> FaissExecutionEngine::BuildIndex(const std::str
|
||||||
dynamic_cast<faiss::IndexFlat*>(from_index->index)->xb.data(),
|
dynamic_cast<faiss::IndexFlat*>(from_index->index)->xb.data(),
|
||||||
from_index->id_map.data());
|
from_index->id_map.data());
|
||||||
|
|
||||||
std::shared_ptr<ExecutionEngine> new_ee(new FaissExecutionEngine(index->data(), location));
|
std::shared_ptr<FaissExecutionEngine> new_ee(new FaissExecutionEngine(index->data(), location));
|
||||||
new_ee->Serialize();
|
new_ee->Serialize();
|
||||||
return new_ee;
|
return new_ee;
|
||||||
}
|
}
|
||||||
|
@ -109,99 +110,6 @@ Status FaissExecutionEngine::Cache() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
FaissExecutionEngineBase::FaissExecutionEngineBase(uint16_t dimension, const std::string& location)
|
|
||||||
: pIndex_(faiss::index_factory(dimension, RawIndexType.c_str())),
|
|
||||||
location_(location) {
|
|
||||||
}
|
|
||||||
|
|
||||||
FaissExecutionEngineBase::FaissExecutionEngineBase(std::shared_ptr<faiss::Index> index, const std::string& location)
|
|
||||||
: pIndex_(index),
|
|
||||||
location_(location) {
|
|
||||||
}
|
|
||||||
|
|
||||||
Status FaissExecutionEngineBase::AddWithIds(long n, const float *xdata, const long *xids) {
|
|
||||||
pIndex_->add_with_ids(n, xdata, xids);
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t FaissExecutionEngineBase::Count() const {
|
|
||||||
return (size_t)(pIndex_->ntotal);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t FaissExecutionEngineBase::Size() const {
|
|
||||||
return (size_t)(Count() * pIndex_->d);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t FaissExecutionEngineBase::PhysicalSize() const {
|
|
||||||
return (size_t)(Size()*sizeof(float));
|
|
||||||
}
|
|
||||||
|
|
||||||
Status FaissExecutionEngineBase::Serialize() {
|
|
||||||
write_index(pIndex_.get(), location_.c_str());
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
Status FaissExecutionEngineBase::Load() {
|
|
||||||
auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
|
|
||||||
if (!index) {
|
|
||||||
index = read_index(location_);
|
|
||||||
Cache();
|
|
||||||
LOG(DEBUG) << "Disk io from: " << location_;
|
|
||||||
}
|
|
||||||
|
|
||||||
pIndex_ = index->data();
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
Status FaissExecutionEngineBase::Merge(const std::string& location) {
|
|
||||||
if (location == location_) {
|
|
||||||
return Status::Error("Cannot Merge Self");
|
|
||||||
}
|
|
||||||
auto to_merge = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
|
|
||||||
if (!to_merge) {
|
|
||||||
to_merge = read_index(location);
|
|
||||||
}
|
|
||||||
auto file_index = dynamic_cast<faiss::IndexIDMap*>(to_merge->data().get());
|
|
||||||
pIndex_->add_with_ids(file_index->ntotal, dynamic_cast<faiss::IndexFlat*>(file_index->index)->xb.data(),
|
|
||||||
file_index->id_map.data());
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<FaissExecutionEngineBase> FaissExecutionEngineBase::BuildIndex(const std::string& location) {
|
|
||||||
auto opd = std::make_shared<Operand>();
|
|
||||||
opd->d = pIndex_->d;
|
|
||||||
opd->index_type = BuildIndexType;
|
|
||||||
IndexBuilderPtr pBuilder = GetIndexBuilder(opd);
|
|
||||||
|
|
||||||
auto from_index = dynamic_cast<faiss::IndexIDMap*>(pIndex_.get());
|
|
||||||
|
|
||||||
auto index = pBuilder->build_all(from_index->ntotal,
|
|
||||||
dynamic_cast<faiss::IndexFlat*>(from_index->index)->xb.data(),
|
|
||||||
from_index->id_map.data());
|
|
||||||
|
|
||||||
std::shared_ptr<FaissExecutionEngineBase> new_ee(new FaissExecutionEngineBase(index->data(), location));
|
|
||||||
new_ee->Serialize();
|
|
||||||
return new_ee;
|
|
||||||
}
|
|
||||||
|
|
||||||
Status FaissExecutionEngineBase::Search(long n,
|
|
||||||
const float *data,
|
|
||||||
long k,
|
|
||||||
float *distances,
|
|
||||||
long *labels) const {
|
|
||||||
|
|
||||||
pIndex_->search(n, data, k, distances, labels);
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
Status FaissExecutionEngineBase::Cache() {
|
|
||||||
zilliz::vecwise::cache::CpuCacheMgr::GetInstance(
|
|
||||||
)->InsertItem(location_, std::make_shared<Index>(pIndex_));
|
|
||||||
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace engine
|
} // namespace engine
|
||||||
} // namespace vecwise
|
} // namespace vecwise
|
||||||
} // namespace zilliz
|
} // namespace zilliz
|
||||||
|
|
|
@ -13,44 +13,12 @@ namespace zilliz {
|
||||||
namespace vecwise {
|
namespace vecwise {
|
||||||
namespace engine {
|
namespace engine {
|
||||||
|
|
||||||
class FaissExecutionEngine : public ExecutionEngine {
|
|
||||||
|
class FaissExecutionEngine : public ExecutionEngine<FaissExecutionEngine> {
|
||||||
public:
|
public:
|
||||||
FaissExecutionEngine(uint16_t dimension, const std::string& location);
|
FaissExecutionEngine(uint16_t dimension, const std::string& location);
|
||||||
FaissExecutionEngine(std::shared_ptr<faiss::Index> index, const std::string& location);
|
FaissExecutionEngine(std::shared_ptr<faiss::Index> index, const std::string& location);
|
||||||
|
|
||||||
virtual Status AddWithIds(long n, const float *xdata, const long *xids) override;
|
|
||||||
|
|
||||||
virtual size_t Count() const override;
|
|
||||||
|
|
||||||
virtual size_t Size() const override;
|
|
||||||
|
|
||||||
virtual size_t PhysicalSize() const override;
|
|
||||||
|
|
||||||
virtual Status Merge(const std::string& location) override;
|
|
||||||
|
|
||||||
virtual Status Serialize() override;
|
|
||||||
virtual Status Load() override;
|
|
||||||
|
|
||||||
virtual Status Cache() override;
|
|
||||||
|
|
||||||
virtual Status Search(long n,
|
|
||||||
const float *data,
|
|
||||||
long k,
|
|
||||||
float *distances,
|
|
||||||
long *labels) const override;
|
|
||||||
|
|
||||||
virtual std::shared_ptr<ExecutionEngine> BuildIndex(const std::string&) override;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
std::shared_ptr<faiss::Index> pIndex_;
|
|
||||||
std::string location_;
|
|
||||||
};
|
|
||||||
|
|
||||||
class FaissExecutionEngineBase : public ExecutionEngineBase<FaissExecutionEngineBase> {
|
|
||||||
public:
|
|
||||||
FaissExecutionEngineBase(uint16_t dimension, const std::string& location);
|
|
||||||
FaissExecutionEngineBase(std::shared_ptr<faiss::Index> index, const std::string& location);
|
|
||||||
|
|
||||||
Status AddWithIds(const std::vector<float>& vectors,
|
Status AddWithIds(const std::vector<float>& vectors,
|
||||||
const std::vector<long>& vector_ids);
|
const std::vector<long>& vector_ids);
|
||||||
|
|
||||||
|
@ -74,7 +42,7 @@ public:
|
||||||
float *distances,
|
float *distances,
|
||||||
long *labels) const;
|
long *labels) const;
|
||||||
|
|
||||||
std::shared_ptr<FaissExecutionEngineBase> BuildIndex(const std::string&);
|
std::shared_ptr<FaissExecutionEngine> BuildIndex(const std::string&);
|
||||||
|
|
||||||
Status Cache();
|
Status Cache();
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -18,7 +18,7 @@ MemVectors::MemVectors(const std::shared_ptr<meta::Meta>& meta_ptr,
|
||||||
options_(options),
|
options_(options),
|
||||||
schema_(schema),
|
schema_(schema),
|
||||||
_pIdGenerator(new SimpleIDGenerator()),
|
_pIdGenerator(new SimpleIDGenerator()),
|
||||||
pEE_(new FaissExecutionEngineBase(schema_.dimension, schema_.location)) {
|
pEE_(new FaissExecutionEngine(schema_.dimension, schema_.location)) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemVectors::add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
|
void MemVectors::add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
|
||||||
|
|
|
@ -19,7 +19,7 @@ namespace meta {
|
||||||
class Meta;
|
class Meta;
|
||||||
}
|
}
|
||||||
|
|
||||||
class FaissExecutionEngineBase;
|
class FaissExecutionEngine;
|
||||||
|
|
||||||
class MemVectors {
|
class MemVectors {
|
||||||
public:
|
public:
|
||||||
|
@ -47,7 +47,7 @@ private:
|
||||||
Options options_;
|
Options options_;
|
||||||
meta::GroupFileSchema schema_;
|
meta::GroupFileSchema schema_;
|
||||||
IDGenerator* _pIdGenerator;
|
IDGenerator* _pIdGenerator;
|
||||||
std::shared_ptr<FaissExecutionEngineBase> pEE_;
|
std::shared_ptr<FaissExecutionEngine> pEE_;
|
||||||
|
|
||||||
}; // MemVectors
|
}; // MemVectors
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue