mirror of https://github.com/milvus-io/milvus.git
Merge branch 'mem-0.3.1' into 'branch-0.3.1'
MS-180: Add new mem manager See merge request megasearch/milvus!168 Former-commit-id: bf84d3e42c2f6c0c9ffbab6ac2a6a8982167cd4apull/191/head
commit
f940d1af30
|
@ -18,6 +18,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
|||
- MS-152 - Delete assert in MySQLMetaImpl and change MySQLConnectionPool impl
|
||||
|
||||
## New Feature
|
||||
- MS-180 - Add new mem manager
|
||||
|
||||
## Task
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ server_config:
|
|||
address: 0.0.0.0
|
||||
port: 19530 # the port milvus listens on, default: 19530, range: 1025 ~ 65534
|
||||
gpu_index: 0 # the gpu milvus uses, default: 0, range: 0 ~ gpu number - 1
|
||||
mode: single # milvus deployment type: single, cluster
|
||||
mode: single # milvus deployment type: single, cluster, read_only
|
||||
|
||||
db_config:
|
||||
db_path: @MILVUS_DB_PATH@ # milvus data storage path
|
||||
|
@ -15,6 +15,8 @@ db_config:
|
|||
index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB
|
||||
archive_disk_threshold: 512 # trigger archive action if storage size exceeds this value, unit: GB
|
||||
archive_days_threshold: 30 # files older than x days will be archived, unit: day
|
||||
maximum_memory: 4 # maximum memory allowed, default: 4, unit: GB, should be at least 1 GB.
|
||||
# the sum of maximum_memory and cpu_cache_capacity should be less than total memory
|
||||
|
||||
metric_config:
|
||||
is_startup: off # whether to start monitoring: on, off
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
/*******************************************************************************
|
||||
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
* Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
* Proprietary and confidential.
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
namespace zilliz {
namespace milvus {
namespace engine {

// Binary size multipliers; each one is 1024 times the previous one.
constexpr size_t K = 1024UL;
constexpr size_t M = K * K;
constexpr size_t G = K * M;
constexpr size_t T = K * G;

// Memory budget of a single in-memory table file (128 * M).
constexpr size_t MAX_TABLE_FILE_MEM = 128 * M;

// Size of one vector component; components are stored as float.
constexpr int VECTOR_TYPE_SIZE = sizeof(float);

} // namespace engine
} // namespace milvus
} // namespace zilliz
|
|
@ -87,8 +87,7 @@ DBImpl::DBImpl(const Options& options)
|
|||
compact_thread_pool_(1, 1),
|
||||
index_thread_pool_(1, 1) {
|
||||
meta_ptr_ = DBMetaImplFactory::Build(options.meta, options.mode);
|
||||
mem_mgr_ = std::make_shared<MemManager>(meta_ptr_, options_);
|
||||
// mem_mgr_ = (MemManagerPtr)(new MemManager(meta_ptr_, options_));
|
||||
mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_);
|
||||
if (options.mode != Options::MODE::READ_ONLY) {
|
||||
StartTimerTasks();
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "MemManager.h"
|
||||
#include "Types.h"
|
||||
#include "utils/ThreadPool.h"
|
||||
#include "MemManagerAbstract.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
@ -33,7 +34,6 @@ class Meta;
|
|||
class DBImpl : public DB {
|
||||
public:
|
||||
using MetaPtr = meta::Meta::Ptr;
|
||||
using MemManagerPtr = typename MemManager::Ptr;
|
||||
|
||||
explicit DBImpl(const Options &options);
|
||||
|
||||
|
@ -123,7 +123,7 @@ class DBImpl : public DB {
|
|||
std::thread bg_timer_thread_;
|
||||
|
||||
MetaPtr meta_ptr_;
|
||||
MemManagerPtr mem_mgr_;
|
||||
MemManagerAbstractPtr mem_mgr_;
|
||||
|
||||
server::ThreadPool compact_thread_pool_;
|
||||
std::list<std::future<void>> compact_thread_results_;
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include <stdlib.h>
|
||||
#include "Factories.h"
|
||||
#include "DBImpl.h"
|
||||
#include "MemManager.h"
|
||||
#include "NewMemManager.h"
|
||||
|
||||
#include <time.h>
|
||||
#include <sstream>
|
||||
|
@ -20,6 +22,8 @@ namespace zilliz {
|
|||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
#define USE_NEW_MEM_MANAGER 1
|
||||
|
||||
DBMetaOptions DBMetaOptionsFactory::Build(const std::string& path) {
|
||||
auto p = path;
|
||||
if(p == "") {
|
||||
|
@ -72,17 +76,14 @@ std::shared_ptr<meta::Meta> DBMetaImplFactory::Build(const DBMetaOptions& metaOp
|
|||
if (dialect.find("mysql") != std::string::npos) {
|
||||
ENGINE_LOG_INFO << "Using MySQL";
|
||||
return std::make_shared<meta::MySQLMetaImpl>(meta::MySQLMetaImpl(metaOptions, mode));
|
||||
}
|
||||
else if (dialect.find("sqlite") != std::string::npos) {
|
||||
} else if (dialect.find("sqlite") != std::string::npos) {
|
||||
ENGINE_LOG_INFO << "Using SQLite";
|
||||
return std::make_shared<meta::DBMetaImpl>(meta::DBMetaImpl(metaOptions));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
ENGINE_LOG_ERROR << "Invalid dialect in URI: dialect = " << dialect;
|
||||
throw InvalidArgumentException("URI dialect is not mysql / sqlite");
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
ENGINE_LOG_ERROR << "Wrong URI format: URI = " << uri;
|
||||
throw InvalidArgumentException("Wrong URI format ");
|
||||
}
|
||||
|
@ -98,6 +99,15 @@ DB* DBFactory::Build(const Options& options) {
|
|||
return new DBImpl(options);
|
||||
}
|
||||
|
||||
// Creates the memory manager handed to DBImpl.
// The concrete type is chosen at compile time: NewMemManager when
// USE_NEW_MEM_MANAGER is defined, the original MemManager otherwise.
// Either way the result is returned through the MemManagerAbstract interface.
MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr<meta::Meta>& meta,
                                               const Options& options) {
#ifdef USE_NEW_MEM_MANAGER
    MemManagerAbstractPtr manager = std::make_shared<NewMemManager>(meta, options);
#else
    MemManagerAbstractPtr manager = std::make_shared<MemManager>(meta, options);
#endif
    return manager;
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
|
|
@ -10,16 +10,18 @@
|
|||
#include "MySQLMetaImpl.h"
|
||||
#include "Options.h"
|
||||
#include "ExecutionEngine.h"
|
||||
#include "MemManagerAbstract.h"
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
struct DBMetaOptionsFactory {
|
||||
static DBMetaOptions Build(const std::string& path = "");
|
||||
static DBMetaOptions Build(const std::string &path = "");
|
||||
};
|
||||
|
||||
struct OptionsFactory {
|
||||
|
@ -28,12 +30,16 @@ struct OptionsFactory {
|
|||
|
||||
struct DBMetaImplFactory {
|
||||
static std::shared_ptr<meta::DBMetaImpl> Build();
|
||||
static std::shared_ptr<meta::Meta> Build(const DBMetaOptions& metaOptions, const int& mode);
|
||||
static std::shared_ptr<meta::Meta> Build(const DBMetaOptions &metaOptions, const int &mode);
|
||||
};
|
||||
|
||||
struct DBFactory {
|
||||
static std::shared_ptr<DB> Build();
|
||||
static DB* Build(const Options&);
|
||||
static DB *Build(const Options &);
|
||||
};
|
||||
|
||||
// Factory for the insert-buffer memory manager that DBImpl holds in mem_mgr_.
// Build() takes the meta store handle and the DB options and returns the
// manager through the MemManagerAbstract interface; which concrete manager is
// created is decided inside Factories.cpp.
struct MemManagerFactory {
|
||||
static MemManagerAbstractPtr Build(const std::shared_ptr<meta::Meta> &meta, const Options &options);
|
||||
};
|
||||
|
||||
} // namespace engine
|
||||
|
|
|
@ -8,28 +8,30 @@
|
|||
#include "MetaConsts.h"
|
||||
#include "EngineFactory.h"
|
||||
#include "metrics/Metrics.h"
|
||||
#include "Log.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
#include <easylogging++.h>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
MemVectors::MemVectors(const std::shared_ptr<meta::Meta>& meta_ptr,
|
||||
const meta::TableFileSchema& schema, const Options& options)
|
||||
: meta_(meta_ptr),
|
||||
options_(options),
|
||||
schema_(schema),
|
||||
id_generator_(new SimpleIDGenerator()),
|
||||
active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) {
|
||||
MemVectors::MemVectors(const std::shared_ptr<meta::Meta> &meta_ptr,
|
||||
const meta::TableFileSchema &schema, const Options &options)
|
||||
: meta_(meta_ptr),
|
||||
options_(options),
|
||||
schema_(schema),
|
||||
id_generator_(new SimpleIDGenerator()),
|
||||
active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType) schema_.engine_type_)) {
|
||||
}
|
||||
|
||||
|
||||
Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
|
||||
if(active_engine_ == nullptr) {
|
||||
Status MemVectors::Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_) {
|
||||
if (active_engine_ == nullptr) {
|
||||
return Status::Error("index engine is null");
|
||||
}
|
||||
|
||||
|
@ -38,13 +40,15 @@ Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_)
|
|||
Status status = active_engine_->AddWithIds(n_, vectors_, vector_ids_.data());
|
||||
auto end_time = METRICS_NOW_TIME;
|
||||
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
|
||||
server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast<int>(n_), static_cast<int>(schema_.dimension_), total_time);
|
||||
server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast<int>(n_),
|
||||
static_cast<int>(schema_.dimension_),
|
||||
total_time);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
size_t MemVectors::RowCount() const {
|
||||
if(active_engine_ == nullptr) {
|
||||
if (active_engine_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -52,15 +56,15 @@ size_t MemVectors::RowCount() const {
|
|||
}
|
||||
|
||||
size_t MemVectors::Size() const {
|
||||
if(active_engine_ == nullptr) {
|
||||
if (active_engine_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return active_engine_->Size();
|
||||
}
|
||||
|
||||
Status MemVectors::Serialize(std::string& table_id) {
|
||||
if(active_engine_ == nullptr) {
|
||||
Status MemVectors::Serialize(std::string &table_id) {
|
||||
if (active_engine_ == nullptr) {
|
||||
return Status::Error("index engine is null");
|
||||
}
|
||||
|
||||
|
@ -72,15 +76,16 @@ Status MemVectors::Serialize(std::string& table_id) {
|
|||
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
|
||||
schema_.size_ = size;
|
||||
|
||||
server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size/total_time);
|
||||
server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size / total_time);
|
||||
|
||||
schema_.file_type_ = (size >= options_.index_trigger_size) ?
|
||||
meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW;
|
||||
meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW;
|
||||
|
||||
auto status = meta_->UpdateTableFile(schema_);
|
||||
|
||||
LOG(DEBUG) << "New " << ((schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index")
|
||||
<< " file " << schema_.file_id_ << " of size " << (double)(active_engine_->Size()) / (double)meta::M << " M";
|
||||
<< " file " << schema_.file_id_ << " of size " << (double) (active_engine_->Size()) / (double) meta::M
|
||||
<< " M";
|
||||
|
||||
active_engine_->Cache();
|
||||
|
||||
|
@ -98,7 +103,7 @@ MemVectors::~MemVectors() {
|
|||
* MemManager
|
||||
*/
|
||||
MemManager::MemVectorsPtr MemManager::GetMemByTable(
|
||||
const std::string& table_id) {
|
||||
const std::string &table_id) {
|
||||
auto memIt = mem_id_map_.find(table_id);
|
||||
if (memIt != mem_id_map_.end()) {
|
||||
return memIt->second;
|
||||
|
@ -115,26 +120,31 @@ MemManager::MemVectorsPtr MemManager::GetMemByTable(
|
|||
return mem_id_map_[table_id];
|
||||
}
|
||||
|
||||
Status MemManager::InsertVectors(const std::string& table_id_,
|
||||
size_t n_,
|
||||
const float* vectors_,
|
||||
IDNumbers& vector_ids_) {
|
||||
Status MemManager::InsertVectors(const std::string &table_id_,
|
||||
size_t n_,
|
||||
const float *vectors_,
|
||||
IDNumbers &vector_ids_) {
|
||||
|
||||
LOG(DEBUG) << "MemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() <<
|
||||
", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem();
|
||||
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
|
||||
return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
|
||||
}
|
||||
|
||||
Status MemManager::InsertVectorsNoLock(const std::string& table_id,
|
||||
size_t n,
|
||||
const float* vectors,
|
||||
IDNumbers& vector_ids) {
|
||||
Status MemManager::InsertVectorsNoLock(const std::string &table_id,
|
||||
size_t n,
|
||||
const float *vectors,
|
||||
IDNumbers &vector_ids) {
|
||||
|
||||
MemVectorsPtr mem = GetMemByTable(table_id);
|
||||
if (mem == nullptr) {
|
||||
return Status::NotFound("Group " + table_id + " not found!");
|
||||
}
|
||||
|
||||
//makesure each file size less than index_trigger_size
|
||||
if(mem->Size() > options_.index_trigger_size) {
|
||||
if (mem->Size() > options_.index_trigger_size) {
|
||||
std::unique_lock<std::mutex> lock(serialization_mtx_);
|
||||
immu_mem_list_.push_back(mem);
|
||||
mem_id_map_.erase(table_id);
|
||||
|
@ -147,8 +157,8 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id,
|
|||
Status MemManager::ToImmutable() {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
MemIdMap temp_map;
|
||||
for (auto& kv: mem_id_map_) {
|
||||
if(kv.second->RowCount() == 0) {
|
||||
for (auto &kv: mem_id_map_) {
|
||||
if (kv.second->RowCount() == 0) {
|
||||
temp_map.insert(kv);
|
||||
continue;//empty vector, no need to serialize
|
||||
}
|
||||
|
@ -159,12 +169,12 @@ Status MemManager::ToImmutable() {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status MemManager::Serialize(std::set<std::string>& table_ids) {
|
||||
Status MemManager::Serialize(std::set<std::string> &table_ids) {
|
||||
ToImmutable();
|
||||
std::unique_lock<std::mutex> lock(serialization_mtx_);
|
||||
std::string table_id;
|
||||
table_ids.clear();
|
||||
for (auto& mem : immu_mem_list_) {
|
||||
for (auto &mem : immu_mem_list_) {
|
||||
mem->Serialize(table_id);
|
||||
table_ids.insert(table_id);
|
||||
}
|
||||
|
@ -172,7 +182,7 @@ Status MemManager::Serialize(std::set<std::string>& table_ids) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status MemManager::EraseMemVector(const std::string& table_id) {
|
||||
Status MemManager::EraseMemVector(const std::string &table_id) {
|
||||
{//erase MemVector from rapid-insert cache
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
mem_id_map_.erase(table_id);
|
||||
|
@ -181,8 +191,8 @@ Status MemManager::EraseMemVector(const std::string& table_id) {
|
|||
{//erase MemVector from serialize cache
|
||||
std::unique_lock<std::mutex> lock(serialization_mtx_);
|
||||
MemList temp_list;
|
||||
for (auto& mem : immu_mem_list_) {
|
||||
if(mem->TableId() != table_id) {
|
||||
for (auto &mem : immu_mem_list_) {
|
||||
if (mem->TableId() != table_id) {
|
||||
temp_list.push_back(mem);
|
||||
}
|
||||
}
|
||||
|
@ -192,6 +202,26 @@ Status MemManager::EraseMemVector(const std::string& table_id) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t MemManager::GetCurrentMutableMem() {
|
||||
size_t totalMem = 0;
|
||||
for (auto &kv : mem_id_map_) {
|
||||
auto memVector = kv.second;
|
||||
totalMem += memVector->Size();
|
||||
}
|
||||
return totalMem;
|
||||
}
|
||||
|
||||
size_t MemManager::GetCurrentImmutableMem() {
|
||||
size_t totalMem = 0;
|
||||
for (auto &memVector : immu_mem_list_) {
|
||||
totalMem += memVector->Size();
|
||||
}
|
||||
return totalMem;
|
||||
}
|
||||
|
||||
size_t MemManager::GetCurrentMem() {
|
||||
return GetCurrentMutableMem() + GetCurrentImmutableMem();
|
||||
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
|
|
|
@ -9,80 +9,87 @@
|
|||
#include "IDGenerator.h"
|
||||
#include "Status.h"
|
||||
#include "Meta.h"
|
||||
#include "MemManagerAbstract.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <ctime>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
namespace meta {
|
||||
class Meta;
|
||||
class Meta;
|
||||
}
|
||||
|
||||
class MemVectors {
|
||||
public:
|
||||
public:
|
||||
using MetaPtr = meta::Meta::Ptr;
|
||||
using Ptr = std::shared_ptr<MemVectors>;
|
||||
|
||||
explicit MemVectors(const std::shared_ptr<meta::Meta>&,
|
||||
const meta::TableFileSchema&, const Options&);
|
||||
explicit MemVectors(const std::shared_ptr<meta::Meta> &,
|
||||
const meta::TableFileSchema &, const Options &);
|
||||
|
||||
Status Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_);
|
||||
Status Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_);
|
||||
|
||||
size_t RowCount() const;
|
||||
|
||||
size_t Size() const;
|
||||
|
||||
Status Serialize(std::string& table_id);
|
||||
Status Serialize(std::string &table_id);
|
||||
|
||||
~MemVectors();
|
||||
|
||||
const std::string& Location() const { return schema_.location_; }
|
||||
const std::string &Location() const { return schema_.location_; }
|
||||
|
||||
std::string TableId() const { return schema_.table_id_; }
|
||||
|
||||
private:
|
||||
private:
|
||||
MemVectors() = delete;
|
||||
MemVectors(const MemVectors&) = delete;
|
||||
MemVectors& operator=(const MemVectors&) = delete;
|
||||
MemVectors(const MemVectors &) = delete;
|
||||
MemVectors &operator=(const MemVectors &) = delete;
|
||||
|
||||
MetaPtr meta_;
|
||||
Options options_;
|
||||
meta::TableFileSchema schema_;
|
||||
IDGenerator* id_generator_;
|
||||
IDGenerator *id_generator_;
|
||||
ExecutionEnginePtr active_engine_;
|
||||
|
||||
}; // MemVectors
|
||||
|
||||
|
||||
|
||||
class MemManager {
|
||||
public:
|
||||
class MemManager : public MemManagerAbstract {
|
||||
public:
|
||||
using MetaPtr = meta::Meta::Ptr;
|
||||
using MemVectorsPtr = typename MemVectors::Ptr;
|
||||
using Ptr = std::shared_ptr<MemManager>;
|
||||
|
||||
MemManager(const std::shared_ptr<meta::Meta>& meta, const Options& options)
|
||||
MemManager(const std::shared_ptr<meta::Meta> &meta, const Options &options)
|
||||
: meta_(meta), options_(options) {}
|
||||
|
||||
MemVectorsPtr GetMemByTable(const std::string& table_id);
|
||||
Status InsertVectors(const std::string &table_id,
|
||||
size_t n, const float *vectors, IDNumbers &vector_ids) override;
|
||||
|
||||
Status InsertVectors(const std::string& table_id,
|
||||
size_t n, const float* vectors, IDNumbers& vector_ids);
|
||||
Status Serialize(std::set<std::string> &table_ids) override;
|
||||
|
||||
Status Serialize(std::set<std::string>& table_ids);
|
||||
Status EraseMemVector(const std::string &table_id) override;
|
||||
|
||||
Status EraseMemVector(const std::string& table_id);
|
||||
size_t GetCurrentMutableMem() override;
|
||||
|
||||
private:
|
||||
Status InsertVectorsNoLock(const std::string& table_id,
|
||||
size_t n, const float* vectors, IDNumbers& vector_ids);
|
||||
size_t GetCurrentImmutableMem() override;
|
||||
|
||||
size_t GetCurrentMem() override;
|
||||
|
||||
private:
|
||||
MemVectorsPtr GetMemByTable(const std::string &table_id);
|
||||
|
||||
Status InsertVectorsNoLock(const std::string &table_id,
|
||||
size_t n, const float *vectors, IDNumbers &vector_ids);
|
||||
Status ToImmutable();
|
||||
|
||||
using MemIdMap = std::map<std::string, MemVectorsPtr>;
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
#pragma once
|
||||
|
||||
#include <set>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
class MemManagerAbstract {
|
||||
public:
|
||||
|
||||
virtual Status InsertVectors(const std::string &table_id,
|
||||
size_t n, const float *vectors, IDNumbers &vector_ids) = 0;
|
||||
|
||||
virtual Status Serialize(std::set<std::string> &table_ids) = 0;
|
||||
|
||||
virtual Status EraseMemVector(const std::string &table_id) = 0;
|
||||
|
||||
virtual size_t GetCurrentMutableMem() = 0;
|
||||
|
||||
virtual size_t GetCurrentImmutableMem() = 0;
|
||||
|
||||
virtual size_t GetCurrentMem() = 0;
|
||||
|
||||
}; // MemManagerAbstract
|
||||
|
||||
using MemManagerAbstractPtr = std::shared_ptr<MemManagerAbstract>;
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,88 @@
|
|||
#include "MemTable.h"
|
||||
#include "Log.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
// Builds an empty buffer for table_id. The meta handle and options are stored
// and later passed on to every MemTableFile this table creates.
MemTable::MemTable(const std::string &table_id,
                   const std::shared_ptr<meta::Meta> &meta,
                   const Options &options)
    : table_id_(table_id),
      meta_(meta),
      options_(options) {
}
|
||||
|
||||
// Drains source into this table's files until source->AllAdded() is true.
// Vectors go into the last file of the list; when there is no file yet or the
// last one reports IsFull(), a new MemTableFile is created, and it is appended
// to the list only if its first Add succeeded.
// Returns Status::OK() on success, or Status::Error carrying the failing
// status text as soon as one Add fails.
Status MemTable::Add(VectorSource::Ptr &source) {
    while (!source->AllAdded()) {
        Status status;

        const bool need_new_file =
            mem_table_file_list_.empty() || mem_table_file_list_.back()->IsFull();

        if (need_new_file) {
            MemTableFile::Ptr fresh_file = std::make_shared<MemTableFile>(table_id_, meta_, options_);
            status = fresh_file->Add(source);
            if (status.ok()) {
                mem_table_file_list_.emplace_back(fresh_file);
            }
        } else {
            status = mem_table_file_list_.back()->Add(source);
        }

        if (status.ok()) {
            continue;
        }

        std::string err_msg = "MemTable::Add failed: " + status.ToString();
        ENGINE_LOG_ERROR << err_msg;
        return Status::Error(err_msg);
    }
    return Status::OK();
}
|
||||
|
||||
// Hands back the most recently created file of this table through
// mem_table_file.
// When the table has no files yet, mem_table_file is set to nullptr; calling
// back() on an empty vector would be undefined behaviour.
void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file) {
    if (mem_table_file_list_.empty()) {
        mem_table_file = nullptr;
        return;
    }
    mem_table_file = mem_table_file_list_.back();
}
|
||||
|
||||
size_t MemTable::GetTableFileCount() {
|
||||
return mem_table_file_list_.size();
|
||||
}
|
||||
|
||||
// Serializes the table's files from the front of the list to the back.
// Each file that serializes successfully is erased from the list while mutex_
// is held. On the first failure the error is logged and returned, and that
// file plus every file after it stay in the list.
Status MemTable::Serialize() {
    auto file_it = mem_table_file_list_.begin();
    while (file_it != mem_table_file_list_.end()) {
        auto status = (*file_it)->Serialize();
        if (!status.ok()) {
            std::string err_msg = "MemTable::Serialize failed: " + status.ToString();
            ENGINE_LOG_ERROR << err_msg;
            return Status::Error(err_msg);
        }

        // erase() returns the iterator to the next file, so no increment is needed.
        std::lock_guard<std::mutex> lock(mutex_);
        file_it = mem_table_file_list_.erase(file_it);
    }
    return Status::OK();
}
|
||||
|
||||
bool MemTable::Empty() {
|
||||
return mem_table_file_list_.empty();
|
||||
}
|
||||
|
||||
const std::string &MemTable::GetTableId() const {
|
||||
return table_id_;
|
||||
}
|
||||
|
||||
size_t MemTable::GetCurrentMem() {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
size_t total_mem = 0;
|
||||
for (auto &mem_table_file : mem_table_file_list_) {
|
||||
total_mem += mem_table_file->GetCurrentMem();
|
||||
}
|
||||
return total_mem;
|
||||
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,53 @@
|
|||
#pragma once
|
||||
|
||||
#include "Status.h"
|
||||
#include "MemTableFile.h"
|
||||
#include "VectorSource.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
class MemTable {
|
||||
|
||||
public:
|
||||
|
||||
using Ptr = std::shared_ptr<MemTable>;
|
||||
using MemTableFileList = std::vector<MemTableFile::Ptr>;
|
||||
using MetaPtr = meta::Meta::Ptr;
|
||||
|
||||
MemTable(const std::string &table_id, const std::shared_ptr<meta::Meta> &meta, const Options &options);
|
||||
|
||||
Status Add(VectorSource::Ptr &source);
|
||||
|
||||
void GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file);
|
||||
|
||||
size_t GetTableFileCount();
|
||||
|
||||
Status Serialize();
|
||||
|
||||
bool Empty();
|
||||
|
||||
const std::string &GetTableId() const;
|
||||
|
||||
size_t GetCurrentMem();
|
||||
|
||||
private:
|
||||
const std::string table_id_;
|
||||
|
||||
MemTableFileList mem_table_file_list_;
|
||||
|
||||
MetaPtr meta_;
|
||||
|
||||
Options options_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
}; //MemTable
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,108 @@
|
|||
#include "MemTableFile.h"
|
||||
#include "Constants.h"
|
||||
#include "Log.h"
|
||||
#include "EngineFactory.h"
|
||||
#include "metrics/Metrics.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
// Registers a new table file for table_id in the meta store and, if that
// worked, builds the execution engine that will hold the vectors.
// When the meta call fails, execution_engine_ is left unset.
MemTableFile::MemTableFile(const std::string &table_id,
                           const std::shared_ptr<meta::Meta> &meta,
                           const Options &options)
    : table_id_(table_id),
      meta_(meta),
      options_(options) {
    current_mem_ = 0;

    if (!CreateTableFile().ok()) {
        return;
    }

    execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_,
                                             table_file_schema_.location_,
                                             (EngineType) table_file_schema_.engine_type_);
}
|
||||
|
||||
// Asks the meta store to create a table file for table_id_.
// On success the schema filled in by the meta store is saved in
// table_file_schema_; on failure the error is logged and table_file_schema_ is
// left untouched. The meta store's status is returned in both cases.
Status MemTableFile::CreateTableFile() {
    meta::TableFileSchema new_schema;
    new_schema.table_id_ = table_id_;

    auto status = meta_->CreateTableFile(new_schema);
    if (!status.ok()) {
        std::string err_msg = "MemTableFile::CreateTableFile failed: " + status.ToString();
        ENGINE_LOG_ERROR << err_msg;
        return status;
    }

    table_file_schema_ = new_schema;
    return status;
}
|
||||
|
||||
// Pulls as many vectors from source as still fit into this file.
//
// Returns Status::Error when the file schema has no valid dimension or when
// the execution engine was never created (same guard MemVectors::Add uses).
// Returns Status::OK() without adding anything when not even one more vector
// fits. Otherwise returns the status of source->Add, and on success
// current_mem_ grows by the memory of the vectors actually added.
Status MemTableFile::Add(const VectorSource::Ptr &source) {

    if (table_file_schema_.dimension_ <= 0) {
        std::string err_msg = "MemTableFile::Add: table_file_schema dimension = " +
            std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_;
        ENGINE_LOG_ERROR << err_msg;
        return Status::Error(err_msg);
    }

    // The constructor leaves the engine unset when CreateTableFile failed.
    if (execution_engine_ == nullptr) {
        std::string err_msg = "MemTableFile::Add: execution engine is null, table_id = " + table_id_;
        ENGINE_LOG_ERROR << err_msg;
        return Status::Error(err_msg);
    }

    size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE;
    size_t mem_left = GetMemLeft();
    if (mem_left < single_vector_mem_size) {
        return Status::OK();
    }

    // Integer division already yields the number of whole vectors that fit;
    // the former std::ceil around it was a no-op on an integer quotient.
    size_t num_vectors_to_add = mem_left / single_vector_mem_size;
    size_t num_vectors_added = 0;
    auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added);
    if (status.ok()) {
        current_mem_ += (num_vectors_added * single_vector_mem_size);
    }
    return status;
}
|
||||
|
||||
size_t MemTableFile::GetCurrentMem() {
|
||||
return current_mem_;
|
||||
}
|
||||
|
||||
size_t MemTableFile::GetMemLeft() {
|
||||
return (MAX_TABLE_FILE_MEM - current_mem_);
|
||||
}
|
||||
|
||||
bool MemTableFile::IsFull() {
|
||||
size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE;
|
||||
return (GetMemLeft() < single_vector_mem_size);
|
||||
}
|
||||
|
||||
// Writes the buffered vectors out through the execution engine and records the
// result in the meta store.
// Steps: time the engine's Serialize() call, store the buffered size in the
// schema, report the IO speed metric, mark the file TO_INDEX when its size
// reaches options_.index_trigger_size (RAW otherwise), update the meta store,
// log the new file and finally call Cache() on the engine.
// Returns the status of the meta store update.
Status MemTableFile::Serialize() {
    auto start_time = METRICS_NOW_TIME;

    auto mem_size = GetCurrentMem();

    execution_engine_->Serialize();
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    table_file_schema_.size_ = mem_size;

    server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(static_cast<double>(mem_size) / total_time);

    if (mem_size >= options_.index_trigger_size) {
        table_file_schema_.file_type_ = meta::TableFileSchema::TO_INDEX;
    } else {
        table_file_schema_.file_type_ = meta::TableFileSchema::RAW;
    }

    auto status = meta_->UpdateTableFile(table_file_schema_);

    const bool is_raw = (table_file_schema_.file_type_ == meta::TableFileSchema::RAW);
    LOG(DEBUG) << "New " << (is_raw ? "raw" : "to_index")
               << " file " << table_file_schema_.file_id_ << " of size "
               << static_cast<double>(mem_size) / static_cast<double>(M) << " M";

    execution_engine_->Cache();

    return status;
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,52 @@
|
|||
#pragma once
|
||||
|
||||
#include "Status.h"
|
||||
#include "Meta.h"
|
||||
#include "VectorSource.h"
|
||||
#include "ExecutionEngine.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
class MemTableFile {
|
||||
|
||||
public:
|
||||
|
||||
using Ptr = std::shared_ptr<MemTableFile>;
|
||||
using MetaPtr = meta::Meta::Ptr;
|
||||
|
||||
MemTableFile(const std::string &table_id, const std::shared_ptr<meta::Meta> &meta, const Options &options);
|
||||
|
||||
Status Add(const VectorSource::Ptr &source);
|
||||
|
||||
size_t GetCurrentMem();
|
||||
|
||||
size_t GetMemLeft();
|
||||
|
||||
bool IsFull();
|
||||
|
||||
Status Serialize();
|
||||
|
||||
private:
|
||||
|
||||
Status CreateTableFile();
|
||||
|
||||
const std::string table_id_;
|
||||
|
||||
meta::TableFileSchema table_file_schema_;
|
||||
|
||||
MetaPtr meta_;
|
||||
|
||||
Options options_;
|
||||
|
||||
size_t current_mem_;
|
||||
|
||||
ExecutionEnginePtr execution_engine_;
|
||||
|
||||
}; //MemTableFile
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,127 @@
|
|||
#include "NewMemManager.h"
|
||||
#include "VectorSource.h"
|
||||
#include "Log.h"
|
||||
#include "Constants.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
// Looks up the mutable MemTable of table_id, creating and registering an empty
// one when the table has no buffer yet. Performs no locking of its own.
NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string &table_id) {
    auto found = mem_id_map_.find(table_id);
    if (found != mem_id_map_.end()) {
        return found->second;
    }

    MemTablePtr &slot = mem_id_map_[table_id];
    slot = std::make_shared<MemTable>(table_id, meta_, options_);
    return slot;
}
|
||||
|
||||
// Public insert entry point.
// First waits, polling once per millisecond, until the total buffered memory
// is no longer above options_.maximum_memory. Then logs the current memory
// figures, takes mutex_ and forwards to InsertVectorsNoLock.
Status NewMemManager::InsertVectors(const std::string &table_id_,
                                    size_t n_,
                                    const float *vectors_,
                                    IDNumbers &vector_ids_) {

    while (true) {
        if (!(GetCurrentMem() > options_.maximum_memory)) {
            break;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }

    LOG(DEBUG) << "NewMemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem()
               << ", immutable mem = " << GetCurrentImmutableMem()
               << ", total mem = " << GetCurrentMem();

    std::unique_lock<std::mutex> lock(mutex_);
    return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
}
|
||||
|
||||
// Insert without taking mutex_; InsertVectors acquires it before calling this.
// Wraps the raw vectors in a VectorSource, adds it to the table's MemTable and,
// when that succeeds, copies the ids held by the source into vector_ids.
// On failure vector_ids is left untouched and the failing status is returned.
Status NewMemManager::InsertVectorsNoLock(const std::string &table_id,
                                          size_t n,
                                          const float *vectors,
                                          IDNumbers &vector_ids) {

    MemTablePtr target_table = GetMemByTable(table_id);
    VectorSource::Ptr pending = std::make_shared<VectorSource>(n, vectors);

    auto status = target_table->Add(pending);
    if (!status.ok()) {
        return status;
    }

    vector_ids = pending->GetVectorIds();
    return status;
}
|
||||
|
||||
// Moves every non-empty MemTable from the mutable map to the immutable list so
// that Serialize can write it out; empty tables stay in the mutable map.
//
// mutex_ protects mem_id_map_. immu_mem_list_ is accessed under
// serialization_mtx_ by Serialize and EraseMemVector, so that mutex is taken
// here as well before the list is appended to. The lock order is mutex_ first,
// then serialization_mtx_. Always returns Status::OK().
Status NewMemManager::ToImmutable() {
    std::unique_lock<std::mutex> lock(mutex_);
    std::unique_lock<std::mutex> immutable_lock(serialization_mtx_);

    MemIdMap temp_map;
    for (auto &kv: mem_id_map_) {
        if (kv.second->Empty()) {
            //empty table, no need to serialize
            temp_map.insert(kv);
        } else {
            immu_mem_list_.push_back(kv.second);
        }
    }

    mem_id_map_.swap(temp_map);
    return Status::OK();
}
|
||||
|
||||
// Flushes all buffered tables.
// Non-empty tables are first moved to the immutable list; then, under
// serialization_mtx_, every table in that list is serialized and its id is
// recorded in table_ids (which is cleared first). The list is emptied
// afterwards. The status of the individual table serializations is not
// inspected; the function always returns Status::OK().
Status NewMemManager::Serialize(std::set<std::string> &table_ids) {
    ToImmutable();

    std::unique_lock<std::mutex> lock(serialization_mtx_);
    table_ids.clear();
    for (auto table_it = immu_mem_list_.begin(); table_it != immu_mem_list_.end(); ++table_it) {
        auto &table = *table_it;
        table->Serialize();
        table_ids.insert(table->GetTableId());
    }
    immu_mem_list_.clear();

    return Status::OK();
}
|
||||
|
||||
// Discards everything buffered for table_id: its entry in the mutable map
// (under mutex_) and every matching table in the immutable list (under
// serialization_mtx_). Always returns Status::OK().
Status NewMemManager::EraseMemVector(const std::string &table_id) {
    {//erase MemVector from rapid-insert cache
        std::unique_lock<std::mutex> lock(mutex_);
        mem_id_map_.erase(table_id);
    }

    {//erase MemVector from serialize cache
        std::unique_lock<std::mutex> lock(serialization_mtx_);
        MemList survivors;
        for (auto table_it = immu_mem_list_.begin(); table_it != immu_mem_list_.end(); ++table_it) {
            if ((*table_it)->GetTableId() == table_id) {
                continue;
            }
            survivors.push_back(*table_it);
        }
        immu_mem_list_.swap(survivors);
    }

    return Status::OK();
}
|
||||
|
||||
size_t NewMemManager::GetCurrentMutableMem() {
|
||||
size_t total_mem = 0;
|
||||
for (auto &kv : mem_id_map_) {
|
||||
auto memTable = kv.second;
|
||||
total_mem += memTable->GetCurrentMem();
|
||||
}
|
||||
return total_mem;
|
||||
}
|
||||
|
||||
size_t NewMemManager::GetCurrentImmutableMem() {
|
||||
size_t total_mem = 0;
|
||||
for (auto &mem_table : immu_mem_list_) {
|
||||
total_mem += mem_table->GetCurrentMem();
|
||||
}
|
||||
return total_mem;
|
||||
}
|
||||
|
||||
size_t NewMemManager::GetCurrentMem() {
|
||||
return GetCurrentMutableMem() + GetCurrentImmutableMem();
|
||||
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,61 @@
|
|||
#pragma once
|
||||
|
||||
#include "Meta.h"
|
||||
#include "MemTable.h"
|
||||
#include "Status.h"
|
||||
#include "MemManagerAbstract.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <ctime>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
class NewMemManager : public MemManagerAbstract {
|
||||
public:
|
||||
using MetaPtr = meta::Meta::Ptr;
|
||||
using Ptr = std::shared_ptr<NewMemManager>;
|
||||
using MemTablePtr = typename MemTable::Ptr;
|
||||
|
||||
NewMemManager(const std::shared_ptr<meta::Meta> &meta, const Options &options)
|
||||
: meta_(meta), options_(options) {}
|
||||
|
||||
Status InsertVectors(const std::string &table_id,
|
||||
size_t n, const float *vectors, IDNumbers &vector_ids) override;
|
||||
|
||||
Status Serialize(std::set<std::string> &table_ids) override;
|
||||
|
||||
Status EraseMemVector(const std::string &table_id) override;
|
||||
|
||||
size_t GetCurrentMutableMem() override;
|
||||
|
||||
size_t GetCurrentImmutableMem() override;
|
||||
|
||||
size_t GetCurrentMem() override;
|
||||
|
||||
private:
|
||||
MemTablePtr GetMemByTable(const std::string &table_id);
|
||||
|
||||
Status InsertVectorsNoLock(const std::string &table_id,
|
||||
size_t n, const float *vectors, IDNumbers &vector_ids);
|
||||
Status ToImmutable();
|
||||
|
||||
using MemIdMap = std::map<std::string, MemTablePtr>;
|
||||
using MemList = std::vector<MemTablePtr>;
|
||||
MemIdMap mem_id_map_;
|
||||
MemList immu_mem_list_;
|
||||
MetaPtr meta_;
|
||||
Options options_;
|
||||
std::mutex mutex_;
|
||||
std::mutex serialization_mtx_;
|
||||
}; // NewMemManager
|
||||
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -61,6 +61,7 @@ struct Options {
|
|||
size_t index_trigger_size = ONE_GB; //unit: byte
|
||||
DBMetaOptions meta;
|
||||
int mode = MODE::SINGLE;
|
||||
float maximum_memory = 4 * ONE_GB;
|
||||
}; // Options
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
#include "VectorSource.h"
|
||||
#include "ExecutionEngine.h"
|
||||
#include "EngineFactory.h"
|
||||
#include "Log.h"
|
||||
#include "metrics/Metrics.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
|
||||
// Records the vector count and the data pointer, starts the "added" counter
// at zero and creates a SimpleIDGenerator for id assignment. The vector data
// itself is not copied.
VectorSource::VectorSource(const size_t &n, const float *vectors)
    : n_(n),
      vectors_(vectors),
      current_num_vectors_added(0),
      id_generator_(new SimpleIDGenerator()) {
}
|
||||
|
||||
// Feeds the next slice of this source's vectors into `execution_engine`.
//
// At most `num_vectors_to_add` vectors are taken, clamped to the number of
// vectors not yet added. Fresh ids are generated for the slice and passed to
// ExecutionEngine::AddWithIds together with the data.
//
// @param execution_engine    engine receiving the vectors
// @param table_file_schema   supplies dimension_ (floats per vector)
// @param num_vectors_to_add  upper bound on vectors to add in this call
// @param num_vectors_added   out: vectors actually added by this call; 0 when
//                            the engine reports a failure
// @return the status reported by AddWithIds
Status VectorSource::Add(const ExecutionEnginePtr &execution_engine,
                         const meta::TableFileSchema &table_file_schema,
                         const size_t &num_vectors_to_add,
                         size_t &num_vectors_added) {

    auto start_time = METRICS_NOW_TIME;

    // Clamp against what is left instead of testing `added + to_add <= n_`:
    // the sum can wrap around for a very large request, which would let the
    // slice run past the end of the source buffer.
    const size_t remaining = n_ - current_num_vectors_added;
    num_vectors_added = num_vectors_to_add <= remaining ? num_vectors_to_add : remaining;

    IDNumbers vector_ids_to_add;
    id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add);
    Status status = execution_engine->AddWithIds(num_vectors_added,
                                                 vectors_ + current_num_vectors_added * table_file_schema.dimension_,
                                                 vector_ids_to_add.data());
    if (status.ok()) {
        current_num_vectors_added += num_vectors_added;
        vector_ids_.insert(vector_ids_.end(),
                           std::make_move_iterator(vector_ids_to_add.begin()),
                           std::make_move_iterator(vector_ids_to_add.end()));
    } else {
        ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString();
        // Nothing was recorded as added, so do not report a non-zero count.
        num_vectors_added = 0;
    }

    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // NOTE(review): the gauge is fed n_ (the whole source size), not the number
    // of vectors added by this call -- confirm that this is intended.
    server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast<int>(n_),
                                                               static_cast<int>(table_file_schema.dimension_),
                                                               total_time);

    return status;
}
|
||||
|
||||
size_t VectorSource::GetNumVectorsAdded() {
|
||||
return current_num_vectors_added;
|
||||
}
|
||||
|
||||
bool VectorSource::AllAdded() {
|
||||
return (current_num_vectors_added == n_);
|
||||
}
|
||||
|
||||
// Returns a copy of the ids assigned to the vectors added so far.
IDNumbers VectorSource::GetVectorIds() {
    IDNumbers ids_copy(vector_ids_);
    return ids_copy;
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -0,0 +1,46 @@
|
|||
#pragma once
|
||||
|
||||
#include "Status.h"
|
||||
#include "Meta.h"
|
||||
#include "IDGenerator.h"
|
||||
#include "ExecutionEngine.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
class VectorSource {
|
||||
|
||||
public:
|
||||
|
||||
using Ptr = std::shared_ptr<VectorSource>;
|
||||
|
||||
VectorSource(const size_t &n, const float *vectors);
|
||||
|
||||
Status Add(const ExecutionEnginePtr &execution_engine,
|
||||
const meta::TableFileSchema &table_file_schema,
|
||||
const size_t &num_vectors_to_add,
|
||||
size_t &num_vectors_added);
|
||||
|
||||
size_t GetNumVectorsAdded();
|
||||
|
||||
bool AllAdded();
|
||||
|
||||
IDNumbers GetVectorIds();
|
||||
|
||||
private:
|
||||
|
||||
const size_t n_;
|
||||
const float *vectors_;
|
||||
IDNumbers vector_ids_;
|
||||
|
||||
size_t current_num_vectors_added;
|
||||
|
||||
IDGenerator *id_generator_;
|
||||
|
||||
}; //VectorSource
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
|
@ -23,6 +23,14 @@ DBWrapper::DBWrapper() {
|
|||
if(index_size > 0) {//ensure larger than zero, unit is MB
|
||||
opt.index_trigger_size = (size_t)index_size * engine::ONE_MB;
|
||||
}
|
||||
float maximum_memory = config.GetFloatValue(CONFIG_MAXMIMUM_MEMORY);
|
||||
if (maximum_memory > 1.0) {
|
||||
opt.maximum_memory = maximum_memory * engine::ONE_GB;
|
||||
}
|
||||
else {
|
||||
std::cout << "ERROR: maximum_memory should be at least 1 GB" << std::endl;
|
||||
kill(0, SIGUSR1);
|
||||
}
|
||||
|
||||
ConfigNode& serverConfig = ServerConfig::GetInstance().GetConfig(CONFIG_SERVER);
|
||||
std::string mode = serverConfig.GetValue(CONFIG_CLUSTER_MODE, "single");
|
||||
|
|
|
@ -26,6 +26,7 @@ static const std::string CONFIG_DB_PATH = "db_path";
|
|||
static const std::string CONFIG_DB_INDEX_TRIGGER_SIZE = "index_building_threshold";
|
||||
static const std::string CONFIG_DB_ARCHIVE_DISK = "archive_disk_threshold";
|
||||
static const std::string CONFIG_DB_ARCHIVE_DAYS = "archive_days_threshold";
|
||||
static const std::string CONFIG_MAXMIMUM_MEMORY = "maximum_memory";
|
||||
|
||||
static const std::string CONFIG_LOG = "log_config";
|
||||
|
||||
|
|
|
@ -0,0 +1,372 @@
|
|||
#include "gtest/gtest.h"
|
||||
|
||||
#include "db/VectorSource.h"
|
||||
#include "db/MemTableFile.h"
|
||||
#include "db/MemTable.h"
|
||||
#include "utils.h"
|
||||
#include "db/Factories.h"
|
||||
#include "db/Constants.h"
|
||||
#include "db/EngineFactory.h"
|
||||
#include "metrics/Metrics.h"
|
||||
#include "db/MetaConsts.h"
|
||||
#include "boost/filesystem.hpp"
|
||||
|
||||
#include <thread>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
using namespace zilliz::milvus;
|
||||
|
||||
namespace {
|
||||
|
||||
static const std::string TABLE_NAME = "test_group";
|
||||
static constexpr int64_t TABLE_DIM = 256;
|
||||
static constexpr int64_t VECTOR_COUNT = 250000;
|
||||
static constexpr int64_t INSERT_LOOP = 10000;
|
||||
|
||||
// Schema shared by the tests in this file: table TABLE_NAME, TABLE_DIM
// dimensions, FAISS_IDMAP engine.
engine::meta::TableSchema BuildTableSchema() {
    engine::meta::TableSchema schema;
    schema.table_id_ = TABLE_NAME;
    schema.dimension_ = TABLE_DIM;
    schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    return schema;
}
|
||||
|
||||
// Fills `vectors` with n * TABLE_DIM values drawn from drand48(), row by row.
// The first component of row i is additionally shifted by i / 2000.
void BuildVectors(int64_t n, std::vector<float> &vectors) {
    vectors.assign(static_cast<size_t>(n * TABLE_DIM), 0.0f);

    for (int row = 0; row < n; ++row) {
        float *row_begin = vectors.data() + TABLE_DIM * row;
        for (int col = 0; col < TABLE_DIM; ++col) {
            row_begin[col] = drand48();
        }
        row_begin[0] += row / 2000.;
    }
}
|
||||
}
|
||||
|
||||
// Adds a 100-vector source to an engine in two steps (50 requested, then 60
// requested) and checks that the second step is clamped to the 50 remaining.
TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> meta_impl = engine::DBMetaImplFactory::Build();

    // table + one table file to add into
    engine::meta::TableSchema schema = BuildTableSchema();
    auto status = meta_impl->CreateTable(schema);
    ASSERT_TRUE(status.ok());

    engine::meta::TableFileSchema file_schema;
    file_schema.table_id_ = TABLE_NAME;
    status = meta_impl->CreateTableFile(file_schema);
    ASSERT_TRUE(status.ok());

    int64_t total = 100;
    std::vector<float> data;
    BuildVectors(total, data);

    engine::VectorSource source(total, data.data());

    size_t added_now;
    engine::ExecutionEnginePtr engine_ptr =
        engine::EngineFactory::Build(file_schema.dimension_,
                                     file_schema.location_,
                                     (engine::EngineType) file_schema.engine_type_);

    // first half
    status = source.Add(engine_ptr, file_schema, 50, added_now);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(added_now, 50);

    engine::IDNumbers ids = source.GetVectorIds();
    ASSERT_EQ(ids.size(), 50);

    // ask for more than is left
    status = source.Add(engine_ptr, file_schema, 60, added_now);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(added_now, 50);

    ids = source.GetVectorIds();
    ASSERT_EQ(ids.size(), 100);

    status = meta_impl->DropAll();
    ASSERT_TRUE(status.ok());
}
|
||||
|
||||
// Adds 100 vectors to a single MemTableFile, then a source sized to
// MAX_TABLE_FILE_MEM, and checks that only the part that still fits is taken
// and the file reports itself as full.
TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> meta_impl = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema schema = BuildTableSchema();
    auto status = meta_impl->CreateTable(schema);
    ASSERT_TRUE(status.ok());

    engine::MemTableFile table_file(TABLE_NAME, meta_impl, options);

    // small batch
    int64_t small_count = 100;
    std::vector<float> small_data;
    BuildVectors(small_count, small_data);

    engine::VectorSource::Ptr small_source =
        std::make_shared<engine::VectorSource>(small_count, small_data.data());

    status = table_file.Add(small_source);
    ASSERT_TRUE(status.ok());

    engine::IDNumbers ids = small_source->GetVectorIds();
    ASSERT_EQ(ids.size(), 100);

    size_t bytes_per_vector = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(table_file.GetCurrentMem(), small_count * bytes_per_vector);

    // batch as large as a whole table file
    int64_t max_count = engine::MAX_TABLE_FILE_MEM / bytes_per_vector;
    std::vector<float> max_data;
    BuildVectors(max_count, max_data);

    engine::VectorSource::Ptr max_source =
        std::make_shared<engine::VectorSource>(max_count, max_data.data());
    status = table_file.Add(max_source);

    ids = max_source->GetVectorIds();
    ASSERT_EQ(ids.size(), max_count - small_count);

    ASSERT_TRUE(table_file.IsFull());

    status = meta_impl->DropAll();
    ASSERT_TRUE(status.ok());
}
|
||||
|
||||
// Adds three sources of growing size to a MemTable and checks how the table
// spreads them over its table files, then serializes the table.
TEST_F(NewMemManagerTest, MEM_TABLE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> meta_impl = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema schema = BuildTableSchema();
    auto status = meta_impl->CreateTable(schema);
    ASSERT_TRUE(status.ok());

    // 1) 100 vectors
    int64_t small_count = 100;
    std::vector<float> small_data;
    BuildVectors(small_count, small_data);

    engine::VectorSource::Ptr small_source =
        std::make_shared<engine::VectorSource>(small_count, small_data.data());

    engine::MemTable table(TABLE_NAME, meta_impl, options);

    status = table.Add(small_source);
    ASSERT_TRUE(status.ok());

    engine::IDNumbers ids = small_source->GetVectorIds();
    ASSERT_EQ(ids.size(), 100);

    engine::MemTableFile::Ptr current_file;
    table.GetCurrentMemTableFile(current_file);
    size_t bytes_per_vector = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(current_file->GetCurrentMem(), small_count * bytes_per_vector);

    // 2) as many vectors as fit into one table file
    int64_t max_count = engine::MAX_TABLE_FILE_MEM / bytes_per_vector;
    std::vector<float> max_data;
    BuildVectors(max_count, max_data);

    engine::VectorSource::Ptr max_source =
        std::make_shared<engine::VectorSource>(max_count, max_data.data());
    status = table.Add(max_source);
    ASSERT_TRUE(status.ok());

    ids = max_source->GetVectorIds();
    ASSERT_EQ(ids.size(), max_count);

    table.GetCurrentMemTableFile(current_file);
    ASSERT_EQ(current_file->GetCurrentMem(), small_count * bytes_per_vector);

    ASSERT_EQ(table.GetTableFileCount(), 2);

    // 3) 1024000 vectors
    int64_t big_count = 1024000;
    std::vector<float> big_data;
    BuildVectors(big_count, big_data);

    engine::VectorSource::Ptr big_source =
        std::make_shared<engine::VectorSource>(big_count, big_data.data());

    status = table.Add(big_source);
    ASSERT_TRUE(status.ok());

    ids = big_source->GetVectorIds();
    ASSERT_EQ(ids.size(), big_count);

    // NOTE(review): the quotient below looks like an integer division (the
    // operands are integral), in which case std::ceil does not round anything
    // up -- confirm the intended formula before touching it.
    int expected_file_count =
        2 + std::ceil((big_count - small_count) * bytes_per_vector / engine::MAX_TABLE_FILE_MEM);
    ASSERT_EQ(table.GetTableFileCount(), expected_file_count);

    status = table.Serialize();
    ASSERT_TRUE(status.ok());

    status = meta_impl->DropAll();
    ASSERT_TRUE(status.ok());
}
|
||||
|
||||
// Inserts 1024000 vectors in one call, waits, then queries 20 randomly chosen
// inserted vectors and expects each to be its own nearest neighbour.
TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) {

    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // id of an inserted vector -> its data, used as the query
    std::map<int64_t, std::vector<float>> search_vectors;
    {
        // scoped so the large insert buffer is released before querying
        engine::IDNumbers vector_ids;
        int64_t nb = 1024000;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());

        std::this_thread::sleep_for(std::chrono::seconds(3));

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int64_t> dis(0, nb - 1);

        int64_t num_query = 20;
        for (int64_t i = 0; i < num_query; ++i) {
            int64_t index = dis(gen);
            std::vector<float> search;
            for (int64_t j = 0; j < TABLE_DIM; j++) {
                search.push_back(xb[index * TABLE_DIM + j]);
            }
            search_vectors.insert(std::make_pair(vector_ids[index], search));
        }
    }

    int k = 10;
    for (auto &pair : search_vectors) {
        auto &search = pair.second;
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, k, 1, search.data(), results);
        // Stop here on a failed query: indexing results[0][0] below without
        // this check would read from a result set that may be empty.
        ASSERT_TRUE(stat.ok());
        ASSERT_EQ(results[0][0].first, pair.first);
        ASSERT_LT(results[0][0].second, 0.00001);
    }

    delete db_;
    boost::filesystem::remove_all(options.meta.path);

}
|
||||
|
||||
// Inserts 20 batches of 409600 vectors through the DB interface and logs the
// elapsed time of the whole insert loop.
TEST_F(NewMemManagerTest, INSERT_TEST) {

    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    auto start_time = METRICS_NOW_TIME;

    int insert_loop = 20;
    for (int i = 0; i < insert_loop; ++i) {
        int64_t nb = 409600;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::IDNumbers vector_ids;
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());
    }
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // The value comes from METRICS_MICROSECONDS, so label it as microseconds
    // (the message previously claimed milliseconds).
    LOG(DEBUG) << "total_time spent in INSERT_TEST (us) : " << total_time;

    delete db_;
    boost::filesystem::remove_all(options.meta.path);

}
|
||||
|
||||
// Runs queries on a background thread while the main thread keeps inserting.
// The 5 query vectors are inserted first; each query is expected to return
// the matching inserted vector as its top hit.
TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) {

    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    int64_t nb = 409600;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    // Insert the query vectors BEFORE starting the search thread. Previously
    // this was iteration 0 of the insert loop below, which meant that
    //  - target_ids was written here while the search thread read it, ordered
    //    only by the thread's initial sleep, and
    //  - a failing ASSERT_EQ returned from the test with the thread still
    //    joinable, which terminates the process.
    db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
    ASSERT_EQ(target_ids.size(), qb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            // NOTE(review): prev_count is overwritten with the fresh count, so
            // the `count >= prev_count` check below always holds -- confirm
            // whether the previous iteration's size was meant to be compared.
            prev_count = count;

            START_TIMER;
            stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count / engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // `q` indexes the query vectors; it used to be named `k` and
            // shadowed the top-k value declared above.
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (auto result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    int loop = 20;

    // Iteration 0 (the query-vector insert) already happened above.
    for (auto i = 1; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();

    delete db_;
    boost::filesystem::remove_all(options.meta.path);

}
|
||||
|
|
@ -106,6 +106,18 @@ zilliz::milvus::engine::Options MySQLDBTest::GetOptions() {
|
|||
return options;
|
||||
}
|
||||
|
||||
void NewMemManagerTest::InitLog() {
|
||||
el::Configurations defaultConf;
|
||||
defaultConf.setToDefault();
|
||||
defaultConf.set(el::Level::Debug,
|
||||
el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)");
|
||||
el::Loggers::reconfigureLogger("default", defaultConf);
|
||||
}
|
||||
|
||||
void NewMemManagerTest::SetUp() {
|
||||
InitLog();
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
if (argc > 1) {
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#define STOP_TIMER(name)
|
||||
#endif
|
||||
|
||||
void ASSERT_STATS(zilliz::milvus::engine::Status& stat);
|
||||
void ASSERT_STATS(zilliz::milvus::engine::Status &stat);
|
||||
|
||||
//class TestEnv : public ::testing::Environment {
|
||||
//public:
|
||||
|
@ -54,8 +54,8 @@ void ASSERT_STATS(zilliz::milvus::engine::Status& stat);
|
|||
// ::testing::AddGlobalTestEnvironment(new TestEnv);
|
||||
|
||||
class DBTest : public ::testing::Test {
|
||||
protected:
|
||||
zilliz::milvus::engine::DB* db_;
|
||||
protected:
|
||||
zilliz::milvus::engine::DB *db_;
|
||||
|
||||
void InitLog();
|
||||
virtual void SetUp() override;
|
||||
|
@ -64,13 +64,13 @@ protected:
|
|||
};
|
||||
|
||||
class DBTest2 : public DBTest {
|
||||
protected:
|
||||
protected:
|
||||
virtual zilliz::milvus::engine::Options GetOptions() override;
|
||||
};
|
||||
|
||||
|
||||
class MetaTest : public DBTest {
|
||||
protected:
|
||||
protected:
|
||||
std::shared_ptr<zilliz::milvus::engine::meta::DBMetaImpl> impl_;
|
||||
|
||||
virtual void SetUp() override;
|
||||
|
@ -78,12 +78,17 @@ protected:
|
|||
};
|
||||
|
||||
class MySQLTest : public ::testing::Test {
|
||||
protected:
|
||||
protected:
|
||||
// std::shared_ptr<zilliz::milvus::engine::meta::MySQLMetaImpl> impl_;
|
||||
zilliz::milvus::engine::DBMetaOptions getDBMetaOptions();
|
||||
};
|
||||
|
||||
class MySQLDBTest : public ::testing::Test {
|
||||
protected:
|
||||
class MySQLDBTest : public ::testing::Test {
|
||||
protected:
|
||||
zilliz::milvus::engine::Options GetOptions();
|
||||
};
|
||||
|
||||
class NewMemManagerTest : public ::testing::Test {
|
||||
void InitLog();
|
||||
void SetUp() override;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue