From 09074384e9ba2e6d5259fbf8f4734483439b5877 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Mon, 24 Jun 2019 19:52:31 +0800 Subject: [PATCH 1/7] comment unfixed function Former-commit-id: eef2587059cb8ca61e49870ba6488b8dd494f79e --- cpp/unittest/metrics/metrics_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/unittest/metrics/metrics_test.cpp b/cpp/unittest/metrics/metrics_test.cpp index 9a315f5557..004f0dc33d 100644 --- a/cpp/unittest/metrics/metrics_test.cpp +++ b/cpp/unittest/metrics/metrics_test.cpp @@ -33,7 +33,7 @@ TEST_F(DBTest, Metric_Tes) { server::Metrics::GetInstance().Init(); // server::PrometheusMetrics::GetInstance().exposer_ptr()->RegisterCollectable(server::PrometheusMetrics::GetInstance().registry_ptr()); - zilliz::milvus::cache::CpuCacheMgr::GetInstance()->SetCapacity(2UL*1024*1024*1024); + zilliz::milvus::cache::CpuCacheMgr::GetInstance()->SetCapacity(1UL*1024*1024*1024); std::cout<CacheCapacity()< Date: Tue, 25 Jun 2019 18:42:18 +0800 Subject: [PATCH 2/7] MS-110 - Avoid huge file size Former-commit-id: a36dd97a21b2e0d6e5cf99250a7b884500991708 --- cpp/CHANGELOG.md | 1 + cpp/src/db/DBImpl.cpp | 10 +++-- cpp/src/db/MemManager.cpp | 91 +++++++++++++++++++++++++-------------- cpp/src/db/MemManager.h | 27 ++++++------ 4 files changed, 79 insertions(+), 50 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 1ebe05eb21..0520eaf331 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -11,6 +11,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-89 - Fix compile failed, libgpufaiss.a link missing - MS-90 - Fix arch match incorrect on ARM - MS-99 - Fix compilation bug +- MS-110 - Avoid huge file size ## Improvement - MS-82 - Update server startup welcome message diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index e66471d5c3..a0abb5e1ad 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -472,7 +472,7 @@ void DBImpl::StartCompactionTask() { } //serialize memory data - std::vector temp_table_ids; + std::set temp_table_ids; mem_mgr_->Serialize(temp_table_ids); for(auto& id : temp_table_ids) { compact_table_ids_.insert(id); @@ -543,7 +543,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, ENGINE_LOG_DEBUG << "New merged file " << table_file.file_id_ << " of size=" << index->PhysicalSize()/(1024*1024) << " M"; - index->Cache(); + //current disable this line to avoid memory + //index->Cache(); return status; } @@ -659,7 +660,8 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { << index->PhysicalSize()/(1024*1024) << " M" << " from file " << to_remove.file_id_; - index->Cache(); + //current disable this line to avoid memory + //index->Cache(); } catch (std::exception& ex) { return Status::Error("Build index encounter exception", ex.what()); @@ -698,7 +700,7 @@ Status DBImpl::Size(uint64_t& result) { DBImpl::~DBImpl() { shutting_down_.store(true, std::memory_order_release); bg_timer_thread_.join(); - std::vector ids; + std::set ids; mem_mgr_->Serialize(ids); } diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index d7a2087ee0..fa7f3c54b0 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -20,36 +20,54 @@ namespace engine { MemVectors::MemVectors(const std::shared_ptr& meta_ptr, const meta::TableFileSchema& schema, const Options& options) - : pMeta_(meta_ptr), + : meta_(meta_ptr), options_(options), schema_(schema), - pIdGenerator_(new SimpleIDGenerator()), - pEE_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) { + id_generator_(new SimpleIDGenerator()), + active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) { } -void MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { +Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { + if(active_engine_ == nullptr) { + return Status::Error("index engine is null"); + } + auto start_time = METRICS_NOW_TIME; - pIdGenerator_->GetNextIDNumbers(n_, vector_ids_); - pEE_->AddWithIds(n_, vectors_, vector_ids_.data()); + id_generator_->GetNextIDNumbers(n_, vector_ids_); + Status status = active_engine_->AddWithIds(n_, vectors_, vector_ids_.data()); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(schema_.dimension_), total_time); + + return status; } -size_t MemVectors::Total() const { - return pEE_->Count(); +size_t MemVectors::RowCount() const { + if(active_engine_ == nullptr) { + return 0; + } + + return active_engine_->Count(); } -size_t MemVectors::ApproximateSize() const { - return pEE_->Size(); +size_t MemVectors::Size() const { + if(active_engine_ == nullptr) { + return 0; + } + + return active_engine_->Size(); } Status MemVectors::Serialize(std::string& table_id) { + if(active_engine_ == nullptr) { + return Status::Error("index engine is null"); + } + table_id = schema_.table_id_; - auto size = ApproximateSize(); + auto size = Size(); auto start_time = METRICS_NOW_TIME; - pEE_->Serialize(); + active_engine_->Serialize(); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); schema_.size_ = size; @@ -59,20 +77,20 @@ Status MemVectors::Serialize(std::string& table_id) { schema_.file_type_ = (size >= options_.index_trigger_size) ? meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; - auto status = pMeta_->UpdateTableFile(schema_); + auto status = meta_->UpdateTableFile(schema_); LOG(DEBUG) << "New " << ((schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index") - << " file " << schema_.file_id_ << " of size " << (double)(pEE_->Size()) / (double)meta::M << " M"; + << " file " << schema_.file_id_ << " of size " << (double)(active_engine_->Size()) / (double)meta::M << " M"; - pEE_->Cache(); + active_engine_->Cache(); return status; } MemVectors::~MemVectors() { - if (pIdGenerator_ != nullptr) { - delete pIdGenerator_; - pIdGenerator_ = nullptr; + if (id_generator_ != nullptr) { + delete id_generator_; + id_generator_ = nullptr; } } @@ -81,20 +99,20 @@ MemVectors::~MemVectors() { */ MemManager::MemVectorsPtr MemManager::GetMemByTable( const std::string& table_id) { - auto memIt = memMap_.find(table_id); - if (memIt != memMap_.end()) { + auto memIt = mem_id_map_.find(table_id); + if (memIt != mem_id_map_.end()) { return memIt->second; } meta::TableFileSchema table_file; table_file.table_id_ = table_id; - auto status = pMeta_->CreateTableFile(table_file); + auto status = meta_->CreateTableFile(table_file); if (!status.ok()) { return nullptr; } - memMap_[table_id] = MemVectorsPtr(new MemVectors(pMeta_, table_file, options_)); - return memMap_[table_id]; + mem_id_map_[table_id] = MemVectorsPtr(new MemVectors(meta_, table_file, options_)); + return mem_id_map_[table_id]; } Status MemManager::InsertVectors(const std::string& table_id_, @@ -114,37 +132,44 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, if (mem == nullptr) { return Status::NotFound("Group " + table_id + " not found!"); } - mem->Add(n, vectors, vector_ids); - return Status::OK(); + //makesure each file size less than index_trigger_size + if(mem->Size() > options_.index_trigger_size) { + std::unique_lock lock(serialization_mtx_); + immu_mem_list_.push_back(mem); + mem_id_map_.erase(table_id); + return InsertVectorsNoLock(table_id, n, vectors, vector_ids); + } else { + return mem->Add(n, vectors, vector_ids); + } } Status MemManager::ToImmutable() { std::unique_lock lock(mutex_); - for (auto& kv: memMap_) { - immMems_.push_back(kv.second); + for (auto& kv: mem_id_map_) { + immu_mem_list_.push_back(kv.second); } - memMap_.clear(); + mem_id_map_.clear(); return Status::OK(); } -Status MemManager::Serialize(std::vector& table_ids) { +Status MemManager::Serialize(std::set& table_ids) { ToImmutable(); std::unique_lock lock(serialization_mtx_); std::string table_id; table_ids.clear(); - for (auto& mem : immMems_) { + for (auto& mem : immu_mem_list_) { mem->Serialize(table_id); - table_ids.push_back(table_id); + table_ids.insert(table_id); } - immMems_.clear(); + immu_mem_list_.clear(); return Status::OK(); } Status MemManager::EraseMemVector(const std::string& table_id) { std::unique_lock lock(mutex_); - memMap_.erase(table_id); + mem_id_map_.erase(table_id); return Status::OK(); } diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 1b329f971b..2aa0183898 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace zilliz { namespace milvus { @@ -32,11 +33,11 @@ public: explicit MemVectors(const std::shared_ptr&, const meta::TableFileSchema&, const Options&); - void Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_); + Status Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_); - size_t Total() const; + size_t RowCount() const; - size_t ApproximateSize() const; + size_t Size() const; Status Serialize(std::string& table_id); @@ -49,11 +50,11 @@ private: MemVectors(const MemVectors&) = delete; MemVectors& operator=(const MemVectors&) = delete; - MetaPtr pMeta_; + MetaPtr meta_; Options options_; meta::TableFileSchema schema_; - IDGenerator* pIdGenerator_; - ExecutionEnginePtr pEE_; + IDGenerator* id_generator_; + ExecutionEnginePtr active_engine_; }; // MemVectors @@ -66,14 +67,14 @@ public: using Ptr = std::shared_ptr; MemManager(const std::shared_ptr& meta, const Options& options) - : pMeta_(meta), options_(options) {} + : meta_(meta), options_(options) {} MemVectorsPtr GetMemByTable(const std::string& table_id); Status InsertVectors(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids); - Status Serialize(std::vector& table_ids); + Status Serialize(std::set& table_ids); Status EraseMemVector(const std::string& table_id); @@ -82,11 +83,11 @@ private: size_t n, const float* vectors, IDNumbers& vector_ids); Status ToImmutable(); - using MemMap = std::map; - using ImmMemPool = std::vector; - MemMap memMap_; - ImmMemPool immMems_; - MetaPtr pMeta_; + using MemIdMap = std::map; + using MemList = std::vector; + MemIdMap mem_id_map_; + MemList immu_mem_list_; + MetaPtr meta_; Options options_; std::mutex mutex_; std::mutex serialization_mtx_; From 52047ba75d55c6045319aa2fb3f26d1e7e1f9eb7 Mon Sep 17 00:00:00 2001 From: groot Date: Wed, 26 Jun 2019 09:19:12 +0800 Subject: [PATCH 3/7] remove unuse config Former-commit-id: d426be7162d79534f9a992118da4d4e979c55662 --- cpp/conf/server_config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 717e9b10cb..edb77bd20c 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -1,8 +1,6 @@ server_config: address: 0.0.0.0 port: 19530 - transfer_protocol: binary #optional: binary, compact, json - server_mode: thread_pool #optional: simple, thread_pool gpu_index: 0 #which gpu to be used mode: single #optional: single, cluster From 9fd3d8e2ea659e2c3dd85412a6457b28ceeaf7d7 Mon Sep 17 00:00:00 2001 From: groot Date: Wed, 26 Jun 2019 10:13:11 +0800 Subject: [PATCH 4/7] remove unuse config Former-commit-id: a1d441477a586bf206d6750ad4a2ddc59336a236 --- cpp/conf/server_config.yaml | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index edb77bd20c..8bc83dcebb 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -1,25 +1,25 @@ server_config: address: 0.0.0.0 - port: 19530 - gpu_index: 0 #which gpu to be used - mode: single #optional: single, cluster + port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534 + gpu_index: 0 # the gpu milvus use, default: 0, range: 0 ~ gpu number - 1 + mode: single # milvus deployment type: single, cluster db_config: - db_path: /tmp/milvus - db_backend_url: http://127.0.0.1 - index_building_threshold: 1024 #build index file when raw data file size larger than this value, unit: MB + db_path: /tmp/milvus # milvus data storage path + db_backend_url: http://127.0.0.1 # meta database uri + index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB metric_config: - is_startup: true # true is on, false is off - collector: prometheus # prometheus, now we only have prometheus - prometheus_config: - collect_type: pull # pull means prometheus pull the message from server, push means server push metric to push gateway - port: 8080 - push_gateway_ip_address: 127.0.0.1 - push_gateway_port: 9091 + is_startup: true # if monitoring start: on, off + collector: prometheus # metrics collector: prometheus + prometheus_config: # following are prometheus configure + collect_type: pull # prometheus collect data method + port: 8080 # the port prometheus use to fetch metrics + push_gateway_ip_address: 127.0.0.1 # push method configure: push gateway ip address + push_gateway_port: 9091 # push method configure: push gateway port -license_config: - license_path: "/tmp/system.license" +license_config: # license configure + license_path: "/tmp/system.license" # license file path -cache_config: - cpu_cache_capacity: 16 # memory pool to hold index data, unit: GB \ No newline at end of file +cache_config: # cache configure + cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory \ No newline at end of file From c497ce5d188cf0237961baaa706fb7919f3a132e Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 26 Jun 2019 11:21:55 +0800 Subject: [PATCH 5/7] alter server_config Former-commit-id: abb109f62d540d3b0d1b53ae49308692a04e17af --- cpp/conf/server_config.yaml | 2 +- cpp/src/metrics/PrometheusMetrics.cpp | 3 ++- cpp/unittest/metrics/metrics_test.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 717e9b10cb..40ee6f2efa 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -12,7 +12,7 @@ db_config: index_building_threshold: 1024 #build index file when raw data file size larger than this value, unit: MB metric_config: - is_startup: true # true is on, false is off + is_startup: off # on is activated, otherwise is down. note: case sensitive collector: prometheus # prometheus, now we only have prometheus prometheus_config: collect_type: pull # pull means prometheus pull the message from server, push means server push metric to push gateway diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 07e1d2ee71..d0d50800ad 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -17,7 +17,8 @@ ServerError PrometheusMetrics::Init() { try { ConfigNode &configNode = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC); - startup_ = configNode.GetValue(CONFIG_METRIC_IS_STARTUP) == "true" ? true : false; + startup_ = configNode.GetValue(CONFIG_METRIC_IS_STARTUP) == "on"; + if(!startup_) return SERVER_SUCCESS; // Following should be read from config file. const std::string bind_address = configNode.GetChild(CONFIG_PROMETHEUS).GetValue(CONFIG_METRIC_PROMETHEUS_PORT); const std::string uri = std::string("/metrics"); diff --git a/cpp/unittest/metrics/metrics_test.cpp b/cpp/unittest/metrics/metrics_test.cpp index 00075fbb5f..72596dc79e 100644 --- a/cpp/unittest/metrics/metrics_test.cpp +++ b/cpp/unittest/metrics/metrics_test.cpp @@ -102,7 +102,7 @@ TEST_F(DBTest, Metric_Tes) { } }); - int loop = 10; + int loop = 10000; for (auto i=0; i Date: Wed, 26 Jun 2019 11:51:47 +0800 Subject: [PATCH 6/7] change CHANGELOG Former-commit-id: 02ccd2e584dad6754342da2170e40d5bbedfb4f7 --- cpp/CHANGELOG.md | 2 +- cpp/unittest/metrics/metrics_test.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 0520eaf331..ca4e466cf9 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -20,7 +20,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-22 - Enhancement for MemVector size control - MS-92 - Unify behavior of debug and release build - MS-98 - Install all unit test to installation directory - +- MS-115 - Change is_startup of metric_config switch from true to on ## New Feature - MS-57 - Implement index load/search pipeline diff --git a/cpp/unittest/metrics/metrics_test.cpp b/cpp/unittest/metrics/metrics_test.cpp index 4346036efe..72596dc79e 100644 --- a/cpp/unittest/metrics/metrics_test.cpp +++ b/cpp/unittest/metrics/metrics_test.cpp @@ -30,7 +30,6 @@ TEST_F(DBTest, Metric_Tes) { server::SystemInfo::GetInstance().Init(); // server::Metrics::GetInstance().Init(); // server::Metrics::GetInstance().exposer_ptr()->RegisterCollectable(server::Metrics::GetInstance().registry_ptr()); - server::Metrics::GetInstance().Init(); // server::PrometheusMetrics::GetInstance().exposer_ptr()->RegisterCollectable(server::PrometheusMetrics::GetInstance().registry_ptr()); From a76e87112207c19cbe6345c8de0882852afcd217 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 26 Jun 2019 11:54:51 +0800 Subject: [PATCH 7/7] off Former-commit-id: db7db3c0c5c14b962cf34dd1ad8e3801ce56084e --- cpp/conf/server_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 1c81a14c41..4ace334778 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -10,7 +10,7 @@ db_config: index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB metric_config: - is_startup: on # if monitoring start: on, off + is_startup: off # if monitoring start: on, off collector: prometheus # metrics collector: prometheus prometheus_config: # following are prometheus configure collect_type: pull # prometheus collect data method