From 8e12445e66c3124ce9f643711f2001508649fc69 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Sat, 24 Aug 2019 19:10:53 +0800 Subject: [PATCH 1/4] add GpuCache Metrics Former-commit-id: 9da6fcb11f69302fb1c00bb16fe4c6f7b7699ee5 --- cpp/src/db/DBImpl.cpp | 1 + cpp/src/grpc/gen-milvus/milvus.pb.cc | 4 ++-- cpp/src/grpc/milvus.proto | 1 + cpp/src/metrics/MetricBase.h | 2 +- cpp/src/metrics/PrometheusMetrics.cpp | 25 ++++++++++++++++++------- cpp/src/metrics/PrometheusMetrics.h | 4 ++-- cpp/src/scheduler/ResourceMgr.cpp | 2 +- cpp/src/sdk/grpc/ClientProxy.cpp | 1 + 8 files changed, 27 insertions(+), 13 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 4299801cb9..3b448df8b7 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -312,6 +312,7 @@ void DBImpl::StartMetricTask() { int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total); + server::Metrics::GetInstance().GpuCacheUsageGaugeSet(); uint64_t size; Size(size); server::Metrics::GetInstance().DataFileSizeGaugeSet(size); diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.cc b/cpp/src/grpc/gen-milvus/milvus.pb.cc index 25f115db18..9f72ae97a4 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.cc +++ b/cpp/src/grpc/gen-milvus/milvus.pb.cc @@ -576,7 +576,7 @@ const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE( "rpc.Status\"\000\022B\n\rDescribeIndex\022\026.milvus.g" "rpc.TableName\032\027.milvus.grpc.IndexParam\"\000" "\022:\n\tDropIndex\022\026.milvus.grpc.TableName\032\023." - "milvus.grpc.Status\"\000b\006proto3" + "milvus.grpc.Status\"\000B\002H\001b\006proto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_milvus_2eproto_deps[1] = { &::descriptor_table_status_2eproto, @@ -603,7 +603,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_mil static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_milvus_2eproto_once; static bool descriptor_table_milvus_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_milvus_2eproto = { - &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2388, + &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2392, &descriptor_table_milvus_2eproto_once, descriptor_table_milvus_2eproto_sccs, descriptor_table_milvus_2eproto_deps, 17, 1, schemas, file_default_instances, TableStruct_milvus_2eproto::offsets, file_level_metadata_milvus_2eproto, 17, file_level_enum_descriptors_milvus_2eproto, file_level_service_descriptors_milvus_2eproto, diff --git a/cpp/src/grpc/milvus.proto b/cpp/src/grpc/milvus.proto index f8058c1fe4..65cdab2730 100644 --- a/cpp/src/grpc/milvus.proto +++ b/cpp/src/grpc/milvus.proto @@ -1,4 +1,5 @@ syntax = "proto3"; +option optimize_for = SPEED; import "status.proto"; diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index a11bf14179..3fb947cb10 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -32,7 +32,7 @@ class MetricsBase{ virtual void BuildIndexDurationSecondsHistogramObserve(double value) {}; virtual void CpuCacheUsageGaugeSet(double value) {}; - virtual void GpuCacheUsageGaugeSet(double value) {}; + virtual void GpuCacheUsageGaugeSet() {}; virtual void MetaAccessTotalIncrement(double value = 1) {}; virtual void MetaAccessDurationSecondsHistogramObserve(double value) {}; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 08dad64724..08f1fe7dc2 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -167,15 +167,26 @@ void PrometheusMetrics::CPUTemperature() { } } -void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) { +void PrometheusMetrics::GpuCacheUsageGaugeSet() { if(!startup_) return; - int64_t num_processors = server::SystemInfo::GetInstance().num_processor(); + server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); + std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); - for (auto i = 0; i < num_processors; ++i) { -// int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage(); -// int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity(); -// prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); -// gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total); + std::vector gpu_ids; + + std::stringstream ss(gpu_ids_str); + for (int i; ss >> i;) { + gpu_ids.push_back(i); + if (ss.peek() == ',') { + ss.ignore(); + } + } + + for(auto i = 0; i < gpu_ids.size(); ++i) { + uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage(); + uint64_t cache_capacity = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheCapacity(); + prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); + gpu_cache.Set(cache_usage * 100 / cache_capacity); } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index ab37195583..f0fe74cfaa 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -55,7 +55,7 @@ class PrometheusMetrics: public MetricsBase { void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);}; void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);}; void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);}; - void GpuCacheUsageGaugeSet(double value) override; + void GpuCacheUsageGaugeSet() override; void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);}; void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);}; @@ -343,7 +343,7 @@ class PrometheusMetrics: public MetricsBase { .Help("current cache usage by bytes") .Register(*registry_); prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({}); - + //record GPU cache usage and % prometheus::Family &gpu_cache_usage_ = prometheus::BuildGauge() .Name("gpu_cache_usage_bytes") diff --git a/cpp/src/scheduler/ResourceMgr.cpp b/cpp/src/scheduler/ResourceMgr.cpp index 0ef8d7b01f..7d519cc7c3 100644 --- a/cpp/src/scheduler/ResourceMgr.cpp +++ b/cpp/src/scheduler/ResourceMgr.cpp @@ -60,7 +60,7 @@ ResourceMgr::Connect(const std::string &name1, const std::string &name2, Connect void ResourceMgr::Connect(ResourceWPtr &res1, ResourceWPtr &res2, Connection &connection) { - if (auto observe_a = res1.lock()) { + if (auto observe_a = res1.loc k()) { if (auto observe_b = res2.lock()) { observe_a->AddNeighbour(std::static_pointer_cast(observe_b), connection); observe_b->AddNeighbour(std::static_pointer_cast(observe_a), connection); diff --git a/cpp/src/sdk/grpc/ClientProxy.cpp b/cpp/src/sdk/grpc/ClientProxy.cpp index 6421a34d19..742f35ae99 100644 --- a/cpp/src/sdk/grpc/ClientProxy.cpp +++ b/cpp/src/sdk/grpc/ClientProxy.cpp @@ -184,6 +184,7 @@ ClientProxy::Insert(const std::string &table_name, for (auto &record : record_array) { ::milvus::grpc::RowRecord *grpc_record = insert_param.add_row_record_array(); +// memcpy(grpc_record->vector_data().data(), record.data.data(), record.data.size() * sizeof(float)); for (size_t i = 0; i < record.data.size(); i++) { grpc_record->add_vector_data(record.data[i]); } From c8b31c16a674e8b0616239a8cffef6a26ac0f276 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Sat, 24 Aug 2019 20:00:25 +0800 Subject: [PATCH 2/4] modify DescribeIndex bug Former-commit-id: 284bf1fc830ace11385fc96790eb9d525541295e --- cpp/src/db/meta/SqliteMetaImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index d0d6423688..99487c1b70 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -421,7 +421,7 @@ Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableInde if (groups.size() == 1) { index.engine_type_ = std::get<0>(groups[0]); index.nlist_ = std::get<1>(groups[0]); - index.metric_type_ = std::get<2>(groups[0]); + index.metric_type_ = std::get<3>(groups[0]); } else { return Status::NotFound("Table " + table_id + " not found"); } From 30fdd4a026798a8187be7bc1ca4f7e618974205b Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Sun, 25 Aug 2019 15:38:18 +0800 Subject: [PATCH 3/4] add metrics Former-commit-id: 3f894aacdc7eda6ef900d6aafcabffe8bf5a0cb4 --- cpp/src/grpc/gen-milvus/milvus.pb.cc | 4 ++-- cpp/src/grpc/milvus.proto | 1 - cpp/src/sdk/grpc/ClientProxy.cpp | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.cc b/cpp/src/grpc/gen-milvus/milvus.pb.cc index 7d85e50ccc..7cb053a9bb 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.cc +++ b/cpp/src/grpc/gen-milvus/milvus.pb.cc @@ -576,7 +576,7 @@ const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE( "rpc.Status\"\000\022B\n\rDescribeIndex\022\026.milvus.g" "rpc.TableName\032\027.milvus.grpc.IndexParam\"\000" "\022:\n\tDropIndex\022\026.milvus.grpc.TableName\032\023." - "milvus.grpc.Status\"\000B\002H\001b\006proto3" + "milvus.grpc.Status\"\000b\006proto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_milvus_2eproto_deps[1] = { &::descriptor_table_status_2eproto, @@ -603,7 +603,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_mil static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_milvus_2eproto_once; static bool descriptor_table_milvus_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_milvus_2eproto = { - &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2392, + &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2388, &descriptor_table_milvus_2eproto_once, descriptor_table_milvus_2eproto_sccs, descriptor_table_milvus_2eproto_deps, 17, 1, schemas, file_default_instances, TableStruct_milvus_2eproto::offsets, file_level_metadata_milvus_2eproto, 17, file_level_enum_descriptors_milvus_2eproto, file_level_service_descriptors_milvus_2eproto, diff --git a/cpp/src/grpc/milvus.proto b/cpp/src/grpc/milvus.proto index 838a5e5761..ff95a305ab 100644 --- a/cpp/src/grpc/milvus.proto +++ b/cpp/src/grpc/milvus.proto @@ -1,5 +1,4 @@ syntax = "proto3"; -option optimize_for = SPEED; import "status.proto"; diff --git a/cpp/src/sdk/grpc/ClientProxy.cpp b/cpp/src/sdk/grpc/ClientProxy.cpp index 2185c06d8f..a135910383 100644 --- a/cpp/src/sdk/grpc/ClientProxy.cpp +++ b/cpp/src/sdk/grpc/ClientProxy.cpp @@ -184,7 +184,6 @@ ClientProxy::Insert(const std::string &table_name, for (auto &record : record_array) { ::milvus::grpc::RowRecord *grpc_record = insert_param.add_row_record_array(); -// memcpy(grpc_record->vector_data().data(), record.data.data(), record.data.size() * sizeof(float)); for (size_t i = 0; i < record.data.size(); i++) { grpc_record->add_vector_data(record.data[i]); } From e089b488e0497c611f999484ef3eae4e0ef655b0 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Sun, 25 Aug 2019 16:23:22 +0800 Subject: [PATCH 4/4] fix metricbase_test bugs Former-commit-id: 720470f165ab92744b6c4c1caa73092b3b27103e --- cpp/src/scheduler/ResourceMgr.cpp | 2 +- cpp/unittest/metrics/metricbase_test.cpp | 2 +- cpp/unittest/metrics/prometheus_test.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/scheduler/ResourceMgr.cpp b/cpp/src/scheduler/ResourceMgr.cpp index 2abeaae3b3..96e2589382 100644 --- a/cpp/src/scheduler/ResourceMgr.cpp +++ b/cpp/src/scheduler/ResourceMgr.cpp @@ -60,7 +60,7 @@ ResourceMgr::Connect(const std::string &name1, const std::string &name2, Connect void ResourceMgr::Connect(ResourceWPtr &res1, ResourceWPtr &res2, Connection &connection) { - if (auto observe_a = res1.loc k()) { + if (auto observe_a = res1.lock()) { if (auto observe_b = res2.lock()) { observe_a->AddNeighbour(std::static_pointer_cast(observe_b), connection); observe_b->AddNeighbour(std::static_pointer_cast(observe_a), connection); diff --git a/cpp/unittest/metrics/metricbase_test.cpp b/cpp/unittest/metrics/metricbase_test.cpp index feda6d1bf9..10087b370a 100644 --- a/cpp/unittest/metrics/metricbase_test.cpp +++ b/cpp/unittest/metrics/metricbase_test.cpp @@ -22,7 +22,7 @@ TEST(MetricbaseTest, METRICBASE_TEST){ instance.IndexFileSizeHistogramObserve(1.0); instance.BuildIndexDurationSecondsHistogramObserve(1.0); instance.CpuCacheUsageGaugeSet(1.0); - instance.GpuCacheUsageGaugeSet(1.0); + instance.GpuCacheUsageGaugeSet(); instance.MetaAccessTotalIncrement(); instance.MetaAccessDurationSecondsHistogramObserve(1.0); instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0); diff --git a/cpp/unittest/metrics/prometheus_test.cpp b/cpp/unittest/metrics/prometheus_test.cpp index ece3cf9012..912e910f32 100644 --- a/cpp/unittest/metrics/prometheus_test.cpp +++ b/cpp/unittest/metrics/prometheus_test.cpp @@ -23,7 +23,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST){ instance.IndexFileSizeHistogramObserve(1.0); instance.BuildIndexDurationSecondsHistogramObserve(1.0); instance.CpuCacheUsageGaugeSet(1.0); - instance.GpuCacheUsageGaugeSet(1.0); + instance.GpuCacheUsageGaugeSet(); instance.MetaAccessTotalIncrement(); instance.MetaAccessDurationSecondsHistogramObserve(1.0); instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0);