diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp
index 7145db04b2..a9386ff1eb 100644
--- a/cpp/src/db/DBImpl.cpp
+++ b/cpp/src/db/DBImpl.cpp
@@ -317,6 +317,7 @@ void DBImpl::StartMetricTask() {
     int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
     int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
     server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total);
+    server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
     uint64_t size;
     Size(size);
     server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp
index d0d6423688..99487c1b70 100644
--- a/cpp/src/db/meta/SqliteMetaImpl.cpp
+++ b/cpp/src/db/meta/SqliteMetaImpl.cpp
@@ -421,7 +421,7 @@ Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableInde
         if (groups.size() == 1) {
             index.engine_type_ = std::get<0>(groups[0]);
             index.nlist_ = std::get<1>(groups[0]);
-            index.metric_type_ = std::get<2>(groups[0]);
+            index.metric_type_ = std::get<3>(groups[0]);
         } else {
             return Status::NotFound("Table " + table_id + " not found");
         }
diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h
index a11bf14179..3fb947cb10 100644
--- a/cpp/src/metrics/MetricBase.h
+++ b/cpp/src/metrics/MetricBase.h
@@ -32,7 +32,7 @@ class MetricsBase{
     virtual void BuildIndexDurationSecondsHistogramObserve(double value) {};
 
     virtual void CpuCacheUsageGaugeSet(double value) {};
-    virtual void GpuCacheUsageGaugeSet(double value) {};
+    virtual void GpuCacheUsageGaugeSet() {};
 
     virtual void MetaAccessTotalIncrement(double value = 1) {};
     virtual void MetaAccessDurationSecondsHistogramObserve(double value) {};
diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp
index 08dad64724..08f1fe7dc2 100644
--- a/cpp/src/metrics/PrometheusMetrics.cpp
+++ b/cpp/src/metrics/PrometheusMetrics.cpp
@@ -167,15 +167,36 @@ void PrometheusMetrics::CPUTemperature() {
     }
 }
 
-void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) {
+// Publishes, for every configured GPU, its cache usage as a percentage of that
+// device's cache capacity. One gauge is registered per device, labelled with
+// the GPU id. The ids are read as a comma-separated list from CONFIG_GPU_IDS in
+// the CONFIG_CACHE config node (default "0,1"). Returns early if !startup_.
+void PrometheusMetrics::GpuCacheUsageGaugeSet() {
     if(!startup_) return;
-    int64_t num_processors = server::SystemInfo::GetInstance().num_processor();
+    server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
+    std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1");
 
-    for (auto i = 0; i < num_processors; ++i) {
-//        int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage();
-//        int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity();
-//        prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}});
-//        gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total);
+    std::vector<int> gpu_ids;
+
+    // Parse "id,id,...": read an int, then skip the separating comma if present.
+    std::stringstream ss(gpu_ids_str);
+    for (int id; ss >> id;) {
+        gpu_ids.push_back(id);
+        if (ss.peek() == ',') {
+            ss.ignore();
+        }
+    }
+
+    for (const int gpu_id : gpu_ids) {
+        uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_id)->CacheUsage();
+        uint64_t cache_capacity = cache::GpuCacheMgr::GetInstance(gpu_id)->CacheCapacity();
+        // A zero capacity would make the percentage below divide by zero.
+        if (cache_capacity == 0) {
+            continue;
+        }
+        // Label with the configured device id, not its position in the list.
+        prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(gpu_id)}});
+        gpu_cache.Set(cache_usage * 100 / cache_capacity);
     }
 }
 
diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h
index ab37195583..f0fe74cfaa 100644
--- a/cpp/src/metrics/PrometheusMetrics.h
+++ b/cpp/src/metrics/PrometheusMetrics.h
@@ -55,7 +55,7 @@ class PrometheusMetrics: public MetricsBase {
     void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);};
     void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);};
     void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);};
-    void GpuCacheUsageGaugeSet(double value) override;
+    void GpuCacheUsageGaugeSet() override;
 
     void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);};
     void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);};
@@ -343,7 +343,7 @@ class PrometheusMetrics: public MetricsBase {
             .Help("current cache usage by bytes")
             .Register(*registry_);
     prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({});
-
+
     //record GPU cache usage and %
     prometheus::Family<prometheus::Gauge> &gpu_cache_usage_ = prometheus::BuildGauge()
             .Name("gpu_cache_usage_bytes")
diff --git a/cpp/unittest/metrics/metricbase_test.cpp b/cpp/unittest/metrics/metricbase_test.cpp
index feda6d1bf9..10087b370a 100644
--- a/cpp/unittest/metrics/metricbase_test.cpp
+++ b/cpp/unittest/metrics/metricbase_test.cpp
@@ -22,7 +22,7 @@ TEST(MetricbaseTest, METRICBASE_TEST){
     instance.IndexFileSizeHistogramObserve(1.0);
     instance.BuildIndexDurationSecondsHistogramObserve(1.0);
     instance.CpuCacheUsageGaugeSet(1.0);
-    instance.GpuCacheUsageGaugeSet(1.0);
+    instance.GpuCacheUsageGaugeSet();
     instance.MetaAccessTotalIncrement();
     instance.MetaAccessDurationSecondsHistogramObserve(1.0);
     instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0);
diff --git a/cpp/unittest/metrics/prometheus_test.cpp b/cpp/unittest/metrics/prometheus_test.cpp
index ece3cf9012..912e910f32 100644
--- a/cpp/unittest/metrics/prometheus_test.cpp
+++ b/cpp/unittest/metrics/prometheus_test.cpp
@@ -23,7 +23,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST){
     instance.IndexFileSizeHistogramObserve(1.0);
     instance.BuildIndexDurationSecondsHistogramObserve(1.0);
     instance.CpuCacheUsageGaugeSet(1.0);
-    instance.GpuCacheUsageGaugeSet(1.0);
+    instance.GpuCacheUsageGaugeSet();
     instance.MetaAccessTotalIncrement();
     instance.MetaAccessDurationSecondsHistogramObserve(1.0);
     instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0);