mirror of https://github.com/milvus-io/milvus.git
Merge branch 'branch-0.4.0' into 'branch-0.4.0'
modify DescribeIndexTask bug and add GpuCache metrics

See merge request megasearch/milvus!428

Former-commit-id: 081e84de414512fea88f1aaf6c8eff7e782fd1c6
commit 3c1dcb6f0a
@@ -317,6 +317,7 @@ void DBImpl::StartMetricTask()
         int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
         int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
         server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total);
+        server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
         uint64_t size;
         Size(size);
         server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
@@ -421,7 +421,7 @@ Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex
     if (groups.size() == 1) {
         index.engine_type_ = std::get<0>(groups[0]);
         index.nlist_ = std::get<1>(groups[0]);
-        index.metric_type_ = std::get<2>(groups[0]);
+        index.metric_type_ = std::get<3>(groups[0]);
     } else {
         return Status::NotFound("Table " + table_id + " not found");
     }
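The fix above swaps std::get<2> for std::get<3> when filling index.metric_type_. Below is a minimal, self-contained sketch of the failure mode, assuming (the diff itself does not show this) that each result row is ordered (engine_type, nlist, index_file_size, metric_type); with that layout, std::get<2> would silently copy index_file_size into the metric type field.

// Hypothetical illustration only -- the field order is an assumption, not the Milvus schema.
#include <cstdint>
#include <iostream>
#include <tuple>
#include <vector>

int main() {
    // one row per index: (engine_type, nlist, index_file_size, metric_type)
    using IndexRow = std::tuple<int32_t, int32_t, int32_t, int32_t>;
    std::vector<IndexRow> groups = {{1, 16384, 1024, 2}};

    if (groups.size() == 1) {
        int32_t engine_type = std::get<0>(groups[0]);
        int32_t nlist       = std::get<1>(groups[0]);
        int32_t metric_type = std::get<3>(groups[0]);   // std::get<2> would read index_file_size instead
        std::cout << engine_type << " " << nlist << " " << metric_type << std::endl;
    }
    return 0;
}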
@@ -32,7 +32,7 @@ class MetricsBase{
     virtual void BuildIndexDurationSecondsHistogramObserve(double value) {};

     virtual void CpuCacheUsageGaugeSet(double value) {};
-    virtual void GpuCacheUsageGaugeSet(double value) {};
+    virtual void GpuCacheUsageGaugeSet() {};

     virtual void MetaAccessTotalIncrement(double value = 1) {};
     virtual void MetaAccessDurationSecondsHistogramObserve(double value) {};
@@ -167,15 +167,26 @@ void PrometheusMetrics::CPUTemperature()
     }
 }

-void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) {
+void PrometheusMetrics::GpuCacheUsageGaugeSet() {
     if(!startup_) return;
-    int64_t num_processors = server::SystemInfo::GetInstance().num_processor();
-
-    for (auto i = 0; i < num_processors; ++i) {
-//        int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage();
-//        int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity();
-//        prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}});
-//        gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total);
-    }
+    server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
+    std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1");
+
+    std::vector<uint64_t > gpu_ids;
+
+    std::stringstream ss(gpu_ids_str);
+    for (int i; ss >> i;) {
+        gpu_ids.push_back(i);
+        if (ss.peek() == ',') {
+            ss.ignore();
+        }
+    }
+
+    for(auto i = 0; i < gpu_ids.size(); ++i) {
+        uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage();
+        uint64_t cache_capacity = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheCapacity();
+        prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}});
+        gpu_cache.Set(cache_usage * 100 / cache_capacity);
+    }
 }

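The new GpuCacheUsageGaugeSet() splits the comma-separated CONFIG_GPU_IDS value with a std::stringstream before querying each GpuCacheMgr instance. A self-contained sketch of just that parsing step (the "0,1,3" input is an invented example, not a real config value):

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::string gpu_ids_str = "0,1,3";   // stands in for the CONFIG_GPU_IDS config value
    std::vector<uint64_t> gpu_ids;

    // operator>> extracts one integer at a time; ignore() consumes the ',' separator
    std::stringstream ss(gpu_ids_str);
    for (int i; ss >> i;) {
        gpu_ids.push_back(i);
        if (ss.peek() == ',') {
            ss.ignore();
        }
    }

    for (uint64_t id : gpu_ids) {
        std::cout << "gpu " << id << std::endl;   // prints 0, 1, 3
    }
    return 0;
}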
@@ -55,7 +55,7 @@ class PrometheusMetrics: public MetricsBase {
     void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);};
     void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);};
     void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);};
-    void GpuCacheUsageGaugeSet(double value) override;
+    void GpuCacheUsageGaugeSet() override;

     void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);};
     void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);};
@@ -343,7 +343,7 @@ class PrometheusMetrics: public MetricsBase {
         .Help("current cache usage by bytes")
         .Register(*registry_);
     prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({});

     //record GPU cache usage and %
     prometheus::Family<prometheus::Gauge> &gpu_cache_usage_ = prometheus::BuildGauge()
         .Name("gpu_cache_usage_bytes")
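For context, the gpu_cache_usage_ family above is meant to be used as in the sketch below: register one gauge family on a prometheus-cpp Registry, then Add() one labelled Gauge per GPU and Set() it to a usage percentage. This is a hedged, standalone example; the help text, GPU ids, and byte counts are placeholders, and the header paths assume a recent prometheus-cpp release.

#include <cstdint>
#include <memory>
#include <string>

#include <prometheus/gauge.h>
#include <prometheus/registry.h>

int main() {
    auto registry = std::make_shared<prometheus::Registry>();

    // Family registered once, mirroring the gpu_cache_usage_ member in the header.
    auto& gpu_cache_usage = prometheus::BuildGauge()
                                .Name("gpu_cache_usage_bytes")
                                .Help("current GPU cache usage")   // placeholder help text
                                .Register(*registry);

    // One time series per GPU, distinguished by the GPU_Cache label.
    for (int gpu_id : {0, 1}) {
        uint64_t cache_usage = 256;       // placeholder for GpuCacheMgr CacheUsage()
        uint64_t cache_capacity = 1024;   // placeholder for GpuCacheMgr CacheCapacity()
        prometheus::Gauge& gpu_cache =
            gpu_cache_usage.Add({{"GPU_Cache", std::to_string(gpu_id)}});
        gpu_cache.Set(cache_usage * 100 / cache_capacity);
    }
    return 0;
}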
@@ -22,7 +22,7 @@ TEST(MetricbaseTest, METRICBASE_TEST){
     instance.IndexFileSizeHistogramObserve(1.0);
     instance.BuildIndexDurationSecondsHistogramObserve(1.0);
     instance.CpuCacheUsageGaugeSet(1.0);
-    instance.GpuCacheUsageGaugeSet(1.0);
+    instance.GpuCacheUsageGaugeSet();
     instance.MetaAccessTotalIncrement();
     instance.MetaAccessDurationSecondsHistogramObserve(1.0);
     instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0);
@@ -23,7 +23,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST){
     instance.IndexFileSizeHistogramObserve(1.0);
     instance.BuildIndexDurationSecondsHistogramObserve(1.0);
     instance.CpuCacheUsageGaugeSet(1.0);
-    instance.GpuCacheUsageGaugeSet(1.0);
+    instance.GpuCacheUsageGaugeSet();
     instance.MetaAccessTotalIncrement();
     instance.MetaAccessDurationSecondsHistogramObserve(1.0);
     instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0);