mirror of https://github.com/milvus-io/milvus.git
Merge branch 'without-grpc-version' into 'branch-0.3.1'
Without grpc version. See merge request megasearch/milvus!292. Former-commit-id: 9f08a29c1b17a8e9f051b57055c334e674dd2464 (pull/191/head)
commit
678897aa2d
|
@ -318,6 +318,9 @@ void DBImpl::StartMetricTask() {
|
|||
server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();
|
||||
server::Metrics::GetInstance().OctetsSet();
|
||||
|
||||
server::Metrics::GetInstance().CPUCoreUsagePercentSet();
|
||||
|
||||
|
||||
ENGINE_LOG_TRACE << "Metric task finished";
|
||||
}
|
||||
|
||||
|
|
|
@ -64,6 +64,8 @@ class MetricsBase{
|
|||
// Base-class hook: the default implementation does nothing; subclasses may override.
virtual void ConnectionGaugeDecrement() {};
|
||||
// Base-class hook: ignores `value` (defaults to 1) and does nothing; subclasses may override.
virtual void KeepingAliveCounterIncrement(double value = 1) {};
|
||||
// Base-class hook: the default implementation does nothing; subclasses may override.
virtual void OctetsSet() {};
|
||||
|
||||
// Base-class hook for publishing per-core CPU usage: does nothing here;
// PrometheusMetrics overrides it.
virtual void CPUCoreUsagePercentSet() {};
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -44,6 +44,8 @@ PrometheusMetrics::Init() {
|
|||
void
|
||||
PrometheusMetrics::CPUUsagePercentSet() {
|
||||
if(!startup_) return ;
|
||||
int numProcessor = server::SystemInfo::GetInstance().num_processor();
|
||||
|
||||
double usage_percent = server::SystemInfo::GetInstance().CPUPercent();
|
||||
CPU_usage_percent_.Set(usage_percent);
|
||||
}
|
||||
|
@ -58,36 +60,30 @@ PrometheusMetrics::RAMUsagePercentSet() {
|
|||
void
|
||||
PrometheusMetrics::GPUPercentGaugeSet() {
|
||||
if(!startup_) return;
|
||||
int numDevide = server::SystemInfo::GetInstance().num_device();
|
||||
std::vector<unsigned int> values = server::SystemInfo::GetInstance().GPUPercent();
|
||||
if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast<double>(values[0]));
|
||||
if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast<double>(values[1]));
|
||||
if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast<double>(values[2]));
|
||||
if(numDevide >= 4) GPU3_percent_gauge_.Set(static_cast<double>(values[3]));
|
||||
if(numDevide >= 5) GPU4_percent_gauge_.Set(static_cast<double>(values[4]));
|
||||
if(numDevide >= 6) GPU5_percent_gauge_.Set(static_cast<double>(values[5]));
|
||||
if(numDevide >= 7) GPU6_percent_gauge_.Set(static_cast<double>(values[6]));
|
||||
if(numDevide >= 8) GPU7_percent_gauge_.Set(static_cast<double>(values[7]));
|
||||
int numDevice = server::SystemInfo::GetInstance().num_device();
|
||||
std::vector<unsigned long long > used_total = server::SystemInfo::GetInstance().GPUMemoryTotal();
|
||||
std::vector<unsigned long long > used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed();
|
||||
|
||||
|
||||
for (int i = 0; i < numDevice; i++) {
|
||||
prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}});
|
||||
double percent = (double)used_memory[i] / (double)used_total[i];
|
||||
GPU_percent.Set(percent * 100);
|
||||
}
|
||||
|
||||
// to do
|
||||
}
|
||||
|
||||
void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
|
||||
if(!startup_) return;
|
||||
std::vector<unsigned long long> values = server::SystemInfo::GetInstance().GPUMemoryUsed();
|
||||
constexpr unsigned long long MtoB = 1024*1024;
|
||||
int numDevice = values.size();
|
||||
int numDevice = server::SystemInfo::GetInstance().num_device();
|
||||
|
||||
if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB);
|
||||
if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB);
|
||||
if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB);
|
||||
if(numDevice >=4) GPU3_memory_usage_gauge_.Set(values[3]/MtoB);
|
||||
if(numDevice >=5) GPU4_memory_usage_gauge_.Set(values[4]/MtoB);
|
||||
if(numDevice >=6) GPU5_memory_usage_gauge_.Set(values[5]/MtoB);
|
||||
if(numDevice >=7) GPU6_memory_usage_gauge_.Set(values[6]/MtoB);
|
||||
if(numDevice >=8) GPU7_memory_usage_gauge_.Set(values[7]/MtoB);
|
||||
for (int i = 0; i < numDevice; i++) {
|
||||
prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}});
|
||||
GPU_memory.Set(values[i] / MtoB);
|
||||
}
|
||||
|
||||
// to do
|
||||
}
|
||||
void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
|
||||
// MB/s
|
||||
|
@ -140,6 +136,17 @@ void PrometheusMetrics::OctetsSet() {
|
|||
outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second);
|
||||
}
|
||||
|
||||
void PrometheusMetrics::CPUCoreUsagePercentSet() {
|
||||
if (!startup_)
|
||||
return;
|
||||
|
||||
std::vector<double> cpu_core_percent = server::SystemInfo::GetInstance().CPUCorePercent();
|
||||
|
||||
for (int i = 0; i < cpu_core_percent.size(); i++) {
|
||||
prometheus::Gauge &core_percent = CPU_.Add({{"CPU", std::to_string(i)}});
|
||||
core_percent.Set(cpu_core_percent[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <prometheus/registry.h>
|
||||
#include <prometheus/exposer.h>
|
||||
#include <iostream>
|
||||
#include "server/ServerConfig.h"
|
||||
#include "MetricBase.h"
|
||||
|
||||
|
@ -78,6 +79,9 @@ class PrometheusMetrics: public MetricsBase {
|
|||
// Records `value` in query_vector_response_summary_ `count` times (once by
// default). Does nothing while startup_ is false or when count <= 0.
void QueryVectorResponseSummaryObserve(double value, int count = 1) override {
    if (!startup_) return;
    for (int remaining = count; remaining > 0; --remaining) {
        query_vector_response_summary_.Observe(value);
    }
};
|
||||
// Sets query_vector_response_per_second_gauge_ to `value`; ignored while startup_ is false.
void QueryVectorResponsePerSecondGaugeSet(double value) override {if (startup_) query_vector_response_per_second_gauge_.Set(value);};
|
||||
void CPUUsagePercentSet() override ;
|
||||
|
||||
void CPUCoreUsagePercentSet() override;
|
||||
|
||||
void RAMUsagePercentSet() override ;
|
||||
// Sets query_response_per_second_gauge to `value`; ignored while startup_ is false.
void QueryResponsePerSecondGaugeSet(double value) override {if(startup_) query_response_per_second_gauge.Set(value);};
|
||||
void GPUPercentGaugeSet() override ;
|
||||
|
@ -322,7 +326,7 @@ class PrometheusMetrics: public MetricsBase {
|
|||
prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB","Faiss"}});
|
||||
|
||||
|
||||
////all from CacheMgr.cpp
|
||||
////all from CacheMgr.cpp
|
||||
//record cache access count
|
||||
prometheus::Family<prometheus::Counter> &cache_access_ = prometheus::BuildCounter()
|
||||
.Name("cache_access_total")
|
||||
|
@ -392,7 +396,8 @@ class PrometheusMetrics: public MetricsBase {
|
|||
.Name("CPU_usage_percent")
|
||||
.Help("CPU usage percent by this this process")
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({});
|
||||
prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({{"CPU", "0"}});
|
||||
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &RAM_ = prometheus::BuildGauge()
|
||||
.Name("RAM_usage_percent")
|
||||
|
@ -405,33 +410,12 @@ class PrometheusMetrics: public MetricsBase {
|
|||
.Name("Gpu_usage_percent")
|
||||
.Help("GPU_usage_percent ")
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &GPU0_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "0"}});
|
||||
prometheus::Gauge &GPU1_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "1"}});
|
||||
prometheus::Gauge &GPU2_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "2"}});
|
||||
prometheus::Gauge &GPU3_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "3"}});
|
||||
prometheus::Gauge &GPU4_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "4"}});
|
||||
prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}});
|
||||
prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}});
|
||||
prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}});
|
||||
// std::vector<prometheus::Gauge> GPU_percent_gauges_;
|
||||
|
||||
|
||||
|
||||
|
||||
//GPU Mempry used
|
||||
prometheus::Family<prometheus::Gauge> &GPU_memory_usage_ = prometheus::BuildGauge()
|
||||
.Name("GPU_memory_usage_total")
|
||||
.Help("GPU memory usage total ")
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &GPU0_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "0"}});
|
||||
prometheus::Gauge &GPU1_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "1"}});
|
||||
prometheus::Gauge &GPU2_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "2"}});
|
||||
prometheus::Gauge &GPU3_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "3"}});
|
||||
prometheus::Gauge &GPU4_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "4"}});
|
||||
prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}});
|
||||
prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}});
|
||||
prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}});
|
||||
// std::vector<prometheus::Gauge> GPU_memory_usage_gauges_;
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &query_index_type_per_second_ = prometheus::BuildGauge()
|
||||
.Name("query_index_throughtout_per_microsecond")
|
||||
|
|
|
@ -105,9 +105,65 @@ SystemInfo::GetProcessUsedMemory() {
|
|||
double
|
||||
SystemInfo::MemoryPercent() {
|
||||
if (!initialized_) Init();
|
||||
return GetProcessUsedMemory()*100/total_ram_;
|
||||
return (double)(GetProcessUsedMemory()*100)/(double)total_ram_;
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::vector<double>
|
||||
SystemInfo::CPUCorePercent() {
|
||||
std::vector<unsigned long long> prev_work_time_array;
|
||||
std::vector<unsigned long long> prev_total_time_array = getTotalCpuTime(prev_work_time_array);
|
||||
usleep(100000);
|
||||
std::vector<unsigned long long> cur_work_time_array;
|
||||
std::vector<unsigned long long> cur_total_time_array = getTotalCpuTime(cur_work_time_array);
|
||||
|
||||
std::vector<double> cpu_core_percent;
|
||||
for (int i = 0; i < num_processors_; i++) {
|
||||
double total_cpu_time = cur_total_time_array[i] - prev_total_time_array[i];
|
||||
double cpu_work_time = cur_work_time_array[i] - prev_work_time_array[i];
|
||||
cpu_core_percent.push_back((cpu_work_time / total_cpu_time) * 100);
|
||||
}
|
||||
return cpu_core_percent;
|
||||
}
|
||||
|
||||
std::vector<unsigned long long>
|
||||
SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array)
|
||||
{
|
||||
std::vector<unsigned long long> total_time_array;
|
||||
FILE* file = fopen("/proc/stat", "r");
|
||||
if (file == NULL) {
|
||||
perror("Could not open stat file");
|
||||
return total_time_array;
|
||||
}
|
||||
|
||||
unsigned long long user = 0, nice = 0, system = 0, idle = 0;
|
||||
unsigned long long iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0;
|
||||
|
||||
for (int i = 0; i < num_processors_; i++) {
|
||||
char buffer[1024];
|
||||
char* ret = fgets(buffer, sizeof(buffer) - 1, file);
|
||||
if (ret == NULL) {
|
||||
perror("Could not read stat file");
|
||||
fclose(file);
|
||||
return total_time_array;
|
||||
}
|
||||
|
||||
sscanf(buffer,
|
||||
"cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu",
|
||||
&user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal, &guest, &guestnice);
|
||||
|
||||
work_time_array.push_back(user + nice + system);
|
||||
total_time_array.push_back(user + nice + system + idle + iowait + irq + softirq + steal);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return total_time_array;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
double
|
||||
SystemInfo::CPUPercent() {
|
||||
if (!initialized_) Init();
|
||||
|
@ -136,17 +192,17 @@ SystemInfo::CPUPercent() {
|
|||
}
|
||||
|
||||
|
||||
std::vector<unsigned int>
|
||||
SystemInfo::GPUPercent() {
|
||||
std::vector<unsigned long long>
|
||||
SystemInfo::GPUMemoryTotal() {
|
||||
// get GPU usage percent
|
||||
if(!initialized_) Init();
|
||||
std::vector<unsigned int> result;
|
||||
nvmlUtilization_t utilization;
|
||||
std::vector<unsigned long long > result;
|
||||
nvmlMemory_t nvmlMemory;
|
||||
for (int i = 0; i < num_device_; ++i) {
|
||||
nvmlDevice_t device;
|
||||
nvmlDeviceGetHandleByIndex(i, &device);
|
||||
nvmlDeviceGetUtilizationRates(device, &utilization);
|
||||
result.push_back(utilization.gpu);
|
||||
nvmlDeviceGetMemoryInfo(device, &nvmlMemory);
|
||||
result.push_back(nvmlMemory.total);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@ class SystemInfo {
|
|||
}
|
||||
|
||||
void Init();
|
||||
// Returns the stored num_processors_ value.
int num_processor() const { return num_processors_;};
|
||||
// Returns the stored num_device_ value.
int num_device() const {return num_device_;};
|
||||
// Returns the stored in_octets_ value.
unsigned long long get_inoctets() { return in_octets_;};
|
||||
// Returns the stored out_octets_ value.
// NOTE(review): the name does not say "out" although it is the counterpart of get_inoctets().
unsigned long long get_octets() { return out_octets_;};
|
||||
|
@ -59,9 +60,13 @@ class SystemInfo {
|
|||
double MemoryPercent();
|
||||
double CPUPercent();
|
||||
std::pair<unsigned long long , unsigned long long > Octets();
|
||||
std::vector<unsigned int> GPUPercent();
|
||||
std::vector<unsigned long long> GPUMemoryTotal();
|
||||
std::vector<unsigned long long> GPUMemoryUsed();
|
||||
|
||||
std::vector<double> CPUCorePercent();
|
||||
std::vector<unsigned long long> getTotalCpuTime(std::vector<unsigned long long> &workTime);
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue