limit the frequency of GetMetrics() log (#21514) (#21519)

Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/21472/head
yah01 2023-01-04 20:03:35 +08:00 committed by GitHub
parent d88846b20c
commit 989ea16a20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 82 additions and 93 deletions

View File

@ -831,7 +831,7 @@ func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if s.isClosed() { if s.isClosed() {
log.Warn("DataCoord.GetMetrics failed", log.Warn("DataCoord.GetMetrics failed",
zap.Int64("node_id", Params.DataCoordCfg.GetNodeID()), zap.Int64("nodeID", s.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(errDataCoordIsUnhealthy(Params.DataCoordCfg.GetNodeID()))) zap.Error(errDataCoordIsUnhealthy(Params.DataCoordCfg.GetNodeID())))
@ -848,7 +848,7 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
metricType, err := metricsinfo.ParseMetricType(req.Request) metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil { if err != nil {
log.Warn("DataCoord.GetMetrics failed to parse metric type", log.Warn("DataCoord.GetMetrics failed to parse metric type",
zap.Int64("node_id", Params.DataCoordCfg.GetNodeID()), zap.Int64("nodeID", s.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(err)) zap.Error(err))
@ -874,10 +874,10 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
}, nil }, nil
} }
log.Debug("DataCoord.GetMetrics", log.RatedDebug(60, "DataCoord.GetMetrics",
zap.Int64("node_id", Params.DataCoordCfg.GetNodeID()), zap.Int64("nodeID", s.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType), zap.String("metricType", metricType),
zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large
zap.Error(err)) zap.Error(err))
@ -885,9 +885,9 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
} }
log.RatedWarn(60.0, "DataCoord.GetMetrics failed, request metric type is not implemented yet", log.RatedWarn(60.0, "DataCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", Params.DataCoordCfg.GetNodeID()), zap.Int64("nodeID", s.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType)) zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
ComponentName: metricsinfo.ConstructComponentName(typeutil.DataCoordRole, Params.DataCoordCfg.GetNodeID()), ComponentName: metricsinfo.ConstructComponentName(typeutil.DataCoordRole, Params.DataCoordCfg.GetNodeID()),

View File

@ -760,14 +760,14 @@ func (node *DataNode) ShowConfigurations(ctx context.Context, req *internalpb.Sh
func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !node.isHealthy() { if !node.isHealthy() {
log.Warn("DataNode.GetMetrics failed", log.Warn("DataNode.GetMetrics failed",
zap.Int64("node_id", Params.DataNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(errDataNodeIsUnhealthy(Params.DataNodeCfg.GetNodeID()))) zap.Error(errDataNodeIsUnhealthy(node.session.ServerID)))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{ Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError, ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: msgDataNodeIsUnhealthy(Params.DataNodeCfg.GetNodeID()), Reason: msgDataNodeIsUnhealthy(node.session.ServerID),
}, },
}, nil }, nil
} }
@ -775,14 +775,14 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
metricType, err := metricsinfo.ParseMetricType(req.Request) metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil { if err != nil {
log.Warn("DataNode.GetMetrics failed to parse metric type", log.Warn("DataNode.GetMetrics failed to parse metric type",
zap.Int64("node_id", Params.DataNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(err)) zap.Error(err))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{ Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError, ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: fmt.Sprintf("datanode GetMetrics failed, nodeID=%d, err=%s", Params.DataNodeCfg.GetNodeID(), err.Error()), Reason: fmt.Sprintf("datanode GetMetrics failed, nodeID=%d, err=%s", node.session.ServerID, err.Error()),
}, },
}, nil }, nil
} }
@ -790,11 +790,11 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
if metricType == metricsinfo.SystemInfoMetrics { if metricType == metricsinfo.SystemInfoMetrics {
systemInfoMetrics, err := node.getSystemInfoMetrics(ctx, req) systemInfoMetrics, err := node.getSystemInfoMetrics(ctx, req)
if err != nil { if err != nil {
log.Warn("DataNode GetMetrics failed", zap.Int64("nodeID", Params.DataNodeCfg.GetNodeID()), zap.Error(err)) log.Warn("DataNode GetMetrics failed", zap.Int64("nodeID", node.session.ServerID), zap.Error(err))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{ Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError, ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: fmt.Sprintf("datanode GetMetrics failed, nodeID=%d, err=%s", Params.DataNodeCfg.GetNodeID(), err.Error()), Reason: fmt.Sprintf("datanode GetMetrics failed, nodeID=%d, err=%s", node.session.ServerID, err.Error()),
}, },
}, nil }, nil
} }
@ -802,8 +802,8 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
return systemInfoMetrics, nil return systemInfoMetrics, nil
} }
log.Debug("DataNode.GetMetrics failed, request metric type is not implemented yet", log.RatedWarn(60, "DataNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", Params.DataNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType)) zap.String("metric_type", metricType))

View File

@ -944,7 +944,7 @@ func (i *IndexCoord) ShowConfigurations(ctx context.Context, req *internalpb.Sho
// GetMetrics gets the metrics info of IndexCoord. // GetMetrics gets the metrics info of IndexCoord.
func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log.Debug("IndexCoord.GetMetrics", zap.Int64("node id", i.serverID), zap.String("req", req.Request)) log.RatedInfo(60, "IndexCoord.GetMetrics", zap.Int64("nodeID", i.serverID), zap.String("req", req.Request))
if !i.isHealthy() { if !i.isHealthy() {
log.Warn(msgIndexCoordIsUnhealthy(i.serverID)) log.Warn(msgIndexCoordIsUnhealthy(i.serverID))
@ -961,7 +961,7 @@ func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReq
metricType, err := metricsinfo.ParseMetricType(req.Request) metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil { if err != nil {
log.Error("IndexCoord.GetMetrics failed to parse metric type", log.Error("IndexCoord.GetMetrics failed to parse metric type",
zap.Int64("node id", i.session.ServerID), zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(err)) zap.Error(err))
@ -974,35 +974,30 @@ func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReq
}, nil }, nil
} }
log.Debug("IndexCoord.GetMetrics",
zap.String("metric type", metricType))
if metricType == metricsinfo.SystemInfoMetrics { if metricType == metricsinfo.SystemInfoMetrics {
ret, err := i.metricsCacheManager.GetSystemInfoMetrics() metrics, err := i.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil { if err != nil {
return ret, nil // Miss cache
metrics, err = getSystemInfoMetrics(ctx, req, i)
} }
log.Warn("failed to get system info metrics from cache, recompute instead",
zap.Error(err))
metrics, err := getSystemInfoMetrics(ctx, req, i) log.RatedDebug(60, "IndexCoord.GetMetrics",
zap.Int64("nodeID", i.session.ServerID),
log.Debug("IndexCoord.GetMetrics",
zap.Int64("node id", i.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric type", metricType), zap.String("metricType", metricType),
zap.String("metrics", metrics.Response), // TODO(dragondriver): necessary? may be very large zap.String("metrics", metrics.Response), // TODO(dragondriver): necessary? may be very large
zap.Error(err)) zap.Error(err),
)
i.metricsCacheManager.UpdateSystemInfoMetrics(metrics) i.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return metrics, nil return metrics, nil
} }
log.Debug("IndexCoord.GetMetrics failed, request metric type is not implemented yet", log.RatedWarn(60, "IndexCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node id", i.session.ServerID), zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric type", metricType)) zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{ Status: &commonpb.Status{

View File

@ -213,7 +213,7 @@ func (i *IndexNode) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsReq
func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !i.isHealthy() { if !i.isHealthy() {
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed", log.Ctx(ctx).Warn("IndexNode.GetMetrics failed",
zap.Int64("node_id", Params.IndexNodeCfg.GetNodeID()), zap.Int64("nodeID", i.GetNodeID()),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(errIndexNodeIsUnhealthy(Params.IndexNodeCfg.GetNodeID()))) zap.Error(errIndexNodeIsUnhealthy(Params.IndexNodeCfg.GetNodeID())))
@ -229,7 +229,7 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
metricType, err := metricsinfo.ParseMetricType(req.Request) metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil { if err != nil {
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed to parse metric type", log.Ctx(ctx).Warn("IndexNode.GetMetrics failed to parse metric type",
zap.Int64("node_id", Params.IndexNodeCfg.GetNodeID()), zap.Int64("nodeID", i.GetNodeID()),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(err)) zap.Error(err))
@ -245,8 +245,8 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
if metricType == metricsinfo.SystemInfoMetrics { if metricType == metricsinfo.SystemInfoMetrics {
metrics, err := getSystemInfoMetrics(ctx, req, i) metrics, err := getSystemInfoMetrics(ctx, req, i)
log.Ctx(ctx).Debug("IndexNode.GetMetrics", log.Ctx(ctx).RatedDebug(60, "IndexNode.GetMetrics",
zap.Int64("node_id", Params.IndexNodeCfg.GetNodeID()), zap.Int64("nodeID", i.GetNodeID()),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType), zap.String("metric_type", metricType),
zap.Error(err)) zap.Error(err))
@ -254,8 +254,8 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
return metrics, nil return metrics, nil
} }
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed, request metric type is not implemented yet", log.Ctx(ctx).RatedWarn(60, "IndexNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", Params.IndexNodeCfg.GetNodeID()), zap.Int64("nodeID", i.GetNodeID()),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType)) zap.String("metric_type", metricType))

View File

@ -3658,13 +3658,18 @@ func (node *Proxy) RegisterLink(ctx context.Context, req *milvuspb.RegisterLinkR
// GetMetrics gets the metrics of proxy // GetMetrics gets the metrics of proxy
// TODO(dragondriver): cache the Metrics and set a retention to the cache // TODO(dragondriver): cache the Metrics and set a retention to the cache
func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log.Debug("Proxy.GetMetrics", sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-GetMetrics")
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), defer sp.Finish()
log := log.Ctx(ctx)
log.RatedDebug(60, "Proxy.GetMetrics",
zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request)) zap.String("req", req.Request))
if !node.checkHealthy() { if !node.checkHealthy() {
log.Warn("Proxy.GetMetrics failed", log.Warn("Proxy.GetMetrics failed",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(errProxyIsUnhealthy(Params.ProxyCfg.GetNodeID()))) zap.Error(errProxyIsUnhealthy(Params.ProxyCfg.GetNodeID())))
@ -3680,7 +3685,7 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
metricType, err := metricsinfo.ParseMetricType(req.Request) metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil { if err != nil {
log.Warn("Proxy.GetMetrics failed to parse metric type", log.Warn("Proxy.GetMetrics failed to parse metric type",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(err)) zap.Error(err))
@ -3693,9 +3698,6 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
}, nil }, nil
} }
log.Debug("Proxy.GetMetrics",
zap.String("metric_type", metricType))
req.Base = commonpbutil.NewMsgBase( req.Base = commonpbutil.NewMsgBase(
commonpbutil.WithMsgType(commonpb.MsgType_SystemInfo), commonpbutil.WithMsgType(commonpb.MsgType_SystemInfo),
commonpbutil.WithMsgID(0), commonpbutil.WithMsgID(0),
@ -3703,19 +3705,15 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
commonpbutil.WithSourceID(Params.ProxyCfg.GetNodeID()), commonpbutil.WithSourceID(Params.ProxyCfg.GetNodeID()),
) )
if metricType == metricsinfo.SystemInfoMetrics { if metricType == metricsinfo.SystemInfoMetrics {
ret, err := node.metricsCacheManager.GetSystemInfoMetrics() metrics, err := node.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil { if err != nil {
return ret, nil metrics, err = getSystemInfoMetrics(ctx, req, node)
} }
log.Debug("failed to get system info metrics from cache, recompute instead",
zap.Error(err))
metrics, err := getSystemInfoMetrics(ctx, req, node) log.RatedDebug(60, "Proxy.GetMetrics",
zap.Int64("nodeID", node.session.ServerID),
log.Debug("Proxy.GetMetrics",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType), zap.String("metricType", metricType),
zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large
zap.Error(err)) zap.Error(err))
@ -3724,10 +3722,10 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
return metrics, nil return metrics, nil
} }
log.Warn("Proxy.GetMetrics failed, request metric type is not implemented yet", log.RatedWarn(60, "Proxy.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metric_type", metricType)) zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{ Status: &commonpb.Status{
@ -3741,6 +3739,13 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
// GetProxyMetrics gets the metrics of proxy, it's an internal interface which is different from GetMetrics interface, // GetProxyMetrics gets the metrics of proxy, it's an internal interface which is different from GetMetrics interface,
// because it only obtains the metrics of Proxy, not including the topological metrics of Query cluster and Data cluster. // because it only obtains the metrics of Proxy, not including the topological metrics of Query cluster and Data cluster.
func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-GetProxyMetrics")
defer sp.Finish()
log := log.Ctx(ctx).With(
zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request))
if !node.checkHealthy() { if !node.checkHealthy() {
log.Warn("Proxy.GetProxyMetrics failed", log.Warn("Proxy.GetProxyMetrics failed",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), zap.Int64("node_id", Params.ProxyCfg.GetNodeID()),
@ -3794,17 +3799,13 @@ func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetrics
} }
log.Debug("Proxy.GetProxyMetrics", log.Debug("Proxy.GetProxyMetrics",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), zap.String("metricType", metricType))
zap.String("req", req.Request),
zap.String("metric_type", metricType))
return proxyMetrics, nil return proxyMetrics, nil
} }
log.Warn("Proxy.GetProxyMetrics failed, request metric type is not implemented yet", log.Warn("Proxy.GetProxyMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", Params.ProxyCfg.GetNodeID()), zap.String("metricType", metricType))
zap.String("req", req.Request),
zap.String("metric_type", metricType))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{ Status: &commonpb.Status{

View File

@ -540,9 +540,7 @@ func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
} }
func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log := log.With(zap.Int64("msgID", req.Base.GetMsgID())) log.RatedDebug(60, "get metrics request received",
log.Debug("get metrics request received",
zap.String("metricType", req.GetRequest())) zap.String("metricType", req.GetRequest()))
if s.status.Load() != commonpb.StateCode_Healthy { if s.status.Load() != commonpb.StateCode_Healthy {

View File

@ -1175,7 +1175,7 @@ func (node *QueryNode) SyncReplicaSegments(ctx context.Context, req *querypb.Syn
func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) { func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
if !node.isHealthy() { if !node.isHealthy() {
log.Warn("QueryNode.ShowConfigurations failed", log.Warn("QueryNode.ShowConfigurations failed",
zap.Int64("nodeId", Params.QueryNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Pattern), zap.String("req", req.Pattern),
zap.Error(errQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()))) zap.Error(errQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID())))
@ -1195,7 +1195,7 @@ func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.S
func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) { func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !node.isHealthy() { if !node.isHealthy() {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed", log.Ctx(ctx).Warn("QueryNode.GetMetrics failed",
zap.Int64("nodeId", Params.QueryNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(errQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()))) zap.Error(errQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID())))
@ -1211,7 +1211,7 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
metricType, err := metricsinfo.ParseMetricType(req.Request) metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil { if err != nil {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed to parse metric type", log.Ctx(ctx).Warn("QueryNode.GetMetrics failed to parse metric type",
zap.Int64("nodeId", Params.QueryNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.Error(err)) zap.Error(err))
@ -1227,7 +1227,7 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
queryNodeMetrics, err := getSystemInfoMetrics(ctx, req, node) queryNodeMetrics, err := getSystemInfoMetrics(ctx, req, node)
if err != nil { if err != nil {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed", log.Ctx(ctx).Warn("QueryNode.GetMetrics failed",
zap.Int64("NodeId", Params.QueryNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metricType", metricType), zap.String("metricType", metricType),
zap.Error(err)) zap.Error(err))
@ -1241,8 +1241,8 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
return queryNodeMetrics, nil return queryNodeMetrics, nil
} }
log.Ctx(ctx).Debug("QueryNode.GetMetrics failed, request metric type is not implemented yet", log.Ctx(ctx).RatedDebug(60, "QueryNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("NodeId", Params.QueryNodeCfg.GetNodeID()), zap.Int64("nodeID", node.session.ServerID),
zap.String("req", req.Request), zap.String("req", req.Request),
zap.String("metricType", metricType)) zap.String("metricType", metricType))

View File

@ -1403,41 +1403,36 @@ func (c *Core) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (
metricType, err := metricsinfo.ParseMetricType(in.Request) metricType, err := metricsinfo.ParseMetricType(in.Request)
if err != nil { if err != nil {
log.Warn("ParseMetricType failed", zap.String("role", typeutil.RootCoordRole), log.Warn("ParseMetricType failed", zap.String("role", typeutil.RootCoordRole),
zap.Int64("node_id", c.session.ServerID), zap.String("req", in.Request), zap.Error(err)) zap.Int64("nodeID", c.session.ServerID), zap.String("req", in.Request), zap.Error(err))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, "ParseMetricType failed: "+err.Error()), Status: failStatus(commonpb.ErrorCode_UnexpectedError, "ParseMetricType failed: "+err.Error()),
Response: "", Response: "",
}, nil }, nil
} }
log.Debug("GetMetrics success", zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType), zap.Int64("msgID", in.GetBase().GetMsgID()))
if metricType == metricsinfo.SystemInfoMetrics { if metricType == metricsinfo.SystemInfoMetrics {
ret, err := c.metricsCacheManager.GetSystemInfoMetrics() metrics, err := c.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil { if err != nil {
return ret, nil metrics, err = c.getSystemInfoMetrics(ctx, in)
} }
log.Debug("GetSystemInfoMetrics from cache failed, recompute instead", zap.String("role", typeutil.RootCoordRole),
zap.Int64("msgID", in.GetBase().GetMsgID()), zap.Error(err))
systemInfoMetrics, err := c.getSystemInfoMetrics(ctx, in)
if err != nil { if err != nil {
log.Warn("GetSystemInfoMetrics failed", zap.String("role", typeutil.RootCoordRole), log.Warn("GetSystemInfoMetrics failed",
zap.String("metric_type", metricType), zap.Int64("msgID", in.GetBase().GetMsgID()), zap.Error(err)) zap.String("role", typeutil.RootCoordRole),
zap.String("metricType", metricType),
zap.Error(err))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, fmt.Sprintf("getSystemInfoMetrics failed: %s", err.Error())), Status: failStatus(commonpb.ErrorCode_UnexpectedError, fmt.Sprintf("getSystemInfoMetrics failed: %s", err.Error())),
Response: "", Response: "",
}, nil }, nil
} }
c.metricsCacheManager.UpdateSystemInfoMetrics(systemInfoMetrics) c.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return systemInfoMetrics, err return metrics, err
} }
log.Warn("GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole), log.RatedWarn(60, "GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType), zap.Int64("msgID", in.GetBase().GetMsgID())) zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{ return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, metricsinfo.MsgUnimplementedMetric), Status: failStatus(commonpb.ErrorCode_UnexpectedError, metricsinfo.MsgUnimplementedMetric),