limit the frequency of GetMetrics() log (#21514)

Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/21517/head
yah01 2023-01-04 17:39:35 +08:00 committed by GitHub
parent bf3c02155a
commit 7b39873ae0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 63 additions and 84 deletions

View File

@ -860,7 +860,7 @@ func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if s.isClosed() {
log.Warn("DataCoord.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errDataCoordIsUnhealthy(paramtable.GetNodeID())))
@ -877,7 +877,7 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Warn("DataCoord.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
@ -903,10 +903,10 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
}, nil
}
log.Debug("DataCoord.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedDebug(60, "DataCoord.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType),
zap.String("metricType", metricType),
zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large
zap.Error(err))
@ -914,9 +914,9 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
}
log.RatedWarn(60.0, "DataCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
ComponentName: metricsinfo.ConstructComponentName(typeutil.DataCoordRole, paramtable.GetNodeID()),

View File

@ -240,7 +240,7 @@ func (node *DataNode) ShowConfigurations(ctx context.Context, req *internalpb.Sh
func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !node.isHealthy() {
log.Warn("DataNode.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errDataNodeIsUnhealthy(paramtable.GetNodeID())))
@ -255,7 +255,7 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Warn("DataNode.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
@ -282,8 +282,8 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
return systemInfoMetrics, nil
}
log.Debug("DataNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedWarn(60, "DataNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))

View File

@ -967,7 +967,7 @@ func (i *IndexCoord) ShowConfigurations(ctx context.Context, req *internalpb.Sho
// GetMetrics gets the metrics info of IndexCoord.
func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log.RatedInfo(5, "IndexCoord.GetMetrics", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("req", req.Request))
log.RatedInfo(60, "IndexCoord.GetMetrics", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("req", req.Request))
if !i.isHealthy() {
log.Warn(msgIndexCoordIsUnhealthy(paramtable.GetNodeID()))
@ -984,7 +984,7 @@ func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReq
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Error("IndexCoord.GetMetrics failed to parse metric type",
zap.Int64("node id", i.session.ServerID),
zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request),
zap.Error(err))
@ -997,35 +997,30 @@ func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReq
}, nil
}
log.Debug("IndexCoord.GetMetrics",
zap.String("metric type", metricType))
if metricType == metricsinfo.SystemInfoMetrics {
ret, err := i.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil {
return ret, nil
metrics, err := i.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
// Miss cache
metrics, err = getSystemInfoMetrics(ctx, req, i)
}
log.Warn("failed to get system info metrics from cache, recompute instead",
zap.Error(err))
metrics, err := getSystemInfoMetrics(ctx, req, i)
log.Debug("IndexCoord.GetMetrics",
zap.Int64("node id", i.session.ServerID),
log.RatedDebug(60, "IndexCoord.GetMetrics",
zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request),
zap.String("metric type", metricType),
zap.String("metricType", metricType),
zap.String("metrics", metrics.Response), // TODO(dragondriver): necessary? may be very large
zap.Error(err))
zap.Error(err),
)
i.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return metrics, nil
}
log.Warn("IndexCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node id", i.session.ServerID),
log.RatedWarn(60, "IndexCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request),
zap.String("metric type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{

View File

@ -230,7 +230,7 @@ func (i *IndexNode) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsReq
func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !commonpbutil.IsHealthyOrStopping(i.stateCode) {
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errIndexNodeIsUnhealthy(paramtable.GetNodeID())))
@ -246,7 +246,7 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
@ -262,8 +262,8 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
if metricType == metricsinfo.SystemInfoMetrics {
metrics, err := getSystemInfoMetrics(ctx, req, i)
log.Ctx(ctx).Debug("IndexNode.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.Ctx(ctx).RatedDebug(60, "IndexNode.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType),
zap.Error(err))
@ -271,8 +271,8 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
return metrics, nil
}
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
log.Ctx(ctx).RatedWarn(60, "IndexNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))

View File

@ -3220,13 +3220,13 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
log := log.Ctx(ctx)
log.Debug("Proxy.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedDebug(60, "Proxy.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request))
if !node.checkHealthy() {
log.Warn("Proxy.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errProxyIsUnhealthy(paramtable.GetNodeID())))
@ -3242,7 +3242,7 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Warn("Proxy.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
@ -3255,28 +3255,21 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
}, nil
}
log.Debug("Proxy.GetMetrics",
zap.String("metric_type", metricType))
req.Base = commonpbutil.NewMsgBase(
commonpbutil.WithMsgType(commonpb.MsgType_SystemInfo),
commonpbutil.WithMsgID(0),
commonpbutil.WithSourceID(paramtable.GetNodeID()),
)
if metricType == metricsinfo.SystemInfoMetrics {
ret, err := node.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil {
return ret, nil
metrics, err := node.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
metrics, err = getSystemInfoMetrics(ctx, req, node)
}
log.Debug("failed to get system info metrics from cache, recompute instead",
zap.Error(err))
metrics, err := getSystemInfoMetrics(ctx, req, node)
log.Debug("Proxy.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedDebug(60, "Proxy.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType),
zap.String("metricType", metricType),
zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large
zap.Error(err))
@ -3285,10 +3278,10 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
return metrics, nil
}
log.Warn("Proxy.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedWarn(60, "Proxy.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{
@ -3306,7 +3299,7 @@ func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetrics
defer sp.Finish()
log := log.Ctx(ctx).With(
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request))
if !node.checkHealthy() {
@ -3355,13 +3348,13 @@ func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetrics
}
log.Debug("Proxy.GetProxyMetrics",
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return proxyMetrics, nil
}
log.Warn("Proxy.GetProxyMetrics failed, request metric type is not implemented yet",
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{

View File

@ -550,7 +550,7 @@ func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log := log.Ctx(ctx)
log.Debug("get metrics request received",
log.RatedDebug(60, "get metrics request received",
zap.String("metricType", req.GetRequest()))
if s.status.Load() != commonpb.StateCode_Healthy {

View File

@ -1208,7 +1208,7 @@ func (node *QueryNode) SyncReplicaSegments(ctx context.Context, req *querypb.Syn
func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
if !node.isHealthyOrStopping() {
log.Warn("QueryNode.ShowConfigurations failed",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Pattern),
zap.Error(errQueryNodeIsUnhealthy(paramtable.GetNodeID())))
@ -1245,7 +1245,7 @@ func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.S
func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !node.isHealthyOrStopping() {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errQueryNodeIsUnhealthy(paramtable.GetNodeID())))
@ -1263,7 +1263,7 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed to parse metric type",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
@ -1279,7 +1279,7 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
queryNodeMetrics, err := getSystemInfoMetrics(ctx, req, node)
if err != nil {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metricType", metricType),
zap.Error(err))
@ -1293,8 +1293,8 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
return queryNodeMetrics, nil
}
log.Ctx(ctx).Debug("QueryNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeId", paramtable.GetNodeID()),
log.Ctx(ctx).RatedDebug(60, "QueryNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metricType", metricType))

View File

@ -1479,32 +1479,23 @@ func (c *Core) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (
metricType, err := metricsinfo.ParseMetricType(in.Request)
if err != nil {
log.Warn("ParseMetricType failed", zap.String("role", typeutil.RootCoordRole),
zap.Int64("node_id", c.session.ServerID), zap.String("req", in.Request), zap.Error(err))
zap.Int64("nodeID", c.session.ServerID), zap.String("req", in.Request), zap.Error(err))
return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, "ParseMetricType failed: "+err.Error()),
Response: "",
}, nil
}
log.Ctx(ctx).Debug("GetMetrics success",
zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType))
if metricType == metricsinfo.SystemInfoMetrics {
ret, err := c.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil {
return ret, nil
metrics, err := c.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
metrics, err = c.getSystemInfoMetrics(ctx, in)
}
log.Warn("GetSystemInfoMetrics from cache failed",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err))
systemInfoMetrics, err := c.getSystemInfoMetrics(ctx, in)
if err != nil {
log.Warn("GetSystemInfoMetrics failed",
zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType),
zap.String("metricType", metricType),
zap.Error(err))
return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, fmt.Sprintf("getSystemInfoMetrics failed: %s", err.Error())),
@ -1512,12 +1503,12 @@ func (c *Core) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (
}, nil
}
c.metricsCacheManager.UpdateSystemInfoMetrics(systemInfoMetrics)
return systemInfoMetrics, err
c.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return metrics, err
}
log.Warn("GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType))
log.RatedWarn(60, "GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole),
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, metricsinfo.MsgUnimplementedMetric),