Fix bugs in monitoring metrics (#17894)

Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
pull/17959/head
zhenshan.cao 2022-06-30 17:26:19 +08:00 committed by GitHub
parent e1839e3534
commit 0baeb609dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 29 additions and 32 deletions

View File

@ -85,7 +85,7 @@ func (m *meta) reloadFromKV() error {
} }
state := segmentInfo.GetState() state := segmentInfo.GetState()
m.segments.SetSegment(segmentInfo.GetID(), NewSegmentInfo(segmentInfo)) m.segments.SetSegment(segmentInfo.GetID(), NewSegmentInfo(segmentInfo))
metrics.DataCoordNumSegments.WithLabelValues(string(state)).Inc() metrics.DataCoordNumSegments.WithLabelValues(state.String()).Inc()
if state == commonpb.SegmentState_Flushed { if state == commonpb.SegmentState_Flushed {
numStoredRows += segmentInfo.GetNumOfRows() numStoredRows += segmentInfo.GetNumOfRows()
} }
@ -175,7 +175,7 @@ func (m *meta) AddSegment(segment *SegmentInfo) error {
if err := m.saveSegmentInfo(segment); err != nil { if err := m.saveSegmentInfo(segment); err != nil {
return err return err
} }
metrics.DataCoordNumSegments.WithLabelValues(string(segment.GetState())).Inc() metrics.DataCoordNumSegments.WithLabelValues(segment.GetState().String()).Inc()
return nil return nil
} }
@ -233,8 +233,8 @@ func (m *meta) SetState(segmentID UniqueID, state commonpb.SegmentState) error {
if curSegInfo != nil && isSegmentHealthy(curSegInfo) { if curSegInfo != nil && isSegmentHealthy(curSegInfo) {
err := m.saveSegmentInfo(curSegInfo) err := m.saveSegmentInfo(curSegInfo)
if err == nil { if err == nil {
metrics.DataCoordNumSegments.WithLabelValues(string(oldState)).Dec() metrics.DataCoordNumSegments.WithLabelValues(oldState.String()).Dec()
metrics.DataCoordNumSegments.WithLabelValues(string(state)).Inc() metrics.DataCoordNumSegments.WithLabelValues(state.String()).Inc()
if state == commonpb.SegmentState_Flushed { if state == commonpb.SegmentState_Flushed {
metrics.DataCoordNumStoredRows.WithLabelValues().Add(float64(curSegInfo.GetNumOfRows())) metrics.DataCoordNumStoredRows.WithLabelValues().Add(float64(curSegInfo.GetNumOfRows()))
metrics.DataCoordNumStoredRowsCounter.WithLabelValues().Add(float64(curSegInfo.GetNumOfRows())) metrics.DataCoordNumStoredRowsCounter.WithLabelValues().Add(float64(curSegInfo.GetNumOfRows()))
@ -397,8 +397,8 @@ func (m *meta) UpdateFlushSegmentsInfo(
} }
oldSegmentState := segment.GetState() oldSegmentState := segment.GetState()
newSegmentState := clonedSegment.GetState() newSegmentState := clonedSegment.GetState()
metrics.DataCoordNumSegments.WithLabelValues(string(oldSegmentState)).Dec() metrics.DataCoordNumSegments.WithLabelValues(oldSegmentState.String()).Dec()
metrics.DataCoordNumSegments.WithLabelValues(string(newSegmentState)).Inc() metrics.DataCoordNumSegments.WithLabelValues(newSegmentState.String()).Inc()
if newSegmentState == commonpb.SegmentState_Flushed { if newSegmentState == commonpb.SegmentState_Flushed {
metrics.DataCoordNumStoredRows.WithLabelValues().Add(float64(clonedSegment.GetNumOfRows())) metrics.DataCoordNumStoredRows.WithLabelValues().Add(float64(clonedSegment.GetNumOfRows()))
metrics.DataCoordNumStoredRowsCounter.WithLabelValues().Add(float64(clonedSegment.GetNumOfRows())) metrics.DataCoordNumStoredRowsCounter.WithLabelValues().Add(float64(clonedSegment.GetNumOfRows()))
@ -448,7 +448,7 @@ func (m *meta) UpdateDropChannelSegmentInfo(channel string, segments []*SegmentI
for _, seg := range originSegments { for _, seg := range originSegments {
state := seg.GetState() state := seg.GetState()
metrics.DataCoordNumSegments.WithLabelValues( metrics.DataCoordNumSegments.WithLabelValues(
string(state)).Dec() state.String()).Dec()
if state == commonpb.SegmentState_Flushed { if state == commonpb.SegmentState_Flushed {
metrics.DataCoordNumStoredRows.WithLabelValues().Sub(float64(seg.GetNumOfRows())) metrics.DataCoordNumStoredRows.WithLabelValues().Sub(float64(seg.GetNumOfRows()))
} }

View File

@ -44,6 +44,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/milvuspb" "github.com/milvus-io/milvus/internal/proto/milvuspb"
"github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/dependency" "github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/logutil" "github.com/milvus-io/milvus/internal/util/logutil"
"github.com/milvus-io/milvus/internal/util/metricsinfo" "github.com/milvus-io/milvus/internal/util/metricsinfo"
"github.com/milvus-io/milvus/internal/util/paramtable" "github.com/milvus-io/milvus/internal/util/paramtable"
@ -575,7 +576,9 @@ func (s *Server) handleTimetickMessage(ctx context.Context, ttMsg *msgstream.Dat
} }
utcT, _ := tsoutil.ParseHybridTs(ts) utcT, _ := tsoutil.ParseHybridTs(ts)
metrics.DataCoordSyncEpoch.WithLabelValues(ch).Set(float64(utcT))
pChannelName := funcutil.ToPhysicalChannel(ch)
metrics.DataCoordSyncEpoch.WithLabelValues(pChannelName).Set(float64(utcT))
s.updateSegmentStatistics(ttMsg.GetSegmentsStats()) s.updateSegmentStatistics(ttMsg.GetSegmentsStats())

View File

@ -159,7 +159,7 @@ func (dn *deleteNode) bufferDeleteMsg(msg *msgstream.DeleteMsg, tr TimeRange) er
// store // store
delDataBuf.updateSize(int64(rows)) delDataBuf.updateSize(int64(rows))
metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(metrics.DeleteLabel, fmt.Sprint(Params.DataNodeCfg.GetNodeID())).Add(float64(rows)) metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.GetNodeID()), metrics.DeleteLabel).Add(float64(rows))
delDataBuf.updateTimeRange(tr) delDataBuf.updateTimeRange(tr)
dn.delBuf.Store(segID, delDataBuf) dn.delBuf.Store(segID, delDataBuf)
} }

View File

@ -215,7 +215,7 @@ func (replica *SegmentReplica) new2FlushedSegment(segID UniqueID) {
replica.flushedSegments[segID] = &seg replica.flushedSegments[segID] = &seg
delete(replica.newSegments, segID) delete(replica.newSegments, segID)
metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.NodeID)).Dec() metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.GetNodeID())).Dec()
} }
// normal2FlushedSegment transfers a segment from *normal* to *flushed* by changing *isFlushed* // normal2FlushedSegment transfers a segment from *normal* to *flushed* by changing *isFlushed*
@ -287,7 +287,7 @@ func (replica *SegmentReplica) addNewSegment(segID, collID, partitionID UniqueID
replica.segMu.Lock() replica.segMu.Lock()
defer replica.segMu.Unlock() defer replica.segMu.Unlock()
replica.newSegments[segID] = seg replica.newSegments[segID] = seg
metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc() metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.GetNodeID())).Inc()
return nil return nil
} }
@ -381,7 +381,7 @@ func (replica *SegmentReplica) addNormalSegment(segID, collID, partitionID Uniqu
replica.segMu.Lock() replica.segMu.Lock()
replica.normalSegments[segID] = seg replica.normalSegments[segID] = seg
replica.segMu.Unlock() replica.segMu.Unlock()
metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc() metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.GetNodeID())).Inc()
return nil return nil
} }
@ -579,7 +579,7 @@ func (replica *SegmentReplica) removeSegments(segIDs ...UniqueID) {
cnt++ cnt++
} }
} }
metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.NodeID)).Sub(float64(cnt)) metrics.DataNodeNumUnflushedSegments.WithLabelValues(fmt.Sprint(Params.DataNodeCfg.GetNodeID())).Sub(float64(cnt))
for _, segID := range segIDs { for _, segID := range segIDs {
delete(replica.newSegments, segID) delete(replica.newSegments, segID)

View File

@ -137,5 +137,6 @@ func RegisterDataCoord(registry *prometheus.Registry) {
registry.MustRegister(DataCoordNumSegments) registry.MustRegister(DataCoordNumSegments)
registry.MustRegister(DataCoordNumCollections) registry.MustRegister(DataCoordNumCollections)
registry.MustRegister(DataCoordNumStoredRows) registry.MustRegister(DataCoordNumStoredRows)
registry.MustRegister(DataCoordNumStoredRowsCounter)
registry.MustRegister(DataCoordSyncEpoch) registry.MustRegister(DataCoordSyncEpoch)
} }

View File

@ -45,21 +45,11 @@ var (
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: milvusNamespace, Namespace: milvusNamespace,
Subsystem: typeutil.ProxyRole, Subsystem: typeutil.ProxyRole,
Name: "sq_lantency", Name: "sq_latency",
Help: "latency of search", Help: "latency of search",
Buckets: buckets, Buckets: buckets,
}, []string{nodeIDLabelName, queryTypeLabelName}) }, []string{nodeIDLabelName, queryTypeLabelName})
// ProxySendSQReqLatency record the latency that the proxy sent the search request to the message stream.
ProxySendSQReqLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.ProxyRole,
Name: "sq_send_latency",
Help: "latency that proxy sent the search request to the message stream",
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, queryTypeLabelName})
// ProxyWaitForSearchResultLatency record the time that the proxy waits for the search result. // ProxyWaitForSearchResultLatency record the time that the proxy waits for the search result.
ProxyWaitForSearchResultLatency = prometheus.NewHistogramVec( ProxyWaitForSearchResultLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
@ -249,7 +239,6 @@ func RegisterProxy(registry *prometheus.Registry) {
registry.MustRegister(ProxyInsertVectors) registry.MustRegister(ProxyInsertVectors)
registry.MustRegister(ProxySearchLatency) registry.MustRegister(ProxySearchLatency)
registry.MustRegister(ProxySendSQReqLatency)
registry.MustRegister(ProxyWaitForSearchResultLatency) registry.MustRegister(ProxyWaitForSearchResultLatency)
registry.MustRegister(ProxyReduceResultLatency) registry.MustRegister(ProxyReduceResultLatency)
registry.MustRegister(ProxyDecodeResultLatency) registry.MustRegister(ProxyDecodeResultLatency)

View File

@ -110,7 +110,7 @@ var (
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: milvusNamespace, Namespace: milvusNamespace,
Subsystem: typeutil.QueryNodeRole, Subsystem: typeutil.QueryNodeRole,
Name: "sq_queue_lantency", Name: "sq_queue_latency",
Help: "latency of search or query in queue", Help: "latency of search or query in queue",
Buckets: buckets, Buckets: buckets,
}, []string{ }, []string{
@ -285,7 +285,7 @@ var (
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Namespace: milvusNamespace, Namespace: milvusNamespace,
Subsystem: typeutil.QueryNodeRole, Subsystem: typeutil.QueryNodeRole,
Name: "entities_num", Name: "entity_num",
Help: "number of entities which can be searched/queried", Help: "number of entities which can be searched/queried",
}, []string{ }, []string{
nodeIDLabelName, nodeIDLabelName,

View File

@ -27,7 +27,7 @@ var (
Subsystem: typeutil.RootCoordRole, Subsystem: typeutil.RootCoordRole,
Name: "sync_epoch_time", Name: "sync_epoch_time",
Help: "synchronized unix epoch per physical channel", Help: "synchronized unix epoch per physical channel",
}, []string{"PChannel"}) }, []string{channelNameLabelName})
RootCoordDDLReqCounter = prometheus.NewCounterVec( RootCoordDDLReqCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{

View File

@ -22,6 +22,7 @@ import (
"github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/metrics" "github.com/milvus-io/milvus/internal/metrics"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/util/timerecord" "github.com/milvus-io/milvus/internal/util/timerecord"
) )
@ -31,6 +32,10 @@ func searchOnSegments(replica ReplicaInterface, segType segmentType, searchReq *
// results variables // results variables
searchResults := make([]*SearchResult, len(segIDs)) searchResults := make([]*SearchResult, len(segIDs))
errs := make([]error, len(segIDs)) errs := make([]error, len(segIDs))
searchLabel := metrics.SealedSegmentLabel
if segType == commonpb.SegmentState_Growing {
searchLabel = metrics.GrowingSegmentLabel
}
// calling segment search in goroutines // calling segment search in goroutines
var wg sync.WaitGroup var wg sync.WaitGroup
@ -50,8 +55,7 @@ func searchOnSegments(replica ReplicaInterface, segType segmentType, searchReq *
searchResults[i] = searchResult searchResults[i] = searchResult
// update metrics // update metrics
metrics.QueryNodeSQSegmentLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryNodeSQSegmentLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
metrics.SearchLabel, metrics.SearchLabel, searchLabel).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.SealedSegmentLabel).Observe(float64(tr.ElapseSpan().Milliseconds()))
}(segID, i) }(segID, i)
} }
wg.Wait() wg.Wait()

View File

@ -71,6 +71,8 @@ func newDmlChannels(ctx context.Context, factory msgstream.Factory, chanNamePref
} }
log.Debug("init dml channels", zap.Int64("num", chanNum)) log.Debug("init dml channels", zap.Int64("num", chanNum))
metrics.RootCoordNumOfDMLChannel.Add(float64(chanNum)) metrics.RootCoordNumOfDMLChannel.Add(float64(chanNum))
metrics.RootCoordNumOfMsgStream.Add(float64(chanNum))
return d return d
} }
@ -163,7 +165,6 @@ func (d *dmlChannels) addChannels(names ...string) {
dms.refcnt++ dms.refcnt++
dms.mutex.Unlock() dms.mutex.Unlock()
} }
metrics.RootCoordNumOfDMLChannel.Inc()
} }
func (d *dmlChannels) removeChannels(names ...string) { func (d *dmlChannels) removeChannels(names ...string) {
@ -183,7 +184,6 @@ func (d *dmlChannels) removeChannels(names ...string) {
} }
dms.mutex.Unlock() dms.mutex.Unlock()
} }
metrics.RootCoordNumOfDMLChannel.Dec()
} }
func genChannelName(prefix string, idx int64) string { func genChannelName(prefix string, idx int64) string {