Fix metrics label conflict (#15839)

Signed-off-by: cai.zhang <cai.zhang@zilliz.com>
pull/15851/head
cai.zhang 2022-03-03 16:05:57 +08:00 committed by GitHub
parent 27a7875a15
commit 88527fc0a9
18 changed files with 216 additions and 281 deletions

View File

@ -169,7 +169,7 @@ func (dn *deleteNode) bufferDeleteMsg(msg *msgstream.DeleteMsg, tr TimeRange) er
// store
delDataBuf.updateSize(int64(rows))
metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(metrics.DataNodeMsgTypeDelete, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(rows))
metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(metrics.DeleteLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(rows))
delDataBuf.updateTimeRange(tr)
dn.delBuf.Store(segID, delDataBuf)
}

View File

@ -346,13 +346,13 @@ func (ibNode *insertBufferNode) Operate(in []Msg) []Msg {
err := ibNode.flushManager.flushBufferData(task.buffer, task.segmentID, task.flushed, task.dropped, endPositions[0])
if err != nil {
log.Warn("failed to invoke flushBufferData", zap.Error(err))
metrics.DataNodeFlushSegmentCount.WithLabelValues(metrics.DataNodeMetricLabelFail, fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc()
metrics.DataNodeFlushSegmentCount.WithLabelValues(metrics.FailLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc()
} else {
segmentsToFlush = append(segmentsToFlush, task.segmentID)
ibNode.insertBuffer.Delete(task.segmentID)
metrics.DataNodeFlushSegmentCount.WithLabelValues(metrics.DataNodeMetricLabelSuccess, fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc()
metrics.DataNodeFlushSegmentCount.WithLabelValues(metrics.SuccessLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc()
}
metrics.DataNodeFlushSegmentCount.WithLabelValues(metrics.DataNodeMetricLabelTotal, fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc()
metrics.DataNodeFlushSegmentCount.WithLabelValues(metrics.TotalLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Inc()
}
if err := ibNode.writeHardTimeTick(fgMsg.timeRange.timestampMax, seg2Upload); err != nil {
@ -679,7 +679,7 @@ func (ibNode *insertBufferNode) bufferInsertMsg(msg *msgstream.InsertMsg, endPos
// update buffer size
buffer.updateSize(int64(len(msg.RowData)))
metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(metrics.DataNodeMsgTypeInsert, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(len(msg.RowData)))
metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(metrics.InsertLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(len(msg.RowData)))
// store in buffer
ibNode.insertBuffer.Store(currentSegID, buffer)

View File

@ -555,11 +555,11 @@ type flushBufferInsertTask struct {
func (t *flushBufferInsertTask) flushInsertData() error {
if t.BaseKV != nil && len(t.data) > 0 {
for _, d := range t.data {
metrics.DataNodeFlushedSize.WithLabelValues(metrics.DataNodeMsgTypeInsert, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(len(d)))
metrics.DataNodeFlushedSize.WithLabelValues(metrics.InsertLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(len(d)))
}
tr := timerecord.NewTimeRecorder("insertData")
err := t.MultiSave(t.data)
metrics.DataNodeSave2StorageLatency.WithLabelValues(metrics.DataNodeMsgTypeInsert, fmt.Sprint(Params.DataNodeCfg.NodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.DataNodeSave2StorageLatency.WithLabelValues(metrics.InsertLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
return err
}
return nil
@ -574,11 +574,11 @@ type flushBufferDeleteTask struct {
func (t *flushBufferDeleteTask) flushDeleteData() error {
if len(t.data) > 0 && t.BaseKV != nil {
for _, d := range t.data {
metrics.DataNodeFlushedSize.WithLabelValues(metrics.DataNodeMsgTypeDelete, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(len(d)))
metrics.DataNodeFlushedSize.WithLabelValues(metrics.DeleteLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(len(d)))
}
tr := timerecord.NewTimeRecorder("deleteData")
err := t.MultiSave(t.data)
metrics.DataNodeSave2StorageLatency.WithLabelValues(metrics.DataNodeMsgTypeDelete, fmt.Sprint(Params.DataNodeCfg.NodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.DataNodeSave2StorageLatency.WithLabelValues(metrics.DeleteLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
return err
}
return nil

View File

@ -17,35 +17,10 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/prometheus/client_golang/prometheus"
)
const (
// TODO: use the common status label
DataNodeMetricLabelSuccess = "success"
DataNodeMetricLabelFail = "fail"
DataNodeMetricLabelTotal = "total"
DataNodeMsgTypeInsert = "insert"
DataNodeMsgTypeDelete = "delete"
)
// TODO: move to metrics.go
const (
nodeIDLabelName = "node_id"
statusLabelName = "status"
msgTypeLabelName = "msg_type"
collectionIDLabelName = "collection_id"
channelNameLabelName = "channel_name"
)
// dataNodeDurationBuckets involves durations in milliseconds,
// [10 20 40 80 160 320 640 1280 2560 5120 10240 20480 40960 81920 163840 327680 655360 1.31072e+06]
var dataNodeDurationBuckets = prometheus.ExponentialBuckets(10, 2, 18)
var (
DataNodeNumFlowGraphs = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@ -162,7 +137,7 @@ var (
Subsystem: typeutil.DataNodeRole,
Name: "flush_segment_latency",
Help: "The flush segment latency in DataNode.",
Buckets: dataNodeDurationBuckets,
Buckets: buckets,
}, []string{
collectionIDLabelName,
nodeIDLabelName,
@ -208,7 +183,7 @@ var (
Subsystem: typeutil.DataNodeRole,
Name: "compaction_latency",
Help: "Compaction latency in DataNode.",
Buckets: dataNodeDurationBuckets,
Buckets: buckets,
}, []string{
collectionIDLabelName,
nodeIDLabelName,

View File

@ -21,14 +21,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const (
UnissuedIndexTaskLabel = "unissued"
InProgressIndexTaskLabel = "in-progress"
FinishedIndexTaskLabel = "finished"
FailedIndexTaskLabel = "failed"
RecycledIndexTaskLabel = "recycled"
)
var (
// IndexCoordIndexRequestCounter records the number of the index requests.
IndexCoordIndexRequestCounter = prometheus.NewCounterVec(
@ -37,7 +29,7 @@ var (
Subsystem: typeutil.IndexCoordRole,
Name: "index_req_counter",
Help: "The number of requests to build index",
}, []string{"status"})
}, []string{statusLabelName})
// IndexCoordIndexTaskCounter records the number of index tasks of each type.
IndexCoordIndexTaskCounter = prometheus.NewGaugeVec(
@ -46,7 +38,7 @@ var (
Subsystem: typeutil.IndexCoordRole,
Name: "index_task_counter",
Help: "The number of index tasks of each type",
}, []string{"type"})
}, []string{"index_task_status"})
// IndexCoordIndexNodeNum records the number of IndexNodes managed by IndexCoord.
IndexCoordIndexNodeNum = prometheus.NewGaugeVec(

View File

@ -21,12 +21,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const (
SuccessLabel = "success"
FailLabel = "fail"
TotalLabel = "total"
)
var (
IndexNodeBuildIndexTaskCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
@ -34,7 +28,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "index_task_counter",
Help: "The number of tasks that index node received",
}, []string{"node_id", "status"})
}, []string{nodeIDLabelName, statusLabelName})
IndexNodeLoadBinlogLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -42,7 +36,8 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "load_segment_latency",
Help: "The latency of loading the segment",
}, []string{"node_id", "segment_id"})
Buckets: buckets,
}, []string{nodeIDLabelName, segmentIDLabelName})
IndexNodeDecodeBinlogLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -50,7 +45,8 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "decode_binlog_latency",
Help: "The latency of decode the binlog",
}, []string{"node_id", "segment_id"})
Buckets: buckets,
}, []string{nodeIDLabelName, segmentIDLabelName})
IndexNodeKnowhereBuildIndexLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -58,7 +54,8 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "knowhere_build_index_latency",
Help: "The latency of knowhere building the index",
}, []string{"node_id", "segment_id"})
Buckets: buckets,
}, []string{nodeIDLabelName, segmentIDLabelName})
IndexNodeEncodeIndexFileLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -66,7 +63,8 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "encode_index_file_latency",
Help: "The latency of encoding the index file",
}, []string{"node_id", "segment_id"})
Buckets: buckets,
}, []string{nodeIDLabelName, segmentIDLabelName})
IndexNodeSaveIndexFileLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -74,7 +72,8 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "save_index_file_latency",
Help: "The latency of saving the index file",
}, []string{"node_id", "segment_id"})
Buckets: buckets,
}, []string{nodeIDLabelName, segmentIDLabelName})
)
//RegisterIndexNode registers IndexNode metrics

View File

@ -34,12 +34,42 @@ const (
milvusNamespace = "milvus"
AbandonLabel = "abandon"
SuccessLabel = "success"
FailLabel = "fail"
TotalLabel = "total"
InsertLabel = "insert"
DeleteLabel = "delete"
SearchLabel = "search"
QueryLabel = "query"
CacheHitLabel = "hit"
CacheMissLabel = "miss"
UnissuedIndexTaskLabel = "unissued"
InProgressIndexTaskLabel = "in-progress"
FinishedIndexTaskLabel = "finished"
FailedIndexTaskLabel = "failed"
RecycledIndexTaskLabel = "recycled"
SealedSegmentLabel = "sealed"
GrowingSegmentLabel = "growing"
nodeIDLabelName = "node_id"
statusLabelName = "status"
msgTypeLabelName = "msg_type"
collectionIDLabelName = "collection_id"
channelNameLabelName = "channel_name"
segmentIDLabelName = "segment_id"
functionLabelName = "function_name"
queryTypeLabelName = "query_type"
segmentTypeLabelName = "segment_type"
)
var (
// buckets involves durations in milliseconds,
// [1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 1.31072e+05]
buckets = prometheus.ExponentialBuckets(1, 2, 18)
)
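
For reference, the sketch below is a minimal, standalone approximation (not the actual Milvus metrics package) of how shared label-name constants and a single millisecond bucket set like the ones added above are typically consumed: the constants fix the label schema when the collector is declared, and WithLabelValues must then supply exactly one value per declared label, in order. The "demo" namespace, the metric names, and the locally redeclared constants are illustrative stand-ins only.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// Simplified stand-ins for the shared constants defined in the metrics package above.
const (
	milvusNamespace = "demo" // illustrative namespace, not the real one
	SuccessLabel    = "success"
	FailLabel       = "fail"
	TotalLabel      = "total"

	nodeIDLabelName = "node_id"
	statusLabelName = "status"
)

// One shared bucket set in milliseconds: 1, 2, 4, ..., 131072 (about 2.2 minutes).
var buckets = prometheus.ExponentialBuckets(1, 2, 18)

var (
	// A counter partitioned by the shared status and node_id label names.
	requestCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: milvusNamespace,
			Subsystem: "example",
			Name:      "request_total",
			Help:      "Illustrative request counter using the shared label names",
		}, []string{statusLabelName, nodeIDLabelName})

	// A histogram that reuses the shared millisecond buckets.
	requestLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: milvusNamespace,
			Subsystem: "example",
			Name:      "request_latency",
			Help:      "Illustrative latency histogram using the shared buckets",
			Buckets:   buckets,
		}, []string{statusLabelName, nodeIDLabelName})
)

func main() {
	reg := prometheus.NewRegistry()
	reg.MustRegister(requestCounter, requestLatency)

	// Exactly one value per declared label, in declaration order.
	requestCounter.WithLabelValues(SuccessLabel, "1").Inc()
	requestCounter.WithLabelValues(TotalLabel, "1").Inc()
	requestLatency.WithLabelValues(SuccessLabel, "1").Observe(12.5) // milliseconds

	fmt.Println(buckets) // [1 2 4 ... 131072]
}
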
var (

View File

@ -21,11 +21,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
var (
coarseGrainedBuckets = []float64{1, 10, 20, 50, 100, 200, 500, 1000, 5000, 10000} // unit: ms
fineGrainedBuckets = []float64{1, 2, 5, 8, 10, 20, 30, 40, 50, 100} // unit: ms
)
var (
// ProxyDmlChannelTimeTick counts the time tick value of dml channels
ProxyDmlChannelTimeTick = prometheus.NewGaugeVec(
@ -34,7 +29,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "dml_channels_time_tick",
Help: "Time tick of dml channels",
}, []string{"node_id", "pchan"})
}, []string{nodeIDLabelName, "pchan"})
// ProxySearchCount record the number of times search succeeded or failed.
ProxySearchCount = prometheus.NewCounterVec(
@ -43,7 +38,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "search_counter",
Help: "The number of times search succeeded or failed",
}, []string{"node_id", "collection_id", "type", "status"})
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName, statusLabelName})
// ProxyInsertCount record the number of times insert succeeded or failed.
ProxyInsertCount = prometheus.NewCounterVec(
@ -52,7 +47,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "insert_counter",
Help: "The number of times insert succeeded or failed",
}, []string{"node_id", "collection_id", "status"})
}, []string{nodeIDLabelName, collectionIDLabelName, statusLabelName})
// ProxySearchVectors record the number of vectors search successfully.
ProxySearchVectors = prometheus.NewGaugeVec(
@ -61,7 +56,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "search_vectors",
Help: "The number of vectors search successfully",
}, []string{"node_id", "collection_id", "type"})
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxyInsertVectors record the number of vectors insert successfully.
ProxyInsertVectors = prometheus.NewGaugeVec(
@ -70,7 +65,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "insert_vectors",
Help: "The number of vectors insert successfully",
}, []string{"node_id", "collection_id"})
}, []string{nodeIDLabelName, collectionIDLabelName})
// ProxyLinkedSDKs record The number of SDK linked proxy.
// TODO: how to know when sdk disconnect?
@ -80,7 +75,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "linked_sdk_numbers",
Help: "The number of SDK linked proxy",
}, []string{"node_id"})
}, []string{nodeIDLabelName})
// ProxySearchLatency record the latency of search successfully.
ProxySearchLatency = prometheus.NewHistogramVec(
@ -89,8 +84,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "search_latency",
Help: "The latency of search successfully",
Buckets: coarseGrainedBuckets,
}, []string{"node_id", "collection_id", "type"})
Buckets: buckets,
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxySendMessageLatency record the latency that the proxy sent the search request to the message stream.
ProxySendMessageLatency = prometheus.NewHistogramVec(
@ -99,8 +94,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "send_search_msg_time",
Help: "The latency that the proxy sent the search request to the message stream",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id", "type"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxyWaitForSearchResultLatency record the time that the proxy waits for the search result.
ProxyWaitForSearchResultLatency = prometheus.NewHistogramVec(
@ -109,8 +104,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "wait_for_search_result_time",
Help: "The time that the proxy waits for the search result",
Buckets: coarseGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id", "type"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxyReduceSearchResultLatency record the time that the proxy reduces search result.
ProxyReduceSearchResultLatency = prometheus.NewHistogramVec(
@ -119,8 +114,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "reduce_search_result_time",
Help: "The time that the proxy reduces search result",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id", "type"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxyDecodeSearchResultLatency record the time that the proxy decodes the search result.
ProxyDecodeSearchResultLatency = prometheus.NewHistogramVec(
@ -129,8 +124,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "decode_search_result_time",
Help: "The time that the proxy decodes the search result",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id", "type"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxyMsgStreamObjectsForPChan record the number of MsgStream objects per PChannel on each collection_id on Proxy.
ProxyMsgStreamObjectsForPChan = prometheus.NewGaugeVec(
@ -139,7 +134,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "msg_stream_obj_for_PChan",
Help: "The number of MsgStream objects per PChannel on each collection on Proxy",
}, []string{"node_id", "collection_id"})
}, []string{nodeIDLabelName, collectionIDLabelName})
// ProxyMsgStreamObjectsForSearch record the number of MsgStream objects for search per collection_id.
ProxyMsgStreamObjectsForSearch = prometheus.NewGaugeVec(
@ -148,7 +143,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "msg_stream_obj_for_search",
Help: "The number of MsgStream objects for search per collection",
}, []string{"node_id", "collection_id", "type"})
}, []string{nodeIDLabelName, collectionIDLabelName, queryTypeLabelName})
// ProxyInsertLatency record the latency that insert successfully.
ProxyInsertLatency = prometheus.NewHistogramVec(
@ -157,8 +152,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "insert_latency",
Help: "The latency that insert successfully.",
Buckets: coarseGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName})
// ProxyInsertColToRowLatency record the latency that column to row for inserting in Proxy.
ProxyInsertColToRowLatency = prometheus.NewHistogramVec(
@ -167,8 +162,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "col_to_row_latency",
Help: "The time that column to row for inserting in Proxy",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName})
// ProxySendInsertReqLatency record the latency that Proxy send insert request to MsgStream.
ProxySendInsertReqLatency = prometheus.NewHistogramVec(
@ -177,8 +172,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "send_insert_req_latency",
Help: "The latency that Proxy send insert request to MsgStream",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id", "collection_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, collectionIDLabelName})
// ProxyCacheHitCounter record the number of Proxy cache hits or miss.
// TODO: @xiaocai2333 add more cache type
@ -188,7 +183,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "cache_hits",
Help: "Proxy cache hits",
}, []string{"node_id", "cache_type", "hit_type"})
}, []string{nodeIDLabelName, "cache_type", "hit_type"})
// ProxyUpdateCacheLatency record the time that proxy update cache when cache miss.
ProxyUpdateCacheLatency = prometheus.NewHistogramVec(
@ -197,8 +192,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "update_cache_latency",
Help: "The time that proxy update cache when cache miss",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName})
// ProxySyncTimeTick record Proxy synchronization timestamp statistics, differentiated by Channel.
ProxySyncTimeTick = prometheus.NewGaugeVec(
@ -207,7 +202,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "sync_time_tick",
Help: "Proxy synchronization timestamp statistics, differentiated by Channel",
}, []string{"node_id", "channel"})
}, []string{nodeIDLabelName, channelNameLabelName})
// ProxyApplyPrimaryKeyLatency record the latency that apply primary key.
ProxyApplyPrimaryKeyLatency = prometheus.NewHistogramVec(
@ -216,8 +211,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "apply_pk_latency",
Help: "The latency that apply primary key",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName})
// ProxyApplyTimestampLatency record the latency that proxy apply timestamp.
ProxyApplyTimestampLatency = prometheus.NewHistogramVec(
@ -226,8 +221,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "apply_timestamp_latency",
Help: "The latency that proxy apply timestamp",
Buckets: fineGrainedBuckets, // unit: ms
}, []string{"node_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName})
// ProxyDDLFunctionCall records the number of times the function of the DDL operation was executed, like `CreateCollection`.
ProxyDDLFunctionCall = prometheus.NewCounterVec(
@ -236,7 +231,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "DDL_call_counter",
Help: "the number of times the function of the DDL operation was executed",
}, []string{"node_id", "function", "status"})
}, []string{nodeIDLabelName, functionLabelName, statusLabelName})
// ProxyDQLFunctionCall records the number of times the function of the DQL operation was executed, like `HasCollection`.
ProxyDQLFunctionCall = prometheus.NewCounterVec(
@ -245,7 +240,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "DQL_call_counter",
Help: "",
}, []string{"node_id", "function", "collection_id", "status"})
}, []string{nodeIDLabelName, functionLabelName, collectionIDLabelName, statusLabelName})
// ProxyDMLFunctionCall records the number of times the function of the DML operation was executed, like `LoadCollection`.
ProxyDMLFunctionCall = prometheus.NewCounterVec(
@ -254,7 +249,7 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "DML_call_counter",
Help: "",
}, []string{"node_id", "function", "collection_id", "status"})
}, []string{nodeIDLabelName, functionLabelName, collectionIDLabelName, statusLabelName})
// ProxyDDLReqLatency records the latency that for DML request, like "CreateCollection".
ProxyDDLReqLatency = prometheus.NewHistogramVec(
@ -263,8 +258,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "DDL_call_latency",
Help: "The latency that for DDL request",
Buckets: coarseGrainedBuckets, // unit: ms
}, []string{"node_id", "function"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, functionLabelName})
// ProxyDMLReqLatency records the latency that for DML request.
ProxyDMLReqLatency = prometheus.NewHistogramVec(
@ -273,8 +268,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "DML_call_latency",
Help: "The latency that for DML request",
Buckets: coarseGrainedBuckets, // unit: ms
}, []string{"node_id", "function", "collection_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, functionLabelName, collectionIDLabelName})
// ProxyDQLReqLatency record the latency that for DQL request, like "HasCollection".
ProxyDQLReqLatency = prometheus.NewHistogramVec(
@ -283,8 +278,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "DQL_call_latency",
Help: "The latency that for DQL request",
Buckets: coarseGrainedBuckets, // unit: ms
}, []string{"node_id", "function", "collection_id"})
Buckets: buckets, // unit: ms
}, []string{nodeIDLabelName, functionLabelName, collectionIDLabelName})
// ProxySearchLatencyPerNQ records the latency for searching.
ProxySearchLatencyPerNQ = prometheus.NewHistogramVec(
@ -293,8 +288,8 @@ var (
Subsystem: typeutil.ProxyRole,
Name: "proxy_search_latency_count",
Help: "The latency for searching",
Buckets: fineGrainedBuckets,
}, []string{"node_id", "collection_id"})
Buckets: buckets,
}, []string{nodeIDLabelName, collectionIDLabelName})
)
//RegisterProxy registers Proxy metrics

View File

@ -1,17 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics

View File

@ -22,18 +22,6 @@ import (
"github.com/milvus-io/milvus/internal/util/typeutil"
)
const (
// TODO: move to metrics.go
queryCoordStatusLabel = "status"
QueryCoordMetricLabelSuccess = "success"
QueryCoordMetricLabelFail = "fail"
QueryCoordMetricLabelTotal = "total"
)
// queryCoordLoadBuckets involves durations in milliseconds,
// [10 20 40 80 160 320 640 1280 2560 5120 10240 20480 40960 81920 163840 327680 655360 1.31072e+06]
var queryCoordLoadBuckets = prometheus.ExponentialBuckets(10, 2, 18)
var (
QueryCoordNumCollections = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@ -50,7 +38,7 @@ var (
Name: "num_entities",
Help: "Number of entities in collection.",
}, []string{
collectionIDLabel,
collectionIDLabelName,
})
QueryCoordLoadCount = prometheus.NewCounterVec(
@ -60,7 +48,7 @@ var (
Name: "load_count",
Help: "Load request statistic in QueryCoord.",
}, []string{
queryCoordStatusLabel,
statusLabelName,
})
QueryCoordReleaseCount = prometheus.NewCounterVec(
@ -70,7 +58,7 @@ var (
Name: "release_count",
Help: "Release request statistic in QueryCoord.",
}, []string{
queryCoordStatusLabel,
statusLabelName,
})
QueryCoordLoadLatency = prometheus.NewHistogramVec(
@ -79,7 +67,7 @@ var (
Subsystem: typeutil.QueryCoordRole,
Name: "load_latency",
Help: "Load request latency in QueryCoord",
Buckets: queryCoordLoadBuckets,
Buckets: buckets,
}, []string{})
QueryCoordReleaseLatency = prometheus.NewHistogramVec(
@ -113,7 +101,7 @@ var (
Subsystem: typeutil.QueryCoordRole,
Name: "child_task_latency",
Help: "Child tasks latency in QueryCoord.",
Buckets: queryCoordLoadBuckets,
Buckets: buckets,
}, []string{})
QueryCoordNumQueryNodes = prometheus.NewGaugeVec(

View File

@ -22,34 +22,6 @@ import (
"github.com/milvus-io/milvus/internal/util/typeutil"
)
const (
// TODO: use the common status label
queryNodeStatusLabel = "status"
QueryNodeMetricLabelSuccess = "success"
QueryNodeMetricLabelFail = "fail"
QueryNodeMetricLabelTotal = "total"
// TODO: use the common status label
nodeIDLabel = "node_id"
collectionIDLabel = "collection_id"
)
const (
// query type
queryTypeLabel = "query_type"
QueryNodeQueryTypeSearch = "search"
QueryNodeQueryTypeQuery = "query"
// segment type
segmentTypeLabel = "segment_type"
QueryNodeSegTypeSealed = "sealed"
QueryNodeSegTypeGrowing = "growing"
)
// queryNodeDurationBuckets involves durations in milliseconds,
// [10 20 40 80 160 320 640 1280 2560 5120 10240 20480 40960 81920 163840 327680 655360 1.31072e+06]
var queryNodeDurationBuckets = prometheus.ExponentialBuckets(10, 2, 18)
var (
QueryNodeNumCollections = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@ -58,7 +30,7 @@ var (
Name: "num_collections",
Help: "Number of collections in QueryNode.",
}, []string{
nodeIDLabel,
nodeIDLabelName,
})
QueryNodeNumPartitions = prometheus.NewGaugeVec(
@ -68,8 +40,8 @@ var (
Name: "num_partitions",
Help: "Number of partitions per collection in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeNumSegments = prometheus.NewGaugeVec(
@ -79,8 +51,8 @@ var (
Name: "num_segments",
Help: "Number of segments per collection in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeNumDmlChannels = prometheus.NewGaugeVec(
@ -90,8 +62,8 @@ var (
Name: "num_dml_channels",
Help: "Number of dmlChannels per collection in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeNumDeltaChannels = prometheus.NewGaugeVec(
@ -101,8 +73,8 @@ var (
Name: "num_delta_channels",
Help: "Number of deltaChannels per collection in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeNumConsumers = prometheus.NewGaugeVec(
@ -112,8 +84,8 @@ var (
Name: "num_consumers",
Help: "Number of consumers per collection in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeNumReaders = prometheus.NewGaugeVec(
@ -123,8 +95,8 @@ var (
Name: "num_readers",
Help: "Number of readers per collection in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeSQCount = prometheus.NewCounterVec(
@ -134,9 +106,9 @@ var (
Name: "sq_count",
Help: "Search and query requests statistic in QueryNode.",
}, []string{
queryNodeStatusLabel,
queryTypeLabel,
nodeIDLabel,
statusLabelName,
queryTypeLabelName,
nodeIDLabelName,
})
QueryNodeSQReqLatency = prometheus.NewHistogramVec(
@ -145,10 +117,10 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "sq_latency",
Help: "Search and query requests latency in QueryNode.",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
queryTypeLabel,
nodeIDLabel,
queryTypeLabelName,
nodeIDLabelName,
})
QueryNodeSQLatencyInQueue = prometheus.NewHistogramVec(
@ -157,10 +129,10 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "sq_latency_in_queue",
Help: "The search and query latency in queue(unsolved buffer) in QueryNode.",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
queryTypeLabel,
nodeIDLabel,
queryTypeLabelName,
nodeIDLabelName,
})
QueryNodeSQSegmentLatency = prometheus.NewHistogramVec(
@ -169,11 +141,11 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "sq_latency_per_segment",
Help: "The search and query on segments(sealed/growing segments).",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
queryTypeLabel,
segmentTypeLabel,
nodeIDLabel,
queryTypeLabelName,
segmentTypeLabelName,
nodeIDLabelName,
})
QueryNodeSQSegmentLatencyInCore = prometheus.NewHistogramVec(
@ -182,10 +154,10 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "sq_latency_in_core",
Help: "The search and query latency in core.",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
queryTypeLabel,
nodeIDLabel,
queryTypeLabelName,
nodeIDLabelName,
})
QueryNodeTranslateHitsLatency = prometheus.NewHistogramVec(
@ -194,9 +166,9 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "translate_hits_latency",
Help: "The search and query latency in translate hits.",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
nodeIDLabel,
nodeIDLabelName,
})
QueryNodeReduceLatency = prometheus.NewHistogramVec(
@ -205,10 +177,10 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "reduce_latency",
Help: "The search and query latency in reduce(local reduce) in QueryNode.",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
segmentTypeLabel,
nodeIDLabel,
segmentTypeLabelName,
nodeIDLabelName,
})
QueryNodeLoadSegmentLatency = prometheus.NewHistogramVec(
@ -217,9 +189,9 @@ var (
Subsystem: typeutil.QueryNodeRole,
Name: "load_latency_per_segment",
Help: "The load latency per segment in QueryNode.",
Buckets: queryNodeDurationBuckets,
Buckets: buckets,
}, []string{
nodeIDLabel,
nodeIDLabelName,
})
QueryNodeServiceTime = prometheus.NewGaugeVec(
@ -229,8 +201,8 @@ var (
Name: "service_time",
Help: "ServiceTimes of collections in QueryNode.",
}, []string{
collectionIDLabel,
nodeIDLabel,
collectionIDLabelName,
nodeIDLabelName,
})
QueryNodeNumFlowGraphs = prometheus.NewGaugeVec(
@ -240,7 +212,7 @@ var (
Name: "num_flow_graphs",
Help: "Number of flow graphs in QueryNode.",
}, []string{
nodeIDLabel,
nodeIDLabelName,
})
)
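
The QueryNode histograms above now share the common millisecond bucket set, and the Observe calls later in this commit feed them values from tr.ElapseSpan().Milliseconds(), keeping the unit consistent with the buckets. The sketch below shows that timing pattern under stated assumptions: it uses time.Since as a stand-in for the internal timerecord helper, and the namespace and metric name are placeholders rather than the real QueryNode collectors.

package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// Illustrative copy of the shared millisecond buckets from the metrics package above.
var buckets = prometheus.ExponentialBuckets(1, 2, 18)

// A latency histogram labeled by query type and node id, similar in shape to
// QueryNodeSQReqLatency above; name and namespace here are placeholders.
var sqLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: "demo",
		Subsystem: "querynode",
		Name:      "sq_latency",
		Help:      "Illustrative search/query latency in milliseconds",
		Buckets:   buckets,
	}, []string{"query_type", "node_id"})

func main() {
	prometheus.MustRegister(sqLatency)

	// The commit observes tr.ElapseSpan().Milliseconds(); time.Since is used here
	// as a stand-in for the internal timerecord helper.
	start := time.Now()
	time.Sleep(5 * time.Millisecond) // pretend work
	elapsedMs := float64(time.Since(start).Milliseconds())

	sqLatency.WithLabelValues("search", "1").Observe(elapsedMs)
}
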

View File

@ -13,7 +13,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "list_of_proxy",
Help: "List of proxy nodes which have registered with etcd",
}, []string{"node_id"})
}, []string{nodeIDLabelName})
////////////////////////////////////////////////////////////////////////////
// for grpc
@ -25,7 +25,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "create_collection_total",
Help: "Counter of create collection",
}, []string{"status"})
}, []string{statusLabelName})
// RootCoordDropCollectionCounter counts the num of calls of DropCollection
RootCoordDropCollectionCounter = prometheus.NewCounterVec(
@ -34,7 +34,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "drop_collection_total",
Help: "Counter of drop collection",
}, []string{"status"})
}, []string{statusLabelName})
// RootCoordHasCollectionCounter counts the num of calls of HasCollection
RootCoordHasCollectionCounter = prometheus.NewCounterVec(
@ -43,7 +43,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "has_collection_total",
Help: "Counter of has collection",
}, []string{"status"})
}, []string{statusLabelName})
// RootCoordDescribeCollectionCounter counts the num of calls of DescribeCollection
RootCoordDescribeCollectionCounter = prometheus.NewCounterVec(
@ -52,7 +52,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "describe_collection_total",
Help: "Counter of describe collection",
}, []string{"status"})
}, []string{statusLabelName})
// RootCoordShowCollectionsCounter counts the num of calls of ShowCollections
RootCoordShowCollectionsCounter = prometheus.NewCounterVec(
@ -61,7 +61,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "show_collections_total",
Help: "Counter of show collections",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordCreatePartitionCounter counts the num of calls of CreatePartition
RootCoordCreatePartitionCounter = prometheus.NewCounterVec(
@ -70,7 +70,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "create_partition_total",
Help: "Counter of create partition",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordDropPartitionCounter counts the num of calls of DropPartition
RootCoordDropPartitionCounter = prometheus.NewCounterVec(
@ -79,7 +79,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "drop_partition_total",
Help: "Counter of drop partition",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordHasPartitionCounter counts the num of calls of HasPartition
RootCoordHasPartitionCounter = prometheus.NewCounterVec(
@ -88,7 +88,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "has_partition_total",
Help: "Counter of has partition",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordShowPartitionsCounter counts the num of calls of ShowPartitions
RootCoordShowPartitionsCounter = prometheus.NewCounterVec(
@ -97,7 +97,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "show_partitions_total",
Help: "Counter of show partitions",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordCreateIndexCounter counts the num of calls of CreateIndex
RootCoordCreateIndexCounter = prometheus.NewCounterVec(
@ -106,7 +106,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "create_index_total",
Help: "Counter of create index",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordDropIndexCounter counts the num of calls of DropIndex
RootCoordDropIndexCounter = prometheus.NewCounterVec(
@ -115,7 +115,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "drop_index_total",
Help: "Counter of drop index",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordDescribeIndexCounter counts the num of calls of DescribeIndex
RootCoordDescribeIndexCounter = prometheus.NewCounterVec(
@ -124,7 +124,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "describe_index_total",
Help: "Counter of describe index",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordDescribeSegmentCounter counts the num of calls of DescribeSegment
RootCoordDescribeSegmentCounter = prometheus.NewCounterVec(
@ -133,7 +133,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "describe_segment_total",
Help: "Counter of describe segment",
}, []string{"type"})
}, []string{statusLabelName})
// RootCoordShowSegmentsCounter counts the num of calls of ShowSegments
RootCoordShowSegmentsCounter = prometheus.NewCounterVec(
@ -142,7 +142,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "show_segments_total",
Help: "Counter of show segments",
}, []string{"type"})
}, []string{statusLabelName})
////////////////////////////////////////////////////////////////////////////
// for time tick
@ -163,7 +163,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "ddl_read_type_latency",
Help: "The latency for read type of DDL operations",
}, []string{"function_name", "collection_id"})
}, []string{functionLabelName, collectionIDLabelName})
// RootCoordDDLWriteTypeLatency records the latency for write type of DDL operations.
RootCoordDDLWriteTypeLatency = prometheus.NewHistogramVec(
@ -172,7 +172,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "ddl_write_type_latency",
Help: "The latency for write type of DDL operations",
}, []string{"function_name", "collection_name"})
}, []string{functionLabelName, collectionIDLabelName})
// RootCoordSyncTimeTickLatency records the latency of sync time tick.
RootCoordSyncTimeTickLatency = prometheus.NewHistogram(
@ -226,7 +226,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "num_of_partitions",
Help: "The number of partitions per collection",
}, []string{"collection_id"})
}, []string{collectionIDLabelName})
// RootCoordNumOfSegments counts the number of segments per collections.
RootCoordNumOfSegments = prometheus.NewGaugeVec(
@ -235,7 +235,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "num_of_segments",
Help: "The number of segments per collection",
}, []string{"collection_id"})
}, []string{collectionIDLabelName})
// RootCoordNumOfIndexedSegments counts the number of indexed segments per collection.
RootCoordNumOfIndexedSegments = prometheus.NewGaugeVec(
@ -244,7 +244,7 @@ var (
Subsystem: typeutil.RootCoordRole,
Name: "num_of_indexed_segments",
Help: "The number of indexed segments per collection",
}, []string{"collection_id"})
}, []string{collectionIDLabelName})
// RootCoordNumOfDMLChannel counts the number of DML channels.
RootCoordNumOfDMLChannel = prometheus.NewGauge(

View File

@ -159,7 +159,7 @@ func (qc *QueryCoord) ShowCollections(ctx context.Context, req *querypb.ShowColl
// LoadCollection loads all the sealed segments of this collection to queryNodes, and assigns watchDmChannelRequest to queryNodes
func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadCollectionRequest) (*commonpb.Status, error) {
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelTotal).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.TotalLabel).Inc()
collectionID := req.CollectionID
//schema := req.Schema
@ -177,7 +177,7 @@ func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadColle
status.Reason = err.Error()
log.Error("load collection failed", zap.String("role", typeutil.QueryCoordRole), zap.Int64("msgID", req.Base.MsgID), zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -189,7 +189,7 @@ func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadColle
zap.Int64("collectionID", collectionID),
zap.Int64("msgID", req.Base.MsgID))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.SuccessLabel).Inc()
return status, nil
}
// if some partitions of the collection have been loaded by load partitions request, return error
@ -206,7 +206,7 @@ func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadColle
zap.Int64("msgID", req.Base.MsgID),
zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
}
@ -229,7 +229,7 @@ func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadColle
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -243,7 +243,7 @@ func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadColle
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -257,7 +257,7 @@ func (qc *QueryCoord) LoadCollection(ctx context.Context, req *querypb.LoadColle
// ReleaseCollection clears all data related to this collecion on the querynode
func (qc *QueryCoord) ReleaseCollection(ctx context.Context, req *querypb.ReleaseCollectionRequest) (*commonpb.Status, error) {
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelTotal).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.TotalLabel).Inc()
//dbID := req.DbID
collectionID := req.CollectionID
log.Debug("releaseCollectionRequest received",
@ -274,7 +274,7 @@ func (qc *QueryCoord) ReleaseCollection(ctx context.Context, req *querypb.Releas
status.Reason = err.Error()
log.Error("release collection failed", zap.String("role", typeutil.QueryCoordRole), zap.Int64("msgID", req.Base.MsgID), zap.Error(err))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -286,7 +286,7 @@ func (qc *QueryCoord) ReleaseCollection(ctx context.Context, req *querypb.Releas
zap.Int64("collectionID", collectionID),
zap.Int64("msgID", req.Base.MsgID))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
return status, nil
}
@ -308,7 +308,7 @@ func (qc *QueryCoord) ReleaseCollection(ctx context.Context, req *querypb.Releas
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -322,7 +322,7 @@ func (qc *QueryCoord) ReleaseCollection(ctx context.Context, req *querypb.Releas
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -333,7 +333,7 @@ func (qc *QueryCoord) ReleaseCollection(ctx context.Context, req *querypb.Releas
//qc.MetaReplica.printMeta()
//qc.cluster.printMeta()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
metrics.QueryCoordReleaseLatency.WithLabelValues().Observe(float64(releaseCollectionTask.elapseSpan().Milliseconds()))
return status, nil
}
@ -429,7 +429,7 @@ func (qc *QueryCoord) ShowPartitions(ctx context.Context, req *querypb.ShowParti
// LoadPartitions loads all the sealed segments of this partition to queryNodes, and assigns watchDmChannelRequest to queryNodes
func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadPartitionsRequest) (*commonpb.Status, error) {
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelTotal).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.TotalLabel).Inc()
collectionID := req.CollectionID
partitionIDs := req.PartitionIDs
@ -448,7 +448,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
status.Reason = err.Error()
log.Error("load partition failed", zap.String("role", typeutil.QueryCoordRole), zap.Int64("msgID", req.Base.MsgID), zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -464,7 +464,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
zap.Int64("msgID", req.Base.MsgID),
zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -505,7 +505,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
zap.Int64("msgID", req.Base.MsgID),
zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -515,7 +515,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
zap.Int64s("partitionIDs", partitionIDs),
zap.Int64("msgID", req.Base.MsgID))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.SuccessLabel).Inc()
return status, nil
}
@ -538,7 +538,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -553,7 +553,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
zap.Int64("msgID", req.Base.MsgID),
zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -568,7 +568,7 @@ func (qc *QueryCoord) LoadPartitions(ctx context.Context, req *querypb.LoadParti
// ReleasePartitions clears all data related to this partition on the querynode
func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.ReleasePartitionsRequest) (*commonpb.Status, error) {
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelTotal).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.TotalLabel).Inc()
//dbID := req.DbID
collectionID := req.CollectionID
@ -588,7 +588,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
status.Reason = err.Error()
log.Error("release partition failed", zap.String("role", typeutil.QueryCoordRole), zap.Int64("msgID", req.Base.MsgID), zap.Error(err))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -602,7 +602,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
zap.Int64s("partitionIDs", partitionIDs),
zap.Int64("msgID", req.Base.MsgID), zap.Error(err))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -622,7 +622,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
zap.Int64("msgID", req.Base.MsgID),
zap.Error(err))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -644,7 +644,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
zap.Int64("collectionID", req.CollectionID),
zap.Int64("msgID", req.Base.MsgID))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
return status, nil
}
@ -655,7 +655,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
zap.Int64s("partitionIDs", partitionIDs),
zap.Int64("msgID", req.Base.MsgID))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
return status, nil
}
@ -699,7 +699,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -714,7 +714,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
status.ErrorCode = commonpb.ErrorCode_UnexpectedError
status.Reason = err.Error()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelFail).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
return status, nil
}
@ -727,7 +727,7 @@ func (qc *QueryCoord) ReleasePartitions(ctx context.Context, req *querypb.Releas
//qc.MetaReplica.printMeta()
//qc.cluster.printMeta()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
metrics.QueryCoordReleaseLatency.WithLabelValues().Observe(float64(releaseTask.elapseSpan().Milliseconds()))
return status, nil
}
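
The QueryCoord handlers above all follow the same counting pattern with the now-shared labels: increment the TotalLabel series when the request arrives, then increment either FailLabel or SuccessLabel on each return path. The sketch below is a hedged, self-contained illustration of that pattern; loadCollection, the "demo" namespace, and the validation check are hypothetical and stand in for the real QueryCoord methods and their error paths.

package main

import (
	"errors"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// Local stand-ins for the shared status labels in the metrics package.
const (
	SuccessLabel = "success"
	FailLabel    = "fail"
	TotalLabel   = "total"
)

// loadCount mimics the shape of QueryCoordLoadCount: one counter partitioned by status.
var loadCount = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "demo",
		Subsystem: "querycoord",
		Name:      "load_count",
		Help:      "Illustrative load request statistic partitioned by status",
	}, []string{"status"})

// loadCollection is a hypothetical handler showing the total/fail/success pattern:
// count every request on entry, then count the outcome on each return path.
func loadCollection(collectionID int64) error {
	loadCount.WithLabelValues(TotalLabel).Inc()

	if collectionID < 0 { // stand-in for a validation or scheduling failure
		loadCount.WithLabelValues(FailLabel).Inc()
		return errors.New("invalid collection id")
	}

	loadCount.WithLabelValues(SuccessLabel).Inc()
	return nil
}

func main() {
	prometheus.MustRegister(loadCount)
	fmt.Println(loadCollection(100)) // <nil>
	fmt.Println(loadCollection(-1))  // invalid collection id
}
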

View File

@ -344,7 +344,7 @@ func (lct *loadCollectionTask) updateTaskProcess() {
lct.setResultInfo(err)
}
lct.once.Do(func() {
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.SuccessLabel).Inc()
metrics.QueryCoordLoadLatency.WithLabelValues().Observe(float64(lct.elapseSpan().Milliseconds()))
metrics.QueryCoordNumChildTasks.WithLabelValues().Sub(float64(len(lct.getChildTask())))
})
@ -698,7 +698,7 @@ func (lpt *loadPartitionTask) updateTaskProcess() {
}
}
lpt.once.Do(func() {
metrics.QueryCoordLoadCount.WithLabelValues(metrics.QueryCoordMetricLabelSuccess).Inc()
metrics.QueryCoordLoadCount.WithLabelValues(metrics.SuccessLabel).Inc()
metrics.QueryCoordLoadLatency.WithLabelValues().Observe(float64(lpt.elapseSpan().Milliseconds()))
metrics.QueryCoordNumChildTasks.WithLabelValues().Sub(float64(len(lpt.getChildTask())))
})

View File

@ -185,8 +185,8 @@ func (h *historical) search(searchReqs []*searchRequest, collID UniqueID, partID
err2 = err
return
}
metrics.QueryNodeSQSegmentLatency.WithLabelValues(metrics.QueryNodeQueryTypeSearch,
metrics.QueryNodeSegTypeSealed,
metrics.QueryNodeSQSegmentLatency.WithLabelValues(metrics.SearchLabel,
metrics.SealedSegmentLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
segmentLock.Lock()

View File

@ -677,11 +677,11 @@ func (q *queryCollection) doUnsolvedQueryMsg() {
)
switch msgType {
case commonpb.MsgType_Retrieve:
metrics.QueryNodeSQLatencyInQueue.WithLabelValues(metrics.QueryNodeQueryTypeQuery,
metrics.QueryNodeSQLatencyInQueue.WithLabelValues(metrics.QueryLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(m.RecordSpan().Milliseconds()))
err = q.retrieve(m)
case commonpb.MsgType_Search:
metrics.QueryNodeSQLatencyInQueue.WithLabelValues(metrics.QueryNodeQueryTypeSearch,
metrics.QueryNodeSQLatencyInQueue.WithLabelValues(metrics.SearchLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(m.RecordSpan().Milliseconds()))
err = q.search(m)
default:
@ -1135,8 +1135,8 @@ func (q *queryCollection) search(msg queryMsg) error {
if err != nil {
return err
}
metrics.QueryNodeSQReqLatency.WithLabelValues(metrics.QueryNodeQueryTypeSearch, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(msg.ElapseSpan().Milliseconds()))
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelSuccess, metrics.QueryNodeQueryTypeSearch, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQReqLatency.WithLabelValues(metrics.SearchLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(msg.ElapseSpan().Milliseconds()))
metrics.QueryNodeSQCount.WithLabelValues(metrics.SuccessLabel, metrics.SearchLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
tr.Record(fmt.Sprintf("publish empty search result done, msgID = %d", searchMsg.ID()))
tr.Elapse(fmt.Sprintf("all done, msgID = %d", searchMsg.ID()))
@ -1167,7 +1167,7 @@ func (q *queryCollection) search(msg queryMsg) error {
if err != nil {
return err
}
metrics.QueryNodeReduceLatency.WithLabelValues(metrics.QueryNodeQueryTypeSearch, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.RecordSpan().Milliseconds()))
metrics.QueryNodeReduceLatency.WithLabelValues(metrics.SearchLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.RecordSpan().Milliseconds()))
var offset int64
for index := range searchRequests {
@ -1247,10 +1247,10 @@ func (q *queryCollection) search(msg queryMsg) error {
if err != nil {
return err
}
metrics.QueryNodeSQReqLatency.WithLabelValues(metrics.QueryNodeQueryTypeSearch,
metrics.QueryNodeSQReqLatency.WithLabelValues(metrics.SearchLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(msg.ElapseSpan().Milliseconds()))
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelSuccess,
metrics.QueryNodeQueryTypeSearch,
metrics.QueryNodeSQCount.WithLabelValues(metrics.SuccessLabel,
metrics.SearchLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
tr.Record(fmt.Sprintf("publish search result, msgID = %d", searchMsg.ID()))
}
@ -1335,7 +1335,7 @@ func (q *queryCollection) retrieve(msg queryMsg) error {
return err
}
reduceDuration := tr.Record(fmt.Sprintf("merge result done, msgID = %d", retrieveMsg.ID()))
metrics.QueryNodeReduceLatency.WithLabelValues(metrics.QueryNodeQueryTypeQuery, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(reduceDuration.Milliseconds()))
metrics.QueryNodeReduceLatency.WithLabelValues(metrics.QueryLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(reduceDuration.Milliseconds()))
resultChannelInt := 0
retrieveResultMsg := &msgstream.RetrieveResultMsg{
@ -1360,8 +1360,8 @@ func (q *queryCollection) retrieve(msg queryMsg) error {
if err != nil {
return err
}
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelSuccess, metrics.QueryNodeQueryTypeQuery, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQReqLatency.WithLabelValues(metrics.QueryNodeQueryTypeQuery, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(msg.ElapseSpan().Milliseconds()))
metrics.QueryNodeSQCount.WithLabelValues(metrics.SuccessLabel, metrics.QueryLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQReqLatency.WithLabelValues(metrics.QueryLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(msg.ElapseSpan().Milliseconds()))
log.Debug("QueryNode publish RetrieveResultMsg",
zap.Int64("msgID", retrieveMsg.ID()),
@ -1432,7 +1432,7 @@ func (q *queryCollection) publishSearchResultWithCtx(ctx context.Context, result
}
func (q *queryCollection) publishSearchResult(result *internalpb.SearchResults, nodeID UniqueID) error {
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelTotal, metrics.QueryNodeQueryTypeSearch, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQCount.WithLabelValues(metrics.TotalLabel, metrics.SearchLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
return q.publishSearchResultWithCtx(q.releaseCtx, result, nodeID)
}
@ -1441,7 +1441,7 @@ func (q *queryCollection) publishRetrieveResultWithCtx(ctx context.Context, resu
}
func (q *queryCollection) publishRetrieveResult(result *internalpb.RetrieveResults, nodeID UniqueID) error {
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelTotal, metrics.QueryNodeQueryTypeQuery, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQCount.WithLabelValues(metrics.TotalLabel, metrics.QueryLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
return q.publishRetrieveResultWithCtx(q.releaseCtx, result, nodeID)
}
@ -1461,7 +1461,7 @@ func (q *queryCollection) publishFailedQueryResultWithCtx(ctx context.Context, m
case commonpb.MsgType_Retrieve:
retrieveMsg := msg.(*msgstream.RetrieveMsg)
baseResult.MsgType = commonpb.MsgType_RetrieveResult
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelFail, metrics.QueryNodeQueryTypeQuery, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQCount.WithLabelValues(metrics.FailLabel, metrics.QueryLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
return q.publishRetrieveResult(&internalpb.RetrieveResults{
Base: baseResult,
Status: &commonpb.Status{ErrorCode: commonpb.ErrorCode_UnexpectedError, Reason: errMsg},
@ -1472,7 +1472,7 @@ func (q *queryCollection) publishFailedQueryResultWithCtx(ctx context.Context, m
case commonpb.MsgType_Search:
searchMsg := msg.(*msgstream.SearchMsg)
baseResult.MsgType = commonpb.MsgType_SearchResult
metrics.QueryNodeSQCount.WithLabelValues(metrics.QueryNodeMetricLabelFail, metrics.QueryNodeQueryTypeSearch, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
metrics.QueryNodeSQCount.WithLabelValues(metrics.FailLabel, metrics.SearchLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Inc()
return q.publishSearchResultWithCtx(ctx, &internalpb.SearchResults{
Base: baseResult,
Status: &commonpb.Status{ErrorCode: commonpb.ErrorCode_UnexpectedError, Reason: errMsg},

View File

@ -32,12 +32,13 @@ import (
"encoding/binary"
"errors"
"fmt"
"github.com/milvus-io/milvus/internal/metrics"
"github.com/milvus-io/milvus/internal/util/timerecord"
"strconv"
"sync"
"unsafe"
"github.com/milvus-io/milvus/internal/metrics"
"github.com/milvus-io/milvus/internal/util/timerecord"
"github.com/bits-and-blooms/bloom/v3"
"github.com/golang/protobuf/proto"
"github.com/stretchr/testify/assert"
@ -312,7 +313,7 @@ func (s *Segment) search(plan *SearchPlan,
log.Debug("do search on segment", zap.Int64("segmentID", s.segmentID), zap.Int32("segmentType", int32(s.segmentType)))
tr := timerecord.NewTimeRecorder("cgoSearch")
status := C.Search(s.segmentPtr, plan.cSearchPlan, cPlaceHolderGroup, ts, &searchResult.cSearchResult, C.int64_t(s.segmentID))
metrics.QueryNodeSQSegmentLatencyInCore.WithLabelValues(metrics.QueryNodeQueryTypeSearch, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.QueryNodeSQSegmentLatencyInCore.WithLabelValues(metrics.SearchLabel, fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
if err := HandleCStatus(&status, "Search failed"); err != nil {
return nil, err
}
@ -342,7 +343,7 @@ func (s *Segment) retrieve(plan *RetrievePlan) (*segcorepb.RetrieveResults, erro
ts := C.uint64_t(plan.Timestamp)
tr := timerecord.NewTimeRecorder("cgoRetrieve")
status := C.Retrieve(s.segmentPtr, plan.cRetrievePlan, ts, &retrieveResult.cRetrieveResult)
metrics.QueryNodeSQSegmentLatencyInCore.WithLabelValues(metrics.QueryNodeQueryTypeQuery,
metrics.QueryNodeSQSegmentLatencyInCore.WithLabelValues(metrics.QueryLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
if err := HandleCStatus(&status, "Retrieve failed"); err != nil {
return nil, err

View File

@ -214,8 +214,8 @@ func (s *streaming) search(searchReqs []*searchRequest, collID UniqueID, partIDs
err2 = err
return
}
metrics.QueryNodeSQSegmentLatency.WithLabelValues(metrics.QueryNodeQueryTypeSearch,
metrics.QueryNodeSegTypeGrowing,
metrics.QueryNodeSQSegmentLatency.WithLabelValues(metrics.SearchLabel,
metrics.GrowingSegmentLabel,
fmt.Sprint(Params.QueryNodeCfg.QueryNodeID)).Observe(float64(tr.ElapseSpan().Milliseconds()))
segmentLock.Lock()
searchResults = append(searchResults, searchResult)