mirror of https://github.com/milvus-io/milvus.git
Refine QueryCoordV2 metrics (#20461)
Signed-off-by: yah01 <yang.cen@zilliz.com> Signed-off-by: yah01 <yang.cen@zilliz.com>pull/20456/head
parent
6bff0b6688
commit
a68cec85af
|
@ -77,29 +77,12 @@ var (
|
||||||
Buckets: []float64{0, 5, 10, 20, 40, 100, 200, 400, 1000, 10000},
|
Buckets: []float64{0, 5, 10, 20, 40, 100, 200, 400, 1000, 10000},
|
||||||
}, []string{})
|
}, []string{})
|
||||||
|
|
||||||
QueryCoordNumChildTasks = prometheus.NewGaugeVec(
|
QueryCoordTaskNum = prometheus.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
prometheus.GaugeOpts{
|
||||||
Namespace: milvusNamespace,
|
Namespace: milvusNamespace,
|
||||||
Subsystem: typeutil.QueryCoordRole,
|
Subsystem: typeutil.QueryCoordRole,
|
||||||
Name: "child_task_num",
|
Name: "task_num",
|
||||||
Help: "number of child tasks in QueryCoord's queue",
|
Help: "the number of tasks in QueryCoord's scheduler",
|
||||||
}, []string{})
|
|
||||||
|
|
||||||
QueryCoordNumParentTasks = prometheus.NewGaugeVec(
|
|
||||||
prometheus.GaugeOpts{
|
|
||||||
Namespace: milvusNamespace,
|
|
||||||
Subsystem: typeutil.QueryCoordRole,
|
|
||||||
Name: "parent_task_num",
|
|
||||||
Help: "number of parent tasks in QueryCoord's queue",
|
|
||||||
}, []string{})
|
|
||||||
|
|
||||||
QueryCoordChildTaskLatency = prometheus.NewHistogramVec(
|
|
||||||
prometheus.HistogramOpts{
|
|
||||||
Namespace: milvusNamespace,
|
|
||||||
Subsystem: typeutil.QueryCoordRole,
|
|
||||||
Name: "child_task_latency",
|
|
||||||
Help: "latency of child tasks",
|
|
||||||
Buckets: buckets,
|
|
||||||
}, []string{})
|
}, []string{})
|
||||||
|
|
||||||
QueryCoordNumQueryNodes = prometheus.NewGaugeVec(
|
QueryCoordNumQueryNodes = prometheus.NewGaugeVec(
|
||||||
|
@ -119,8 +102,6 @@ func RegisterQueryCoord(registry *prometheus.Registry) {
|
||||||
registry.MustRegister(QueryCoordReleaseCount)
|
registry.MustRegister(QueryCoordReleaseCount)
|
||||||
registry.MustRegister(QueryCoordLoadLatency)
|
registry.MustRegister(QueryCoordLoadLatency)
|
||||||
registry.MustRegister(QueryCoordReleaseLatency)
|
registry.MustRegister(QueryCoordReleaseLatency)
|
||||||
registry.MustRegister(QueryCoordNumChildTasks)
|
registry.MustRegister(QueryCoordTaskNum)
|
||||||
registry.MustRegister(QueryCoordNumParentTasks)
|
|
||||||
registry.MustRegister(QueryCoordChildTaskLatency)
|
|
||||||
registry.MustRegister(QueryCoordNumQueryNodes)
|
registry.MustRegister(QueryCoordNumQueryNodes)
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ import (
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus/internal/log"
|
"github.com/milvus-io/milvus/internal/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/metrics"
|
||||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||||
|
@ -227,6 +228,7 @@ func (job *LoadCollectionJob) Execute() error {
|
||||||
return utils.WrapError(msg, err)
|
return utils.WrapError(msg, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metrics.QueryCoordNumCollections.WithLabelValues().Inc()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -285,6 +287,7 @@ func (job *ReleaseCollectionJob) Execute() error {
|
||||||
|
|
||||||
job.targetMgr.RemoveCollection(req.GetCollectionID())
|
job.targetMgr.RemoveCollection(req.GetCollectionID())
|
||||||
waitCollectionReleased(job.dist, req.GetCollectionID())
|
waitCollectionReleased(job.dist, req.GetCollectionID())
|
||||||
|
metrics.QueryCoordNumCollections.WithLabelValues().Dec()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -422,6 +425,7 @@ func (job *LoadPartitionJob) Execute() error {
|
||||||
return utils.WrapError(msg, err)
|
return utils.WrapError(msg, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metrics.QueryCoordNumCollections.WithLabelValues().Inc()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -510,5 +514,6 @@ func (job *ReleasePartitionJob) Execute() error {
|
||||||
job.targetMgr.RemovePartition(req.GetCollectionID(), toRelease...)
|
job.targetMgr.RemovePartition(req.GetCollectionID(), toRelease...)
|
||||||
waitCollectionReleased(job.dist, req.GetCollectionID(), toRelease...)
|
waitCollectionReleased(job.dist, req.GetCollectionID(), toRelease...)
|
||||||
}
|
}
|
||||||
|
metrics.QueryCoordNumCollections.WithLabelValues().Dec()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/metrics"
|
||||||
"github.com/milvus-io/milvus/internal/util/timerecord"
|
"github.com/milvus-io/milvus/internal/util/timerecord"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
||||||
|
@ -253,6 +254,8 @@ func (s *Server) initMeta() error {
|
||||||
log.Error("failed to recover collections")
|
log.Error("failed to recover collections")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
metrics.QueryCoordNumCollections.WithLabelValues().Set(float64(len(s.meta.GetAll())))
|
||||||
|
|
||||||
err = s.meta.ReplicaManager.Recover()
|
err = s.meta.ReplicaManager.Recover()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("failed to recover replicas")
|
log.Error("failed to recover replicas")
|
||||||
|
|
|
@ -16,7 +16,11 @@
|
||||||
|
|
||||||
package session
|
package session
|
||||||
|
|
||||||
import "sync"
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
type Manager interface {
|
type Manager interface {
|
||||||
Add(node *NodeInfo)
|
Add(node *NodeInfo)
|
||||||
|
@ -34,12 +38,14 @@ func (m *NodeManager) Add(node *NodeInfo) {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
defer m.mu.Unlock()
|
defer m.mu.Unlock()
|
||||||
m.nodes[node.ID()] = node
|
m.nodes[node.ID()] = node
|
||||||
|
metrics.QueryCoordNumQueryNodes.WithLabelValues().Set(float64(len(m.nodes)))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NodeManager) Remove(nodeID int64) {
|
func (m *NodeManager) Remove(nodeID int64) {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
defer m.mu.Unlock()
|
defer m.mu.Unlock()
|
||||||
delete(m.nodes, nodeID)
|
delete(m.nodes, nodeID)
|
||||||
|
metrics.QueryCoordNumQueryNodes.WithLabelValues().Set(float64(len(m.nodes)))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NodeManager) Get(nodeID int64) *NodeInfo {
|
func (m *NodeManager) Get(nodeID int64) *NodeInfo {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import (
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus/internal/log"
|
"github.com/milvus-io/milvus/internal/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/metrics"
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||||
|
@ -232,6 +233,7 @@ func (scheduler *taskScheduler) Add(task Task) error {
|
||||||
log.Warn("failed to add task", zap.String("task", task.String()))
|
log.Warn("failed to add task", zap.String("task", task.String()))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
metrics.QueryCoordTaskNum.WithLabelValues().Set(float64(scheduler.tasks.Len()))
|
||||||
log.Info("task added", zap.String("task", task.String()))
|
log.Info("task added", zap.String("task", task.String()))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -582,6 +584,7 @@ func (scheduler *taskScheduler) remove(task Task) {
|
||||||
log = log.With(zap.String("channel", task.Channel()))
|
log = log.With(zap.String("channel", task.Channel()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metrics.QueryCoordTaskNum.WithLabelValues().Set(float64(scheduler.tasks.Len()))
|
||||||
log.Debug("task removed")
|
log.Debug("task removed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue