fix: [2.5] Remove frequently updating metric to avoid mutex contention (#38778)

issue: https://github.com/milvus-io/milvus/issues/37630

Reduce the frequency of `updateIndexTasksMetrics` to avoid holding the
mutex for long periods.
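
For context, a minimal runnable sketch of the throttling pattern this patch applies — only `atomic.Time` from `go.uber.org/atomic` and the 120s window match the patch; the `meta` type and its fields are illustrative stand-ins, not the Milvus ones:

```go
package main

import (
	"fmt"
	"sync"
	"time"

	"go.uber.org/atomic"
)

// meta mirrors the shape of the patched code: a mutex-guarded structure
// whose metric refresh is throttled with an atomic timestamp instead of
// running a full scan on every mutation.
type meta struct {
	sync.RWMutex
	lastUpdateMetricTime atomic.Time
	items                map[int64]string
}

func (m *meta) updateMetrics() {
	// Skip the expensive walk if we refreshed recently. The zero value of
	// atomic.Time loads the zero time, so the very first call passes the guard.
	if time.Since(m.lastUpdateMetricTime.Load()) < 120*time.Second {
		return
	}
	// Deferred store, as in updateIndexTasksMetrics: the timestamp only
	// advances after a full refresh has actually run.
	defer m.lastUpdateMetricTime.Store(time.Now())
	// ... iterate state and Set() the gauges here ...
	fmt.Println("metrics refreshed, items:", len(m.items))
}

func (m *meta) update(id int64, v string) {
	m.Lock()
	defer m.Unlock()
	m.items[id] = v
	m.updateMetrics() // a cheap no-op for up to 120s after the last refresh
}

func main() {
	m := &meta{items: make(map[int64]string)}
	m.update(1, "a") // first call refreshes the metrics
	m.update(2, "b") // throttled: returns immediately, mutex released fast
}
```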

pr: https://github.com/milvus-io/milvus/pull/38775

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
commit c741b8be2b (parent 1602390734)
yihao.dai committed 2025-01-16 11:51:02 +08:00 (via GitHub)
2 changed files with 16 additions and 18 deletions

File 1: the DataCoord index meta (`indexMeta`):

@@ -27,6 +27,7 @@ import (
 	"github.com/hashicorp/golang-lru/v2/expirable"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/samber/lo"
+	"go.uber.org/atomic"
 	"go.uber.org/zap"
 	"google.golang.org/protobuf/proto"
@@ -65,6 +66,8 @@ type indexMeta struct {
 	// segmentID -> indexID -> segmentIndex
 	segmentIndexes map[UniqueID]map[UniqueID]*model.SegmentIndex
+
+	lastUpdateMetricTime atomic.Time
 }

 func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
@@ -205,6 +208,10 @@ func (m *indexMeta) updateSegIndexMeta(segIdx *model.SegmentIndex, updateFunc fu
 }

 func (m *indexMeta) updateIndexTasksMetrics() {
+	if time.Since(m.lastUpdateMetricTime.Load()) < 120*time.Second {
+		return
+	}
+	defer m.lastUpdateMetricTime.Store(time.Now())
 	taskMetrics := make(map[UniqueID]map[commonpb.IndexState]int)
 	for _, segIdx := range m.segmentBuildInfo.List() {
 		if segIdx.IsDeleted {
@@ -233,6 +240,7 @@ func (m *indexMeta) updateIndexTasksMetrics() {
 			}
 		}
 	}
+	log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics)))
 }

 func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
@@ -874,7 +882,7 @@ func (m *indexMeta) GetAllSegIndexes() map[int64]*model.SegmentIndex {
 	tasks := m.segmentBuildInfo.List()
 	segIndexes := make(map[int64]*model.SegmentIndex, len(tasks))
 	for buildID, segIndex := range tasks {
-		segIndexes[buildID] = model.CloneSegmentIndex(segIndex)
+		segIndexes[buildID] = segIndex
 	}
 	return segIndexes
 }
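
This hunk also stops deep-copying in `GetAllSegIndexes`, returning the stored pointers directly; the next hunk then deletes the now-unused `GetMetasByNodeID`, which cloned the same way. A rough sketch of the tradeoff — `segIndex` and `cloneSegIndex` are illustrative stand-ins for `*model.SegmentIndex` and `model.CloneSegmentIndex`, not the real definitions:

```go
package main

import "fmt"

// segIndex stands in for *model.SegmentIndex.
type segIndex struct {
	buildID int64
	state   string
}

func cloneSegIndex(s *segIndex) *segIndex {
	c := *s // copy of the struct value
	return &c
}

// Before the patch: one allocation and copy per entry on every call,
// typically while the caller holds the meta lock.
func allSegIndexesCloned(stored map[int64]*segIndex) map[int64]*segIndex {
	out := make(map[int64]*segIndex, len(stored))
	for id, si := range stored {
		out[id] = cloneSegIndex(si)
	}
	return out
}

// After the patch: shared pointers. Cheap, but the result must be treated
// as read-only, or meta state could be mutated without holding the lock.
func allSegIndexes(stored map[int64]*segIndex) map[int64]*segIndex {
	out := make(map[int64]*segIndex, len(stored))
	for id, si := range stored {
		out[id] = si
	}
	return out
}

func main() {
	stored := map[int64]*segIndex{1: {buildID: 1, state: "Finished"}}
	cloned := allSegIndexesCloned(stored)
	shared := allSegIndexes(stored)
	cloned[1].state = "InProgress"                       // does not touch stored
	fmt.Println(stored[1].state, shared[1] == stored[1]) // Finished true
}
```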
@@ -971,22 +979,6 @@ func (m *indexMeta) CheckCleanSegmentIndex(buildID UniqueID) (bool, *model.SegmentIndex) {
 	return true, nil
 }

-func (m *indexMeta) GetMetasByNodeID(nodeID UniqueID) []*model.SegmentIndex {
-	m.RLock()
-	defer m.RUnlock()
-
-	metas := make([]*model.SegmentIndex, 0)
-	for _, segIndex := range m.segmentBuildInfo.List() {
-		if segIndex.IsDeleted {
-			continue
-		}
-		if nodeID == segIndex.NodeID {
-			metas = append(metas, model.CloneSegmentIndex(segIndex))
-		}
-	}
-	return metas
-}
-
 func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState {
 	m.RLock()
 	defer m.RUnlock()

File 2: the QueryCoord task scheduler (`taskScheduler`):

@@ -170,7 +170,9 @@ type taskScheduler struct {
 	channelTasks map[replicaChannelIndex]Task
 	processQueue *taskQueue
 	waitQueue    *taskQueue
-	taskStats    *expirable.LRU[UniqueID, Task]
+	taskStats            *expirable.LRU[UniqueID, Task]
+	lastUpdateMetricTime atomic.Time
 }

 func NewScheduler(ctx context.Context,
@@ -292,6 +294,9 @@ func (scheduler *taskScheduler) Add(task Task) error {
 }

 func (scheduler *taskScheduler) updateTaskMetrics() {
+	if time.Since(scheduler.lastUpdateMetricTime.Load()) < 30*time.Second {
+		return
+	}
 	segmentGrowNum, segmentReduceNum, segmentMoveNum := 0, 0, 0
 	channelGrowNum, channelReduceNum, channelMoveNum := 0, 0, 0
 	for _, task := range scheduler.segmentTasks {
@@ -324,6 +329,7 @@ func (scheduler *taskScheduler) updateTaskMetrics() {
 	metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelGrowTaskLabel).Set(float64(channelGrowNum))
 	metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelReduceTaskLabel).Set(float64(channelReduceNum))
 	metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelMoveTaskLabel).Set(float64(channelMoveNum))
+	scheduler.lastUpdateMetricTime.Store(time.Now())
 }

 // check whether the task is valid to add,
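
Note that the two files advance the timestamp differently: `updateIndexTasksMetrics` registers a deferred `Store` right after the guard, while the scheduler stores only after all gauges are set. Either way, the timestamp moves only once a full refresh has run. A small sketch of the scheduler-style variant — the `throttled` helper is illustrative, not part of the patch; only the guard shape and the 30s window match it:

```go
package main

import (
	"fmt"
	"time"

	"go.uber.org/atomic"
)

// throttled applies the same guard the patch adds to updateTaskMetrics.
func throttled(last *atomic.Time, window time.Duration, refresh func()) {
	if time.Since(last.Load()) < window {
		return
	}
	refresh()
	// Storing after the refresh (scheduler style) rather than deferring at
	// entry means a panic inside refresh leaves the window open for a retry.
	last.Store(time.Now())
}

func main() {
	// Zero value: Load returns the zero time, so the first call refreshes.
	var last atomic.Time
	refreshes := 0
	for i := 0; i < 5; i++ {
		throttled(&last, 30*time.Second, func() { refreshes++ })
	}
	fmt.Println("refreshes:", refreshes) // 1 — the other 4 calls were throttled
}
```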