mirror of https://github.com/milvus-io/milvus.git
fix: [2.5] Remove frequently updating metric to avoid mutex contention (#38778)
issue: https://github.com/milvus-io/milvus/issues/37630
Reduce the frequency of `updateIndexTasksMetrics` to avoid holding the mutex for long periods.
pr: https://github.com/milvus-io/milvus/pull/38775
---------
Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
pull/39330/head
parent
1602390734
commit
c741b8be2b
|
@ -27,6 +27,7 @@ import (
|
|||
"github.com/hashicorp/golang-lru/v2/expirable"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/samber/lo"
|
||||
"go.uber.org/atomic"
|
||||
"go.uber.org/zap"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
|
@ -65,6 +66,8 @@ type indexMeta struct {
|
|||
|
||||
// segmentID -> indexID -> segmentIndex
|
||||
segmentIndexes map[UniqueID]map[UniqueID]*model.SegmentIndex
|
||||
|
||||
lastUpdateMetricTime atomic.Time
|
||||
}
|
||||
|
||||
func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
|
||||
|
@ -205,6 +208,10 @@ func (m *indexMeta) updateSegIndexMeta(segIdx *model.SegmentIndex, updateFunc fu
|
|||
}
|
||||
|
||||
func (m *indexMeta) updateIndexTasksMetrics() {
|
||||
if time.Since(m.lastUpdateMetricTime.Load()) < 120*time.Second {
|
||||
return
|
||||
}
|
||||
defer m.lastUpdateMetricTime.Store(time.Now())
|
||||
taskMetrics := make(map[UniqueID]map[commonpb.IndexState]int)
|
||||
for _, segIdx := range m.segmentBuildInfo.List() {
|
||||
if segIdx.IsDeleted {
|
||||
|
@ -233,6 +240,7 @@ func (m *indexMeta) updateIndexTasksMetrics() {
|
|||
}
|
||||
}
|
||||
}
|
||||
log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics)))
|
||||
}
|
||||
|
||||
func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
|
||||
|
@ -874,7 +882,7 @@ func (m *indexMeta) GetAllSegIndexes() map[int64]*model.SegmentIndex {
|
|||
tasks := m.segmentBuildInfo.List()
|
||||
segIndexes := make(map[int64]*model.SegmentIndex, len(tasks))
|
||||
for buildID, segIndex := range tasks {
|
||||
segIndexes[buildID] = model.CloneSegmentIndex(segIndex)
|
||||
segIndexes[buildID] = segIndex
|
||||
}
|
||||
return segIndexes
|
||||
}
|
||||
|
@ -971,22 +979,6 @@ func (m *indexMeta) CheckCleanSegmentIndex(buildID UniqueID) (bool, *model.Segme
|
|||
return true, nil
|
||||
}
|
||||
|
||||
// GetMetasByNodeID collects the segment indexes from segmentBuildInfo whose
// NodeID equals nodeID, skipping entries flagged IsDeleted.
//
// The scan runs under the read lock. Each match is passed through
// model.CloneSegmentIndex before being appended (presumably a copy, so callers
// do not share the stored pointer — confirm against model.CloneSegmentIndex).
// When nothing matches, the result is an empty, non-nil slice.
func (m *indexMeta) GetMetasByNodeID(nodeID UniqueID) []*model.SegmentIndex {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
metas := make([]*model.SegmentIndex, 0)
|
||||
for _, segIndex := range m.segmentBuildInfo.List() {
|
||||
// Entries marked as deleted are never returned.
if segIndex.IsDeleted {
|
||||
continue
|
||||
}
|
||||
if nodeID == segIndex.NodeID {
|
||||
metas = append(metas, model.CloneSegmentIndex(segIndex))
|
||||
}
|
||||
}
|
||||
return metas
|
||||
}
|
||||
|
||||
func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
|
|
@ -170,7 +170,9 @@ type taskScheduler struct {
|
|||
channelTasks map[replicaChannelIndex]Task
|
||||
processQueue *taskQueue
|
||||
waitQueue *taskQueue
|
||||
taskStats *expirable.LRU[UniqueID, Task]
|
||||
|
||||
taskStats *expirable.LRU[UniqueID, Task]
|
||||
lastUpdateMetricTime atomic.Time
|
||||
}
|
||||
|
||||
func NewScheduler(ctx context.Context,
|
||||
|
@ -292,6 +294,9 @@ func (scheduler *taskScheduler) Add(task Task) error {
|
|||
}
|
||||
|
||||
func (scheduler *taskScheduler) updateTaskMetrics() {
|
||||
if time.Since(scheduler.lastUpdateMetricTime.Load()) < 30*time.Second {
|
||||
return
|
||||
}
|
||||
segmentGrowNum, segmentReduceNum, segmentMoveNum := 0, 0, 0
|
||||
channelGrowNum, channelReduceNum, channelMoveNum := 0, 0, 0
|
||||
for _, task := range scheduler.segmentTasks {
|
||||
|
@ -324,6 +329,7 @@ func (scheduler *taskScheduler) updateTaskMetrics() {
|
|||
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelGrowTaskLabel).Set(float64(channelGrowNum))
|
||||
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelReduceTaskLabel).Set(float64(channelReduceNum))
|
||||
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelMoveTaskLabel).Set(float64(channelMoveNum))
|
||||
scheduler.lastUpdateMetricTime.Store(time.Now())
|
||||
}
|
||||
|
||||
// check whether the task is valid to add,
|
||||
|
|
Loading…
Reference in New Issue