fix: Fix logic deadlock when delegator has high memory usage (#36065)

issue: #36064
When the delegator has high memory usage, loading an L0 segment fails, and the
balance segment task is blocked by the load segment task. The delegator then
can't free memory by moving segments out, which causes a logic deadlock.

This PR removes that limit on balance: load and balance tasks are permitted to
execute in parallel. This won't cause side effects (see the sketch after this list), because:
1. One segment can only have one task in the querycoord's scheduler, and a load/release
task will replace a balance task if necessary.
2. Balance speed is already limited, so it won't block load segment tasks.
3. If a collection has a load task and a balance task at the same time, the load task
will be scheduled first due to its higher priority.
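
As a rough illustration of the scheduling behavior described above (a minimal sketch, not the actual querycoord scheduler; the task type, priorities, and method names below are hypothetical), a scheduler that keys tasks by segment ID, lets load/release tasks replace pending balance tasks, and pops higher-priority tasks first behaves like this:

package main

import (
	"fmt"
	"sort"
)

// Hypothetical task priorities: load outranks balance.
const (
	priorityBalance = 1
	priorityLoad    = 2
)

type task struct {
	segmentID int64
	kind      string // "load", "release", or "balance"
	priority  int
}

// scheduler keeps at most one task per segment, mirroring the
// "one segment can only have one task" rule described above.
type scheduler struct {
	tasks map[int64]task
}

func (s *scheduler) add(t task) {
	if old, ok := s.tasks[t.segmentID]; ok && t.priority <= old.priority {
		return // an equal- or higher-priority task already owns this segment
	}
	s.tasks[t.segmentID] = t // a load/release task replaces a pending balance task
}

// next returns pending tasks, highest priority first, so load tasks are
// scheduled before balance tasks even when both exist for one collection.
func (s *scheduler) next() []task {
	out := make([]task, 0, len(s.tasks))
	for _, t := range s.tasks {
		out = append(out, t)
	}
	sort.Slice(out, func(i, j int) bool { return out[i].priority > out[j].priority })
	return out
}

func main() {
	s := &scheduler{tasks: map[int64]task{}}
	s.add(task{segmentID: 1, kind: "balance", priority: priorityBalance})
	s.add(task{segmentID: 1, kind: "load", priority: priorityLoad}) // replaces the balance task on segment 1
	s.add(task{segmentID: 2, kind: "balance", priority: priorityBalance})
	fmt.Println(s.next()) // the load task is returned before the remaining balance task
}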

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
wei liu 2024-09-09 10:21:06 +08:00 committed by GitHub
parent 208c8a2328
commit 30a99b66c1
4 changed files with 24 additions and 18 deletions


@@ -30,15 +30,18 @@ import (
 )

 type SegmentAssignPlan struct {
-	Segment *meta.Segment
-	Replica *meta.Replica
-	From    int64 // -1 if empty
-	To      int64
+	Segment      *meta.Segment
+	Replica      *meta.Replica
+	From         int64 // -1 if empty
+	To           int64
+	FromScore    int64
+	ToScore      int64
+	SegmentScore int64
 }

 func (segPlan *SegmentAssignPlan) ToString() string {
-	return fmt.Sprintf("SegmentPlan:[collectionID: %d, replicaID: %d, segmentID: %d, from: %d, to: %d]\n",
-		segPlan.Segment.CollectionID, segPlan.Replica.GetID(), segPlan.Segment.ID, segPlan.From, segPlan.To)
+	return fmt.Sprintf("SegmentPlan:[collectionID: %d, replicaID: %d, segmentID: %d, from: %d, to: %d, fromScore: %d, toScore: %d, segmentScore: %d]\n",
+		segPlan.Segment.CollectionID, segPlan.Replica.GetID(), segPlan.Segment.ID, segPlan.From, segPlan.To, segPlan.FromScore, segPlan.ToScore, segPlan.SegmentScore)
 }

 type ChannelAssignPlan struct {
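
For illustration, with the new score fields a formatted plan would read roughly as follows (all values are made up):

SegmentPlan:[collectionID: 100, replicaID: 1, segmentID: 2001, from: 3, to: 5, fromScore: 120, toScore: 80, segmentScore: 40]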


@@ -99,14 +99,24 @@ func (b *ScoreBasedBalancer) AssignSegment(collectionID int64, segments []*meta.
 			return
 		}
+		from := int64(-1)
+		fromScore := int64(0)
+		if sourceNode != nil {
+			from = sourceNode.nodeID
+			fromScore = int64(sourceNode.getPriority())
+		}
 		plan := SegmentAssignPlan{
-			From:    -1,
-			To:      targetNode.nodeID,
-			Segment: s,
+			From:         from,
+			To:           targetNode.nodeID,
+			Segment:      s,
+			FromScore:    fromScore,
+			ToScore:      int64(targetNode.getPriority()),
+			SegmentScore: int64(priorityChange),
 		}
 		plans = append(plans, plan)
-		// update the targetNode's score
+		// update the sourceNode and targetNode's score
+		if sourceNode != nil {
+			sourceNode.setPriority(sourceNode.getPriority() - priorityChange)
+		}
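
A minimal sketch of the score bookkeeping the updated comment describes, assuming a simplified node type (the names below are hypothetical, not the balancer's real types): when a segment is assigned away from a source node, the segment's score is deducted from the source and charged to the target, so later placement decisions see the updated load on both nodes.

// Hypothetical sketch of the score update when a segment is reassigned.
type nodeItem struct {
	nodeID   int64
	priority int // higher means more heavily loaded
}

// moveScore frees the segment's score on the source (if any) and charges it
// to the target, keeping both nodes' priorities consistent for later picks.
func moveScore(source, target *nodeItem, segmentScore int) {
	if source != nil {
		source.priority -= segmentScore
	}
	target.priority += segmentScore
}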


@@ -119,11 +119,6 @@ func (b *BalanceChecker) replicasToBalance() []int64 {
 		return nil
 	}

-	// scheduler is handling segment task, skip
-	if b.scheduler.GetSegmentTaskNum() != 0 {
-		return nil
-	}
-
 	// iterator one normal collection in one round
 	normalReplicasToBalance := make([]int64, 0)
 	hasUnbalancedCollection := false


@@ -220,9 +220,7 @@ func (suite *BalanceCheckerTestSuite) TestBusyScheduler() {
 		return 1
 	})
 	replicasToBalance := suite.checker.replicasToBalance()
-	suite.Empty(replicasToBalance)
-	segPlans, _ := suite.checker.balanceReplicas(replicasToBalance)
-	suite.Empty(segPlans)
+	suite.Len(replicasToBalance, 1)
 }

 func (suite *BalanceCheckerTestSuite) TestStoppingBalance() {
func (suite *BalanceCheckerTestSuite) TestStoppingBalance() {