fix: query node may get stuck in stopping progress (#33104)

issue: #33103 
When trying to perform stopping balance for a stopping query node, the balancer
gets the node list from replica.GetNodes and then checks whether each node is
stopping; if so, stopping balance is triggered for that replica.

After the replica refactor, replica.GetNodes only returns rwNodes, while the
stopping node is maintained in roNodes. As a result, the balancer cannot find
the replica that contains the stopping node, stopping balance for the replica
is never triggered, and the query node gets stuck forever because its
segments/channels are never moved out.

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/33150/head
wei liu 2024-05-20 10:21:38 +08:00 committed by GitHub
parent c6e2dd05fc
commit a7f6193bfc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 182 additions and 352 deletions

View File

@ -77,68 +77,51 @@ func (b *ChannelLevelScoreBalancer) BalanceReplica(replica *meta.Replica) ([]Seg
return nil, nil
}
onlineNodes := make([]int64, 0)
offlineNodes := make([]int64, 0)
// read only nodes is offline in current replica.
if replica.RONodesCount() > 0 {
// if node is stop or transfer to other rg
log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
offlineNodes = append(offlineNodes, replica.GetRONodes()...)
}
rwNodes := replica.GetChannelRWNodes(channelName)
roNodes := replica.GetRONodes()
// mark channel's outbound access node as offline
channelRWNode := typeutil.NewUniqueSet(replica.GetChannelRWNodes(channelName)...)
channelRWNode := typeutil.NewUniqueSet(rwNodes...)
channelDist := b.dist.ChannelDistManager.GetByFilter(meta.WithChannelName2Channel(channelName), meta.WithReplica2Channel(replica))
for _, channel := range channelDist {
if !channelRWNode.Contain(channel.Node) {
offlineNodes = append(offlineNodes, channel.Node)
roNodes = append(roNodes, channel.Node)
}
}
segmentDist := b.dist.SegmentDistManager.GetByFilter(meta.WithChannel(channelName), meta.WithReplica(replica))
for _, segment := range segmentDist {
if !channelRWNode.Contain(segment.Node) {
offlineNodes = append(offlineNodes, segment.Node)
roNodes = append(roNodes, segment.Node)
}
}
for nid := range channelRWNode {
if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
continue
} else if isStopping {
offlineNodes = append(offlineNodes, nid)
} else {
onlineNodes = append(onlineNodes, nid)
}
}
if len(onlineNodes) == 0 {
if len(rwNodes) == 0 {
// no available nodes to balance
return nil, nil
}
if len(offlineNodes) != 0 {
if len(roNodes) != 0 {
if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
return nil, nil
}
log.Info("Handle stopping nodes",
zap.Any("stopping nodes", offlineNodes),
zap.Any("available nodes", onlineNodes),
zap.Any("stopping nodes", roNodes),
zap.Any("available nodes", rwNodes),
)
// handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, channelName, onlineNodes, offlineNodes)...)
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, channelName, rwNodes, roNodes)...)
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, channelName, onlineNodes, offlineNodes)...)
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, channelName, rwNodes, roNodes)...)
}
} else {
if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
channelPlans = append(channelPlans, b.genChannelPlan(replica, channelName, onlineNodes)...)
channelPlans = append(channelPlans, b.genChannelPlan(replica, channelName, rwNodes)...)
}
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, channelName, onlineNodes)...)
segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, channelName, rwNodes)...)
}
}
}

View File

@ -1162,8 +1162,13 @@ func (suite *ChannelLevelScoreBalancerTestSuite) TestExclusiveChannelBalance_Nod
},
}...)
suite.balancer.nodeManager.Stopping(ch1Nodes[0])
suite.balancer.nodeManager.Stopping(ch2Nodes[0])
balancer.nodeManager.Stopping(ch1Nodes[0])
balancer.nodeManager.Stopping(ch2Nodes[0])
suite.balancer.meta.ResourceManager.HandleNodeStopping(ch1Nodes[0])
suite.balancer.meta.ResourceManager.HandleNodeStopping(ch2Nodes[0])
utils.RecoverAllCollection(balancer.meta)
replica = balancer.meta.ReplicaManager.Get(replica.GetID())
sPlans, cPlans := balancer.BalanceReplica(replica)
suite.Len(sPlans, 0)
suite.Len(cPlans, 2)

View File

@ -466,67 +466,49 @@ func (b *MultiTargetBalancer) BalanceReplica(replica *meta.Replica) ([]SegmentAs
return nil, nil
}
onlineNodes := make([]int64, 0)
offlineNodes := make([]int64, 0)
rwNodes := replica.GetRWNodes()
roNodes := replica.GetRONodes()
// read only nodes is offline in current replica.
if replica.RONodesCount() > 0 {
// if node is stop or transfer to other rg
log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
offlineNodes = append(offlineNodes, replica.GetRONodes()...)
}
for _, nid := range replica.GetNodes() {
if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
continue
} else if isStopping {
offlineNodes = append(offlineNodes, nid)
} else {
onlineNodes = append(onlineNodes, nid)
}
}
if len(onlineNodes) == 0 {
if len(rwNodes) == 0 {
// no available nodes to balance
return nil, nil
}
// print current distribution before generating plans
segmentPlans, channelPlans := make([]SegmentAssignPlan, 0), make([]ChannelAssignPlan, 0)
if len(offlineNodes) != 0 {
if len(roNodes) != 0 {
if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
return nil, nil
}
log.Info("Handle stopping nodes",
zap.Any("stopping nodes", offlineNodes),
zap.Any("available nodes", onlineNodes),
zap.Any("stopping nodes", roNodes),
zap.Any("available nodes", rwNodes),
)
// handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, onlineNodes, offlineNodes)...)
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, rwNodes, roNodes)...)
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, onlineNodes, offlineNodes)...)
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, rwNodes, roNodes)...)
}
} else {
if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
channelPlans = append(channelPlans, b.genChannelPlan(replica, onlineNodes)...)
channelPlans = append(channelPlans, b.genChannelPlan(replica, rwNodes)...)
}
if len(channelPlans) == 0 {
segmentPlans = b.genSegmentPlan(replica)
segmentPlans = b.genSegmentPlan(replica, rwNodes)
}
}
return segmentPlans, channelPlans
}
func (b *MultiTargetBalancer) genSegmentPlan(replica *meta.Replica) []SegmentAssignPlan {
func (b *MultiTargetBalancer) genSegmentPlan(replica *meta.Replica, rwNodes []int64) []SegmentAssignPlan {
// get segments distribution on replica level and global level
nodeSegments := make(map[int64][]*meta.Segment)
globalNodeSegments := make(map[int64][]*meta.Segment)
for _, node := range replica.GetNodes() {
for _, node := range rwNodes {
dist := b.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))
segments := lo.Filter(dist, func(segment *meta.Segment, _ int) bool {
return b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.CurrentTarget) != nil &&

View File

@ -126,9 +126,7 @@ func (b *RowCountBasedBalancer) AssignChannel(channels []*meta.DmChannel, nodes
func (b *RowCountBasedBalancer) convertToNodeItemsBySegment(nodeIDs []int64) []*nodeItem {
ret := make([]*nodeItem, 0, len(nodeIDs))
for _, nodeInfo := range b.getNodes(nodeIDs) {
node := nodeInfo.ID()
for _, node := range nodeIDs {
// calculate sealed segment row count on node
segments := b.dist.SegmentDistManager.GetByFilter(meta.WithNodeID(node))
rowcnt := 0
@ -151,8 +149,7 @@ func (b *RowCountBasedBalancer) convertToNodeItemsBySegment(nodeIDs []int64) []*
func (b *RowCountBasedBalancer) convertToNodeItemsByChannel(nodeIDs []int64) []*nodeItem {
ret := make([]*nodeItem, 0, len(nodeIDs))
for _, nodeInfo := range b.getNodes(nodeIDs) {
node := nodeInfo.ID()
for _, node := range nodeIDs {
channels := b.dist.ChannelDistManager.GetByFilter(meta.WithNodeID2Channel(node))
// more channel num, less priority
@ -172,71 +169,52 @@ func (b *RowCountBasedBalancer) BalanceReplica(replica *meta.Replica) ([]Segment
return nil, nil
}
onlineNodes := make([]int64, 0)
offlineNodes := make([]int64, 0)
// read only nodes is offline in current replica.
if replica.RONodesCount() > 0 {
// if node is stop or transfer to other rg
log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
offlineNodes = append(offlineNodes, replica.GetRONodes()...)
}
for _, nid := range replica.GetNodes() {
if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
continue
} else if isStopping {
offlineNodes = append(offlineNodes, nid)
} else {
onlineNodes = append(onlineNodes, nid)
}
}
if len(onlineNodes) == 0 {
rwNodes := replica.GetRWNodes()
roNodes := replica.GetRONodes()
if len(rwNodes) == 0 {
// no available nodes to balance
return nil, nil
}
segmentPlans, channelPlans := make([]SegmentAssignPlan, 0), make([]ChannelAssignPlan, 0)
if len(offlineNodes) != 0 {
if len(roNodes) != 0 {
if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
return nil, nil
}
log.Info("Handle stopping nodes",
zap.Any("stopping nodes", offlineNodes),
zap.Any("available nodes", onlineNodes),
zap.Any("stopping nodes", roNodes),
zap.Any("available nodes", rwNodes),
)
// handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, onlineNodes, offlineNodes)...)
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, rwNodes, roNodes)...)
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, onlineNodes, offlineNodes)...)
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, rwNodes, roNodes)...)
}
} else {
if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
channelPlans = append(channelPlans, b.genChannelPlan(replica, onlineNodes)...)
channelPlans = append(channelPlans, b.genChannelPlan(replica, rwNodes)...)
}
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, onlineNodes)...)
segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, rwNodes)...)
}
}
return segmentPlans, channelPlans
}
func (b *RowCountBasedBalancer) genStoppingSegmentPlan(replica *meta.Replica, onlineNodes []int64, offlineNodes []int64) []SegmentAssignPlan {
func (b *RowCountBasedBalancer) genStoppingSegmentPlan(replica *meta.Replica, rwNodes []int64, roNodes []int64) []SegmentAssignPlan {
segmentPlans := make([]SegmentAssignPlan, 0)
for _, nodeID := range offlineNodes {
for _, nodeID := range roNodes {
dist := b.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(nodeID))
segments := lo.Filter(dist, func(segment *meta.Segment, _ int) bool {
return b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.CurrentTarget) != nil &&
b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.NextTarget) != nil &&
segment.GetLevel() != datapb.SegmentLevel_L0
})
plans := b.AssignSegment(replica.GetCollectionID(), segments, onlineNodes, false)
plans := b.AssignSegment(replica.GetCollectionID(), segments, rwNodes, false)
for i := range plans {
plans[i].From = nodeID
plans[i].Replica = replica
@ -246,13 +224,13 @@ func (b *RowCountBasedBalancer) genStoppingSegmentPlan(replica *meta.Replica, on
return segmentPlans
}
func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, onlineNodes []int64) []SegmentAssignPlan {
func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, rwNodes []int64) []SegmentAssignPlan {
segmentsToMove := make([]*meta.Segment, 0)
nodeRowCount := make(map[int64]int, 0)
segmentDist := make(map[int64][]*meta.Segment)
totalRowCount := 0
for _, node := range onlineNodes {
for _, node := range rwNodes {
dist := b.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))
segments := lo.Filter(dist, func(segment *meta.Segment, _ int) bool {
return b.targetMgr.GetSealedSegment(segment.GetCollectionID(), segment.GetID(), meta.CurrentTarget) != nil &&
@ -273,7 +251,7 @@ func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, onlineNode
}
// find nodes with less row count than average
average := totalRowCount / len(onlineNodes)
average := totalRowCount / len(rwNodes)
nodesWithLessRow := make([]int64, 0)
for node, segments := range segmentDist {
sort.Slice(segments, func(i, j int) bool {
@ -313,11 +291,11 @@ func (b *RowCountBasedBalancer) genSegmentPlan(replica *meta.Replica, onlineNode
return segmentPlans
}
func (b *RowCountBasedBalancer) genStoppingChannelPlan(replica *meta.Replica, onlineNodes []int64, offlineNodes []int64) []ChannelAssignPlan {
func (b *RowCountBasedBalancer) genStoppingChannelPlan(replica *meta.Replica, rwNodes []int64, roNodes []int64) []ChannelAssignPlan {
channelPlans := make([]ChannelAssignPlan, 0)
for _, nodeID := range offlineNodes {
for _, nodeID := range roNodes {
dmChannels := b.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(nodeID))
plans := b.AssignChannel(dmChannels, onlineNodes, false)
plans := b.AssignChannel(dmChannels, rwNodes, false)
for i := range plans {
plans[i].From = nodeID
plans[i].Replica = replica
@ -327,20 +305,20 @@ func (b *RowCountBasedBalancer) genStoppingChannelPlan(replica *meta.Replica, on
return channelPlans
}
func (b *RowCountBasedBalancer) genChannelPlan(replica *meta.Replica, onlineNodes []int64) []ChannelAssignPlan {
func (b *RowCountBasedBalancer) genChannelPlan(replica *meta.Replica, rwNodes []int64) []ChannelAssignPlan {
channelPlans := make([]ChannelAssignPlan, 0)
if len(onlineNodes) > 1 {
if len(rwNodes) > 1 {
// start to balance channels on all available nodes
channelDist := b.dist.ChannelDistManager.GetByFilter(meta.WithReplica2Channel(replica))
if len(channelDist) == 0 {
return nil
}
average := int(math.Ceil(float64(len(channelDist)) / float64(len(onlineNodes))))
average := int(math.Ceil(float64(len(channelDist)) / float64(len(rwNodes))))
// find nodes with less channel count than average
nodeWithLessChannel := make([]int64, 0)
channelsToMove := make([]*meta.DmChannel, 0)
for _, node := range onlineNodes {
for _, node := range rwNodes {
channels := b.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(node))
if len(channels) <= average {

View File

@ -409,8 +409,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
segmentCnts: []int{1, 2},
states: []session.State{session.NodeStateNormal, session.NodeStateNormal},
distributions: map[int64][]*meta.Segment{
1: {{SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 1, NumOfRows: 30}, Node: 11}},
2: {
11: {{SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 1, NumOfRows: 30}, Node: 11}},
22: {
{SegmentInfo: &datapb.SegmentInfo{ID: 2, CollectionID: 1, NumOfRows: 20}, Node: 22},
{SegmentInfo: &datapb.SegmentInfo{ID: 3, CollectionID: 1, NumOfRows: 30}, Node: 22},
},
@ -455,7 +455,7 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
collection.LoadType = querypb.LoadType_LoadCollection
balancer.meta.CollectionManager.PutCollection(collection)
balancer.meta.CollectionManager.PutPartition(utils.CreateTestPartition(1, 1))
balancer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, append(c.nodes, c.notExistedNodes...)))
balancer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, c.nodes))
suite.broker.ExpectedCalls = nil
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, int64(1)).Return(nil, segments, nil)
balancer.targetMgr.UpdateCollectionNextTarget(int64(1))
@ -481,6 +481,7 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
suite.balancer.nodeManager.Add(nodeInfo)
suite.balancer.meta.ResourceManager.HandleNodeUp(c.nodes[i])
}
utils.RecoverAllCollection(balancer.meta)
segmentPlans, channelPlans := suite.getCollectionBalancePlans(balancer, 1)
if !c.multiple {
@ -492,10 +493,11 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
}
// clear distribution
for node := range c.distributions {
for _, node := range c.nodes {
balancer.meta.ResourceManager.HandleNodeDown(node)
balancer.nodeManager.Remove(node)
balancer.dist.SegmentDistManager.Update(node)
}
for node := range c.distributionChannels {
balancer.dist.ChannelDistManager.Update(node)
}
})
@ -693,6 +695,8 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalanceOnPartStopping() {
suite.balancer.nodeManager.Add(nodeInfo)
suite.balancer.meta.ResourceManager.HandleNodeUp(c.nodes[i])
}
utils.RecoverAllCollection(balancer.meta)
segmentPlans, channelPlans := suite.getCollectionBalancePlans(balancer, 1)
assertSegmentAssignPlanElementMatch(&suite.Suite, c.expectPlans, segmentPlans)
assertChannelAssignPlanElementMatch(&suite.Suite, c.expectChannelPlans, channelPlans)

View File

@ -141,8 +141,7 @@ func (b *ScoreBasedBalancer) hasEnoughBenefit(sourceNode *nodeItem, targetNode *
func (b *ScoreBasedBalancer) convertToNodeItems(collectionID int64, nodeIDs []int64) []*nodeItem {
ret := make([]*nodeItem, 0, len(nodeIDs))
for _, nodeInfo := range b.getNodes(nodeIDs) {
node := nodeInfo.ID()
for _, node := range nodeIDs {
priority := b.calculateScore(collectionID, node)
nodeItem := newNodeItem(priority, node)
ret = append(ret, &nodeItem)
@ -195,56 +194,38 @@ func (b *ScoreBasedBalancer) BalanceReplica(replica *meta.Replica) ([]SegmentAss
return nil, nil
}
onlineNodes := make([]int64, 0)
offlineNodes := make([]int64, 0)
rwNodes := replica.GetRWNodes()
roNodes := replica.GetRONodes()
// read only nodes is offline in current replica.
if replica.RONodesCount() > 0 {
// if node is stop or transfer to other rg
log.RatedInfo(10, "meet read only node, try to move out all segment/channel", zap.Int64s("node", replica.GetRONodes()))
offlineNodes = append(offlineNodes, replica.GetRONodes()...)
}
for _, nid := range replica.GetNodes() {
if isStopping, err := b.nodeManager.IsStoppingNode(nid); err != nil {
log.Info("not existed node", zap.Int64("nid", nid), zap.Error(err))
continue
} else if isStopping {
offlineNodes = append(offlineNodes, nid)
} else {
onlineNodes = append(onlineNodes, nid)
}
}
if len(onlineNodes) == 0 {
if len(rwNodes) == 0 {
// no available nodes to balance
return nil, nil
}
// print current distribution before generating plans
segmentPlans, channelPlans := make([]SegmentAssignPlan, 0), make([]ChannelAssignPlan, 0)
if len(offlineNodes) != 0 {
if len(roNodes) != 0 {
if !paramtable.Get().QueryCoordCfg.EnableStoppingBalance.GetAsBool() {
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", offlineNodes))
log.RatedInfo(10, "stopping balance is disabled!", zap.Int64s("stoppingNode", roNodes))
return nil, nil
}
log.Info("Handle stopping nodes",
zap.Any("stopping nodes", offlineNodes),
zap.Any("available nodes", onlineNodes),
zap.Any("stopping nodes", roNodes),
zap.Any("available nodes", rwNodes),
)
// handle stopped nodes here, have to assign segments on stopping nodes to nodes with the smallest score
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, onlineNodes, offlineNodes)...)
channelPlans = append(channelPlans, b.genStoppingChannelPlan(replica, rwNodes, roNodes)...)
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, onlineNodes, offlineNodes)...)
segmentPlans = append(segmentPlans, b.genStoppingSegmentPlan(replica, rwNodes, roNodes)...)
}
} else {
if paramtable.Get().QueryCoordCfg.AutoBalanceChannel.GetAsBool() {
channelPlans = append(channelPlans, b.genChannelPlan(replica, onlineNodes)...)
channelPlans = append(channelPlans, b.genChannelPlan(replica, rwNodes)...)
}
if len(channelPlans) == 0 {
segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, onlineNodes)...)
segmentPlans = append(segmentPlans, b.genSegmentPlan(replica, rwNodes)...)
}
}

View File

@ -439,6 +439,7 @@ func (suite *ScoreBasedBalancerTestSuite) TestBalanceOneRound() {
suite.balancer.nodeManager.Add(nodeInfo)
suite.balancer.meta.ResourceManager.HandleNodeUp(c.nodes[i])
}
utils.RecoverAllCollection(balancer.meta)
// 4. balance and verify result
segmentPlans, channelPlans := suite.getCollectionBalancePlans(balancer, c.collectionID)

View File

@ -101,12 +101,8 @@ func (b *BalanceChecker) replicasToBalance() []int64 {
}
replicas := b.meta.ReplicaManager.GetByCollection(cid)
for _, replica := range replicas {
for _, nodeID := range replica.GetNodes() {
isStopping, _ := b.nodeManager.IsStoppingNode(nodeID)
if isStopping {
stoppingReplicas = append(stoppingReplicas, replica.GetID())
break
}
if replica.RONodesCount() > 0 {
stoppingReplicas = append(stoppingReplicas, replica.GetID())
}
}
}

View File

@ -278,6 +278,14 @@ func (suite *BalanceCheckerTestSuite) TestStoppingBalance() {
suite.targetMgr.UpdateCollectionNextTarget(int64(cid2))
suite.targetMgr.UpdateCollectionCurrentTarget(int64(cid2))
mr1 := replica1.CopyForWrite()
mr1.AddRONode(1)
suite.checker.meta.ReplicaManager.Put(mr1.IntoReplica())
mr2 := replica2.CopyForWrite()
mr2.AddRONode(1)
suite.checker.meta.ReplicaManager.Put(mr2.IntoReplica())
// test stopping balance
idsToBalance := []int64{int64(replicaID1), int64(replicaID2)}
replicasToBalance := suite.checker.replicasToBalance()
@ -348,6 +356,14 @@ func (suite *BalanceCheckerTestSuite) TestTargetNotReady() {
suite.checker.meta.CollectionManager.PutCollection(collection2, partition2)
suite.checker.meta.ReplicaManager.Put(replica2)
mr1 := replica1.CopyForWrite()
mr1.AddRONode(1)
suite.checker.meta.ReplicaManager.Put(mr1.IntoReplica())
mr2 := replica2.CopyForWrite()
mr2.AddRONode(1)
suite.checker.meta.ReplicaManager.Put(mr2.IntoReplica())
// test stopping balance
idsToBalance := []int64{int64(replicaID1)}
replicasToBalance := suite.checker.replicasToBalance()

View File

@ -130,7 +130,7 @@ func (c *ChannelChecker) getDmChannelDiff(collectionID int64,
return
}
dist := c.getChannelDist(replica)
dist := c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithReplica2Channel(replica))
distMap := typeutil.NewSet[string]()
for _, ch := range dist {
distMap.Insert(ch.GetChannelName())
@ -159,14 +159,6 @@ func (c *ChannelChecker) getDmChannelDiff(collectionID int64,
return
}
func (c *ChannelChecker) getChannelDist(replica *meta.Replica) []*meta.DmChannel {
dist := make([]*meta.DmChannel, 0)
for _, nodeID := range replica.GetNodes() {
dist = append(dist, c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(nodeID))...)
}
return dist
}
func (c *ChannelChecker) findRepeatedChannels(ctx context.Context, replicaID int64) []*meta.DmChannel {
log := log.Ctx(ctx).WithRateGroup("ChannelChecker.findRepeatedChannels", 1, 60)
replica := c.meta.Get(replicaID)
@ -176,7 +168,7 @@ func (c *ChannelChecker) findRepeatedChannels(ctx context.Context, replicaID int
log.Info("replica does not exist, skip it")
return ret
}
dist := c.getChannelDist(replica)
dist := c.dist.ChannelDistManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithReplica2Channel(replica))
targets := c.targetMgr.GetSealedSegmentsByCollection(replica.GetCollectionID(), meta.CurrentTarget)
versionsMap := make(map[string]*meta.DmChannel)
@ -221,7 +213,7 @@ func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []*
for _, ch := range channels {
rwNodes := replica.GetChannelRWNodes(ch.GetChannelName())
if len(rwNodes) == 0 {
rwNodes = replica.GetNodes()
rwNodes = replica.GetRWNodes()
}
plan := c.balancer.AssignChannel([]*meta.DmChannel{ch}, rwNodes, false)
plans = append(plans, plan...)

View File

@ -102,16 +102,17 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
)
var tasks []task.Task
segments := c.getSealedSegmentsDist(replica)
segments := c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithReplica(replica))
idSegments := make(map[int64]*meta.Segment)
roNodeSet := typeutil.NewUniqueSet(replica.GetRONodes()...)
targets := make(map[int64][]int64) // segmentID => FieldID
for _, segment := range segments {
// skip update index in stopping node
if ok, _ := c.nodeMgr.IsStoppingNode(segment.Node); ok {
// skip update index in read only node
if roNodeSet.Contain(segment.Node) {
continue
}
missing := c.checkSegment(ctx, segment, indexInfos)
missing := c.checkSegment(segment, indexInfos)
if len(missing) > 0 {
targets[segment.GetID()] = missing
idSegments[segment.GetID()] = segment
@ -142,7 +143,7 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
return tasks
}
func (c *IndexChecker) checkSegment(ctx context.Context, segment *meta.Segment, indexInfos []*indexpb.IndexInfo) (fieldIDs []int64) {
func (c *IndexChecker) checkSegment(segment *meta.Segment, indexInfos []*indexpb.IndexInfo) (fieldIDs []int64) {
var result []int64
for _, indexInfo := range indexInfos {
fieldID, indexID := indexInfo.FieldID, indexInfo.IndexID
@ -158,14 +159,6 @@ func (c *IndexChecker) checkSegment(ctx context.Context, segment *meta.Segment,
return result
}
func (c *IndexChecker) getSealedSegmentsDist(replica *meta.Replica) []*meta.Segment {
var ret []*meta.Segment
for _, node := range replica.GetNodes() {
ret = append(ret, c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))...)
}
return ret
}
func (c *IndexChecker) createSegmentUpdateTask(ctx context.Context, segment *meta.Segment, replica *meta.Replica) (task.Task, bool) {
action := task.NewSegmentActionWithScope(segment.Node, task.ActionTypeUpdate, segment.GetInsertChannel(), segment.GetID(), querypb.DataScope_Historical)
t, err := task.NewSegmentTask(

View File

@ -134,9 +134,12 @@ func (suite *IndexCheckerSuite) TestLoadIndex() {
suite.Equal(task.ActionTypeUpdate, action.Type())
suite.EqualValues(2, action.SegmentID())
// test skip load index for stopping node
// test skip load index for read only node
suite.nodeMgr.Stopping(1)
suite.nodeMgr.Stopping(2)
suite.meta.ResourceManager.HandleNodeStopping(1)
suite.meta.ResourceManager.HandleNodeStopping(2)
utils.RecoverAllCollection(suite.meta)
tasks = checker.Check(context.Background())
suite.Require().Len(tasks, 0)
}

View File

@ -93,12 +93,7 @@ func (c *LeaderChecker) Check(ctx context.Context) []task.Task {
replicas := c.meta.ReplicaManager.GetByCollection(collectionID)
for _, replica := range replicas {
for _, node := range replica.GetNodes() {
if ok, _ := c.nodeMgr.IsStoppingNode(node); ok {
// no need to correct leader's view which is loaded on stopping node
continue
}
for _, node := range replica.GetRWNodes() {
leaderViews := c.dist.LeaderViewManager.GetByFilter(meta.WithCollectionID2LeaderView(replica.GetCollectionID()), meta.WithNodeID2LeaderView(node))
for _, leaderView := range leaderViews {
dist := c.dist.SegmentDistManager.GetByFilter(meta.WithChannel(leaderView.Channel), meta.WithReplica(replica))

View File

@ -237,7 +237,8 @@ func (suite *LeaderCheckerTestSuite) TestStoppingNode() {
observer := suite.checker
observer.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
observer.meta.CollectionManager.PutPartition(utils.CreateTestPartition(1, 1))
observer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, []int64{1, 2}))
replica := utils.CreateTestReplica(1, 1, []int64{1, 2})
observer.meta.ReplicaManager.Put(replica)
segments := []*datapb.SegmentInfo{
{
ID: 1,
@ -261,12 +262,9 @@ func (suite *LeaderCheckerTestSuite) TestStoppingNode() {
view.TargetVersion = observer.target.GetCollectionTargetVersion(1, meta.CurrentTarget)
observer.dist.LeaderViewManager.Update(2, view)
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
NodeID: 2,
Address: "localhost",
Hostname: "localhost",
}))
suite.nodeMgr.Stopping(2)
mutableReplica := replica.CopyForWrite()
mutableReplica.AddRONode(2)
observer.meta.ReplicaManager.Put(mutableReplica.IntoReplica())
tasks := suite.checker.Check(context.TODO())
suite.Len(tasks, 0)

View File

@ -204,7 +204,7 @@ func (c *SegmentChecker) getSealedSegmentDiff(
log.Info("replica does not exist, skip it")
return
}
dist := c.getSealedSegmentsDist(replica)
dist := c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithReplica(replica))
sort.Slice(dist, func(i, j int) bool {
return dist[i].Version < dist[j].Version
})
@ -293,14 +293,6 @@ func (c *SegmentChecker) getSealedSegmentDiff(
return
}
func (c *SegmentChecker) getSealedSegmentsDist(replica *meta.Replica) []*meta.Segment {
ret := make([]*meta.Segment, 0)
for _, node := range replica.GetNodes() {
ret = append(ret, c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithNodeID(node))...)
}
return ret
}
func (c *SegmentChecker) findRepeatedSealedSegments(replicaID int64) []*meta.Segment {
segments := make([]*meta.Segment, 0)
replica := c.meta.Get(replicaID)
@ -308,7 +300,7 @@ func (c *SegmentChecker) findRepeatedSealedSegments(replicaID int64) []*meta.Seg
log.Info("replica does not exist, skip it")
return segments
}
dist := c.getSealedSegmentsDist(replica)
dist := c.dist.SegmentDistManager.GetByFilter(meta.WithCollectionID(replica.GetCollectionID()), meta.WithReplica(replica))
versions := make(map[int64]*meta.Segment)
for _, s := range dist {
// l0 segment should be release with channel together
@ -398,25 +390,12 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
rwNodes := replica.GetChannelRWNodes(shard)
if len(rwNodes) == 0 {
rwNodes = replica.GetNodes()
}
// filter out stopping nodes.
availableNodes := lo.Filter(rwNodes, func(node int64, _ int) bool {
stop, err := c.nodeMgr.IsStoppingNode(node)
if err != nil {
return false
}
return !stop
})
if len(availableNodes) == 0 {
return nil
rwNodes = replica.GetRWNodes()
}
// L0 segment can only be assign to shard leader's node
if isLevel0 {
availableNodes = []int64{leader.ID}
rwNodes = []int64{leader.ID}
}
segmentInfos := lo.Map(segments, func(s *datapb.SegmentInfo, _ int) *meta.Segment {
@ -424,7 +403,7 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
SegmentInfo: s,
}
})
shardPlans := c.balancer.AssignSegment(replica.GetCollectionID(), segmentInfos, availableNodes, false)
shardPlans := c.balancer.AssignSegment(replica.GetCollectionID(), segmentInfos, rwNodes, false)
for i := range shardPlans {
shardPlans[i].Replica = replica
}

View File

@ -46,7 +46,7 @@ import (
func (s *Server) checkAnyReplicaAvailable(collectionID int64) bool {
for _, replica := range s.meta.ReplicaManager.GetByCollection(collectionID) {
isAvailable := true
for _, node := range replica.GetNodes() {
for _, node := range replica.GetRONodes() {
if s.nodeMgr.Get(node) == nil {
isAvailable = false
break

View File

@ -159,16 +159,12 @@ func (job *LoadCollectionJob) Execute() error {
// API of LoadCollection is wired, we should use map[resourceGroupNames]replicaNumber as input, to keep consistency with `TransferReplica` API.
// Then we can implement dynamic replica changed in different resource group independently.
replicas, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
_, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
if err != nil {
msg := "failed to spawn replica for collection"
log.Warn(msg, zap.Error(err))
return errors.Wrap(err, msg)
}
for _, replica := range replicas {
log.Info("replica created", zap.Int64("replicaID", replica.GetID()),
zap.Int64s("nodes", replica.GetNodes()), zap.String("resourceGroup", replica.GetResourceGroup()))
}
job.undo.IsReplicaCreated = true
}
@ -346,16 +342,12 @@ func (job *LoadPartitionJob) Execute() error {
if err != nil {
return err
}
replicas, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
_, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber(), collectionInfo.GetVirtualChannelNames())
if err != nil {
msg := "failed to spawn replica for collection"
log.Warn(msg, zap.Error(err))
return errors.Wrap(err, msg)
}
for _, replica := range replicas {
log.Info("replica created", zap.Int64("replicaID", replica.GetID()),
zap.Int64s("nodes", replica.GetNodes()), zap.String("resourceGroup", replica.GetResourceGroup()))
}
job.undo.IsReplicaCreated = true
}

View File

@ -39,7 +39,7 @@ func NewReplica(replica *querypb.Replica, nodes ...typeutil.UniqueSet) *Replica
}
// newReplica creates a new replica from pb.
func newReplica(replica *querypb.Replica, channels ...string) *Replica {
func newReplica(replica *querypb.Replica) *Replica {
return &Replica{
replicaPB: proto.Clone(replica).(*querypb.Replica),
rwNodes: typeutil.NewUniqueSet(replica.Nodes...),
@ -65,7 +65,10 @@ func (replica *Replica) GetResourceGroup() string {
// GetNodes returns all nodes of the replica, i.e. both the ro nodes and the
// rw nodes, so callers (e.g. stopping balance) can see nodes that are being
// drained as well as serving ones.
// readonly, don't modify the returned slice.
func (replica *Replica) GetNodes() []int64 {
	// NOTE: the stale `return replica.replicaPB.GetNodes()` that preceded the
	// merge logic was unreachable leftover code and has been removed.
	nodes := make([]int64, 0, len(replica.replicaPB.GetRoNodes())+len(replica.replicaPB.GetNodes()))
	nodes = append(nodes, replica.replicaPB.GetRoNodes()...)
	nodes = append(nodes, replica.replicaPB.GetNodes()...)
	return nodes
}
// GetRONodes returns the ro nodes of the replica.
@ -74,6 +77,12 @@ func (replica *Replica) GetRONodes() []int64 {
return replica.replicaPB.GetRoNodes()
}
// GetRWNodes returns the rw nodes of the replica.
// readonly, don't modify the returned slice.
func (replica *Replica) GetRWNodes() []int64 {
	return replica.replicaPB.GetNodes()
}
// RangeOverRWNodes iterates over the read and write nodes of the replica.
func (replica *Replica) RangeOverRWNodes(f func(node int64) bool) {
replica.rwNodes.Range(f)
@ -131,8 +140,8 @@ func (replica *Replica) GetChannelRWNodes(channelName string) []int64 {
return replica.replicaPB.ChannelNodeInfos[channelName].GetRwNodes()
}
// copyForWrite returns a mutable replica for write operations.
func (replica *Replica) copyForWrite() *mutableReplica {
// CopyForWrite returns a mutable replica for write operations.
func (replica *Replica) CopyForWrite() *mutableReplica {
exclusiveRWNodeToChannel := make(map[int64]string)
for name, channelNodeInfo := range replica.replicaPB.GetChannelNodeInfos() {
for _, nodeID := range channelNodeInfo.GetRwNodes() {

View File

@ -195,7 +195,7 @@ func (m *ReplicaManager) TransferReplica(collectionID typeutil.UniqueID, srcRGNa
// Node Change will be executed by replica_observer in background.
replicas := make([]*Replica, 0, replicaNum)
for i := 0; i < replicaNum; i++ {
mutableReplica := srcReplicas[i].copyForWrite()
mutableReplica := srcReplicas[i].CopyForWrite()
mutableReplica.SetResourceGroup(dstRGName)
replicas = append(replicas, mutableReplica.IntoReplica())
}
@ -350,7 +350,7 @@ func (m *ReplicaManager) RecoverNodesInCollection(collectionID typeutil.UniqueID
// nothing to do.
return
}
mutableReplica := m.replicas[assignment.GetReplicaID()].copyForWrite()
mutableReplica := m.replicas[assignment.GetReplicaID()].CopyForWrite()
mutableReplica.AddRONode(roNodes...) // rw -> ro
mutableReplica.AddRWNode(recoverableNodes...) // ro -> rw
mutableReplica.AddRWNode(incomingNode...) // unused -> rw
@ -414,7 +414,7 @@ func (m *ReplicaManager) RemoveNode(replicaID typeutil.UniqueID, nodes ...typeut
return merr.WrapErrReplicaNotFound(replicaID)
}
mutableReplica := replica.copyForWrite()
mutableReplica := replica.CopyForWrite()
mutableReplica.RemoveNode(nodes...) // ro -> unused
return m.put(mutableReplica.IntoReplica())
}

View File

@ -30,13 +30,13 @@ func (suite *ReplicaSuite) TestReadOperations() {
r := newReplica(suite.replicaPB)
suite.testRead(r)
// keep same after clone.
mutableReplica := r.copyForWrite()
mutableReplica := r.CopyForWrite()
suite.testRead(mutableReplica.IntoReplica())
}
func (suite *ReplicaSuite) TestClone() {
r := newReplica(suite.replicaPB)
r2 := r.copyForWrite()
r2 := r.CopyForWrite()
suite.testRead(r)
// after apply write operation on copy, the original should not be affected.
@ -68,7 +68,7 @@ func (suite *ReplicaSuite) TestRange() {
})
suite.Equal(1, count)
mr := r.copyForWrite()
mr := r.CopyForWrite()
mr.AddRONode(1)
count = 0
@ -81,7 +81,7 @@ func (suite *ReplicaSuite) TestRange() {
func (suite *ReplicaSuite) TestWriteOperation() {
r := newReplica(suite.replicaPB)
mr := r.copyForWrite()
mr := r.CopyForWrite()
// test add available node.
suite.False(mr.Contains(5))
@ -158,7 +158,7 @@ func (suite *ReplicaSuite) testRead(r *Replica) {
suite.Equal(suite.replicaPB.GetResourceGroup(), r.GetResourceGroup())
// Test GetNodes()
suite.ElementsMatch(suite.replicaPB.GetNodes(), r.GetNodes())
suite.ElementsMatch(suite.replicaPB.GetNodes(), r.GetRWNodes())
// Test GetRONodes()
suite.ElementsMatch(suite.replicaPB.GetRoNodes(), r.GetRONodes())
@ -195,7 +195,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
},
})
mutableReplica := r.copyForWrite()
mutableReplica := r.CopyForWrite()
// add 10 rw nodes, exclusive mode is false.
for i := 0; i < 10; i++ {
mutableReplica.AddRWNode(int64(i))
@ -205,7 +205,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
suite.Equal(0, len(channelNodeInfo.GetRwNodes()))
}
mutableReplica = r.copyForWrite()
mutableReplica = r.CopyForWrite()
// add 10 rw nodes, exclusive mode is true.
for i := 10; i < 20; i++ {
mutableReplica.AddRWNode(int64(i))
@ -216,7 +216,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
}
// 4 node become read only, exclusive mode still be true
mutableReplica = r.copyForWrite()
mutableReplica = r.CopyForWrite()
for i := 0; i < 4; i++ {
mutableReplica.AddRONode(int64(i))
}
@ -226,7 +226,7 @@ func (suite *ReplicaSuite) TestChannelExclusiveMode() {
}
// 4 node has been removed, exclusive mode back to false
mutableReplica = r.copyForWrite()
mutableReplica = r.CopyForWrite()
for i := 4; i < 8; i++ {
mutableReplica.RemoveNode(int64(i))
}

View File

@ -453,7 +453,6 @@ func (rm *ResourceManager) HandleNodeDown(node int64) {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
// failure of node down can be ignored, node down can be done by `RemoveAllDownNode`.
rm.incomingNode.Remove(node)
// for stopping query node becomes offline, node change won't be triggered,
@ -470,6 +469,19 @@ func (rm *ResourceManager) HandleNodeDown(node int64) {
)
}
// HandleNodeStopping unassigns a stopping query node from its resource group
// so that it no longer receives new assignments while it drains.
func (rm *ResourceManager) HandleNodeStopping(node int64) {
	rm.rwmutex.Lock()
	defer rm.rwmutex.Unlock()

	// a stopping node is no longer a pending incoming node.
	rm.incomingNode.Remove(node)
	groupName, unassignErr := rm.unassignNode(node)
	log.Info("HandleNodeStopping: remove node from resource group",
		zap.String("rgName", groupName),
		zap.Int64("node", node),
		zap.Error(unassignErr),
	)
}
// ListenResourceGroupChanged return a listener for resource group changed.
func (rm *ResourceManager) ListenResourceGroupChanged() *syncutil.VersionedListener {
return rm.rgChangedNotifier.Listen(syncutil.VersionedListenAtEarliest)
@ -495,25 +507,6 @@ func (rm *ResourceManager) AssignPendingIncomingNode() {
}
}
// RemoveAllDownNode remove all down node from resource group.
// A node counts as down when the node manager no longer knows it or it is in
// stopping state; every such node is unassigned from its resource group.
func (rm *ResourceManager) RemoveAllDownNode() {
	rm.rwmutex.Lock()
	defer rm.rwmutex.Unlock()

	for nodeID := range rm.nodeIDMap {
		node := rm.nodeMgr.Get(nodeID)
		if node != nil && !node.IsStoppingState() {
			// node is alive and serving; keep it assigned.
			continue
		}
		// unassignNode failure can be skip.
		rgName, err := rm.unassignNode(nodeID)
		log.Info("remove down node from resource group",
			zap.Bool("nodeExist", node != nil),
			zap.Int64("nodeID", nodeID),
			zap.String("rgName", rgName),
			zap.Error(err),
		)
	}
}
// AutoRecoverResourceGroup auto recover rg, return recover used node num
func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) error {
rm.rwmutex.Lock()
@ -847,7 +840,8 @@ func (rm *ResourceManager) unassignNode(node int64) (string, error) {
rm.nodeChangedNotifier.NotifyAll()
return rg.GetName(), nil
}
return "", nil
return "", errors.Errorf("node %d not found in any resource group", node)
}
// validateResourceGroupConfig validate resource group config.

View File

@ -524,16 +524,6 @@ func (suite *ResourceManagerSuite) TestAutoRecover() {
suite.Equal(80, suite.manager.GetResourceGroup("rg2").NodeNum())
suite.Equal(5, suite.manager.GetResourceGroup("rg3").NodeNum())
suite.Equal(5, suite.manager.GetResourceGroup(DefaultResourceGroupName).NodeNum())
// Test down all nodes.
for i := 1; i <= 100; i++ {
suite.manager.nodeMgr.Remove(int64(i))
}
suite.manager.RemoveAllDownNode()
suite.Zero(suite.manager.GetResourceGroup("rg1").NodeNum())
suite.Zero(suite.manager.GetResourceGroup("rg2").NodeNum())
suite.Zero(suite.manager.GetResourceGroup("rg3").NodeNum())
suite.Zero(suite.manager.GetResourceGroup(DefaultResourceGroupName).NodeNum())
}
func (suite *ResourceManagerSuite) testTransferNode() {

View File

@ -100,6 +100,7 @@ func (ob *ReplicaObserver) checkNodesInReplica() {
replicas := ob.meta.ReplicaManager.GetByCollection(collectionID)
for _, replica := range replicas {
roNodes := replica.GetRONodes()
rwNodes := replica.GetRWNodes()
if len(roNodes) == 0 {
continue
}
@ -124,7 +125,7 @@ func (ob *ReplicaObserver) checkNodesInReplica() {
zap.Int64("replicaID", replica.GetID()),
zap.Int64s("removedNodes", removeNodes),
zap.Int64s("roNodes", roNodes),
zap.Int64s("availableNodes", replica.GetNodes()),
zap.Int64s("rwNodes", rwNodes),
)
if err := ob.meta.ReplicaManager.RemoveNode(replica.GetID(), removeNodes...); err != nil {
logger.Warn("fail to remove node from replica", zap.Error(err))

View File

@ -98,10 +98,6 @@ func (ob *ResourceObserver) checkAndRecoverResourceGroup() {
manager.AssignPendingIncomingNode()
}
// Remove all down nodes in resource group manager.
log.Debug("remove all down nodes in resource group manager...")
ob.meta.RemoveAllDownNode()
log.Debug("recover resource groups...")
// Recover all resource group into expected configuration.
for _, rgName := range rgNames {

View File

@ -136,6 +136,7 @@ func (suite *ResourceObserverSuite) TestObserverRecoverOperation() {
suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg2"))
suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg3"))
// new node is down, rg3 cannot use that node anymore.
suite.meta.ResourceManager.HandleNodeDown(10)
suite.observer.checkAndRecoverResourceGroup()
suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg1"))
suite.NoError(suite.meta.ResourceManager.MeetRequirement("rg2"))

View File

@ -276,7 +276,7 @@ func (s *Server) TransferSegment(ctx context.Context, req *querypb.TransferSegme
// when no dst node specified, default to use all other nodes in same
dstNodeSet := typeutil.NewUniqueSet()
if req.GetToAllNodes() {
dstNodeSet.Insert(replica.GetNodes()...)
dstNodeSet.Insert(replica.GetRWNodes()...)
} else {
// check whether dstNode is healthy
if err := s.isStoppingNode(req.GetTargetNodeID()); err != nil {
@ -348,7 +348,7 @@ func (s *Server) TransferChannel(ctx context.Context, req *querypb.TransferChann
// when no dst node specified, default to use all other nodes in same
dstNodeSet := typeutil.NewUniqueSet()
if req.GetToAllNodes() {
dstNodeSet.Insert(replica.GetNodes()...)
dstNodeSet.Insert(replica.GetRWNodes()...)
} else {
// check whether dstNode is healthy
if err := s.isStoppingNode(req.GetTargetNodeID()); err != nil {

View File

@ -441,7 +441,6 @@ func (s *Server) startQueryCoord() error {
s.nodeMgr.Stopping(node.ServerID)
}
}
s.checkReplicas()
for _, node := range sessions {
s.handleNodeUp(node.ServerID)
}
@ -685,6 +684,7 @@ func (s *Server) watchNodes(revision int64) {
)
s.nodeMgr.Stopping(nodeID)
s.checkerController.Check()
s.meta.ResourceManager.HandleNodeStopping(nodeID)
case sessionutil.SessionDelEvent:
nodeID := event.Session.ServerID
@ -748,7 +748,6 @@ func (s *Server) handleNodeUp(node int64) {
}
func (s *Server) handleNodeDown(node int64) {
log := log.With(zap.Int64("nodeID", node))
s.taskScheduler.RemoveExecutor(node)
s.distController.Remove(node)
@ -757,57 +756,12 @@ func (s *Server) handleNodeDown(node int64) {
s.dist.ChannelDistManager.Update(node)
s.dist.SegmentDistManager.Update(node)
// Clear meta
for _, collection := range s.meta.CollectionManager.GetAll() {
log := log.With(zap.Int64("collectionID", collection))
replica := s.meta.ReplicaManager.GetByCollectionAndNode(collection, node)
if replica == nil {
continue
}
err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), node)
if err != nil {
log.Warn("failed to remove node from collection's replicas",
zap.Int64("replicaID", replica.GetID()),
zap.Error(err),
)
}
log.Info("remove node from replica",
zap.Int64("replicaID", replica.GetID()))
}
// Clear tasks
s.taskScheduler.RemoveByNode(node)
s.meta.ResourceManager.HandleNodeDown(node)
}
// checkReplicas checks whether replica contains offline node, and remove those nodes
// from the replica so they stop being considered for assignment.
func (s *Server) checkReplicas() {
	for _, collection := range s.meta.CollectionManager.GetAll() {
		log := log.With(zap.Int64("collectionID", collection))
		for _, replica := range s.meta.ReplicaManager.GetByCollection(collection) {
			// collect nodes of this replica that the node manager no longer knows.
			offline := make([]int64, 0)
			for _, node := range replica.GetNodes() {
				if s.nodeMgr.Get(node) == nil {
					offline = append(offline, node)
				}
			}
			if len(offline) == 0 {
				continue
			}
			log := log.With(
				zap.Int64("replicaID", replica.GetID()),
				zap.Int64s("offlineNodes", offline),
			)
			log.Info("some nodes are offline, remove them from replica", zap.Any("toRemove", offline))
			if err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), offline...); err != nil {
				log.Warn("failed to remove offline nodes from replica")
			}
		}
	}
}
func (s *Server) updateBalanceConfigLoop(ctx context.Context) {
success := s.updateBalanceConfig()
if success {

View File

@ -686,7 +686,7 @@ func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceReques
// when no dst node specified, default to use all other nodes in same
dstNodeSet := typeutil.NewUniqueSet()
if len(req.GetDstNodeIDs()) == 0 {
dstNodeSet.Insert(replica.GetNodes()...)
dstNodeSet.Insert(replica.GetRWNodes()...)
} else {
for _, dstNode := range req.GetDstNodeIDs() {
if !replica.Contains(dstNode) {
@ -1075,7 +1075,7 @@ func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.Describ
replicasInRG := s.meta.GetByResourceGroup(req.GetResourceGroup())
for _, replica := range replicasInRG {
loadedReplicas[replica.GetCollectionID()]++
for _, node := range replica.GetNodes() {
for _, node := range replica.GetRONodes() {
if !s.meta.ContainsNode(replica.GetResourceGroup(), node) {
outgoingNodes[replica.GetCollectionID()]++
}
@ -1090,7 +1090,7 @@ func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.Describ
if replica.GetResourceGroup() == req.GetResourceGroup() {
continue
}
for _, node := range replica.GetNodes() {
for _, node := range replica.GetRONodes() {
if s.meta.ContainsNode(req.GetResourceGroup(), node) {
incomingNodes[collection]++
}
@ -1101,8 +1101,7 @@ func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.Describ
nodes := make([]*commonpb.NodeInfo, 0, len(rg.GetNodes()))
for _, nodeID := range rg.GetNodes() {
nodeSessionInfo := s.nodeMgr.Get(nodeID)
// Filter offline nodes and nodes in stopping state
if nodeSessionInfo != nil && !nodeSessionInfo.IsStoppingState() {
if nodeSessionInfo != nil {
nodes = append(nodes, &commonpb.NodeInfo{
NodeId: nodeSessionInfo.ID(),
Address: nodeSessionInfo.Addr(),

View File

@ -432,7 +432,8 @@ func (suite *ServiceSuite) TestResourceGroup() {
server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
ID: 1,
CollectionID: 1,
Nodes: []int64{1011, 1013},
Nodes: []int64{1011},
RoNodes: []int64{1013},
ResourceGroup: "rg11",
},
typeutil.NewUniqueSet(1011, 1013)),
@ -440,7 +441,8 @@ func (suite *ServiceSuite) TestResourceGroup() {
server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
ID: 2,
CollectionID: 2,
Nodes: []int64{1012, 1014},
Nodes: []int64{1014},
RoNodes: []int64{1012},
ResourceGroup: "rg12",
},
typeutil.NewUniqueSet(1012, 1014)),

View File

@ -22,7 +22,6 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
@ -35,19 +34,6 @@ var (
ErrUseWrongNumRG = errors.New("resource group num can only be 0, 1 or same as replica number")
)
// GetReplicaNodesInfo looks up the replica by ID and returns the session info
// of each of its nodes; returns nil when the replica does not exist.
func GetReplicaNodesInfo(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeManager, replicaID int64) []*session.NodeInfo {
	replica := replicaMgr.Get(replicaID)
	if replica == nil {
		return nil
	}
	nodeIDs := replica.GetNodes()
	infos := make([]*session.NodeInfo, 0, len(nodeIDs))
	for _, nodeID := range nodeIDs {
		infos = append(infos, nodeMgr.Get(nodeID))
	}
	return infos
}
func GetPartitions(collectionMgr *meta.CollectionManager, collectionID int64) ([]int64, error) {
collection := collectionMgr.GetCollection(collectionID)
if collection != nil {