mirror of https://github.com/milvus-io/milvus.git
parent
69cc68caff
commit
bb5088e605
|
@ -273,7 +273,7 @@ func (rm *ResourceManager) unassignNode(rgName string, node int64) error {
|
|||
return ErrRGNotExist
|
||||
}
|
||||
|
||||
if rm.nodeMgr.Get(node) == nil || !rm.groups[rgName].containsNode(node) {
|
||||
if !rm.groups[rgName].containsNode(node) {
|
||||
// removing a non-existent node should be tolerated
|
||||
return nil
|
||||
}
|
||||
|
@ -477,10 +477,6 @@ func (rm *ResourceManager) HandleNodeDown(node int64) (string, error) {
|
|||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
|
||||
if rm.nodeMgr.Get(node) == nil {
|
||||
return "", ErrNodeNotExist
|
||||
}
|
||||
|
||||
rgName, err := rm.findResourceGroupByNode(node)
|
||||
if err != nil {
|
||||
return "", ErrNodeNotAssignToRG
|
||||
|
@ -555,6 +551,12 @@ func (rm *ResourceManager) TransferNode(from string, to string, numNode int) ([]
|
|||
// interrupt transfer, unreachable logic path
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Info("transfer node",
|
||||
zap.String("sourceRG", from),
|
||||
zap.String("targetRG", to),
|
||||
zap.Int64("nodeID", node),
|
||||
)
|
||||
}
|
||||
|
||||
return movedNodes, nil
|
||||
|
@ -616,6 +618,7 @@ func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) ([]int64, err
|
|||
|
||||
ret := make([]int64, 0)
|
||||
|
||||
rm.checkRGNodeStatus(DefaultResourceGroupName)
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
lackNodesNum := rm.groups[rgName].LackOfNodes()
|
||||
nodesInDefault := rm.groups[DefaultResourceGroupName].GetNodes()
|
||||
|
@ -635,6 +638,11 @@ func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) ([]int64, err
|
|||
return ret, err
|
||||
}
|
||||
|
||||
log.Info("move node from default rg to recover",
|
||||
zap.String("targetRG", rgName),
|
||||
zap.Int64("nodeID", node),
|
||||
)
|
||||
|
||||
ret = append(ret, node)
|
||||
}
|
||||
|
||||
|
|
|
@ -321,6 +321,23 @@ func (suite *ResourceManagerSuite) TestAutoRecover() {
|
|||
suite.manager.AutoRecoverResourceGroup("rg")
|
||||
lackNodes = suite.manager.CheckLackOfNode("rg")
|
||||
suite.Equal(lackNodes, 0)
|
||||
|
||||
// test auto-recover behavior when all nodes are down
|
||||
suite.manager.nodeMgr.Remove(1)
|
||||
suite.manager.nodeMgr.Remove(2)
|
||||
suite.manager.AutoRecoverResourceGroup("rg")
|
||||
nodes, _ := suite.manager.GetNodes("rg")
|
||||
suite.Len(nodes, 0)
|
||||
nodes, _ = suite.manager.GetNodes(DefaultResourceGroupName)
|
||||
suite.Len(nodes, 0)
|
||||
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.HandleNodeUp(1)
|
||||
suite.manager.AutoRecoverResourceGroup("rg")
|
||||
nodes, _ = suite.manager.GetNodes("rg")
|
||||
suite.Len(nodes, 1)
|
||||
nodes, _ = suite.manager.GetNodes(DefaultResourceGroupName)
|
||||
suite.Len(nodes, 0)
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestDefaultResourceGroup() {
|
||||
|
|
Loading…
Reference in New Issue