Check whether the node is stopping when using `load balance` api (#21438)

Signed-off-by: SimFG <bang.fu@zilliz.com>
pull/21449/head
SimFG 2022-12-29 15:47:31 +08:00 committed by GitHub
parent d7156812c1
commit e6d2849c9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 0 deletions

View File

@ -444,6 +444,20 @@ func (s *Server) GetSegmentInfo(ctx context.Context, req *querypb.GetSegmentInfo
}, nil
}
func (s *Server) isStoppingNode(nodeID int64) error {
isStopping, err := s.nodeMgr.IsStoppingNode(nodeID)
if err != nil {
log.Warn("fail to check whether the node is stopping", zap.Int64("node_id", nodeID), zap.Error(err))
return err
}
if isStopping {
msg := fmt.Sprintf("failed to balance due to the source/destination node[%d] is stopping", nodeID)
log.Warn(msg)
return errors.New(msg)
}
return nil
}
func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.Int64("collectionID", req.GetCollectionID()),
@ -478,12 +492,20 @@ func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceReques
log.Warn(msg)
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
}
if err := s.isStoppingNode(srcNode); err != nil {
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError,
fmt.Sprintf("can't balance, because the source node[%d] is invalid", srcNode), err), nil
}
for _, dstNode := range req.GetDstNodeIDs() {
if !replica.Nodes.Contain(dstNode) {
msg := "destination nodes have to be in the same replica of source node"
log.Warn(msg)
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
}
if err := s.isStoppingNode(dstNode); err != nil {
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError,
fmt.Sprintf("can't balance, because the destination node[%d] is invalid", dstNode), err), nil
}
}
err := s.balanceSegments(ctx, req, replica)

View File

@ -745,6 +745,26 @@ func (suite *ServiceSuite) TestLoadBalanceFailed() {
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
suite.Contains(resp.Reason, "failed to balance segments")
suite.Contains(resp.Reason, task.ErrTaskCanceled.Error())
suite.meta.ReplicaManager.AddNode(replicas[0].ID, 10)
req.SourceNodeIDs = []int64{10}
resp, err = server.LoadBalance(ctx, req)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
req.SourceNodeIDs = []int64{srcNode}
req.DstNodeIDs = []int64{10}
resp, err = server.LoadBalance(ctx, req)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
suite.nodeMgr.Add(session.NewNodeInfo(10, "localhost"))
suite.nodeMgr.Stopping(10)
resp, err = server.LoadBalance(ctx, req)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
suite.nodeMgr.Remove(10)
suite.meta.ReplicaManager.RemoveNode(replicas[0].ID, 10)
}
}