Avoid the panic when the status of rpc response is nil (#26910)

Signed-off-by: SimFG <bang.fu@zilliz.com>
pull/26922/head
SimFG 2023-09-07 19:23:15 +08:00 committed by GitHub
parent 3fd315f2b5
commit 0901b76732
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 25 additions and 14 deletions

View File

@ -754,7 +754,6 @@ func (s *Server) GetIndexInfos(ctx context.Context, req *indexpb.GetIndexInfoReq
}
func (s *Server) UnhealthyStatus() *commonpb.Status {
return merr.Status(
merr.WrapErrServiceNotReady(
fmt.Sprintf("datacoord %d is unhealthy", s.serverID())))
code := s.stateCode.Load().(commonpb.StateCode)
return merr.Status(merr.WrapErrServiceNotReady(code.String()))
}

View File

@ -1668,7 +1668,7 @@ func (s *Server) BroadcastAlteredCollection(ctx context.Context, req *datapb.Alt
func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
if s.isClosed() {
reason := errorutil.UnHealthReason("datacoord", paramtable.GetNodeID(), "datacoord is closed")
return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: []string{reason}}, nil
return &milvuspb.CheckHealthResponse{Status: s.UnhealthyStatus(), IsHealthy: false, Reasons: []string{reason}}, nil
}
mu := &sync.Mutex{}
@ -1700,10 +1700,10 @@ func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthReque
err := group.Wait()
if err != nil || len(errReasons) != 0 {
return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: errReasons}, nil
return &milvuspb.CheckHealthResponse{Status: merr.Status(nil), IsHealthy: false, Reasons: errReasons}, nil
}
return &milvuspb.CheckHealthResponse{IsHealthy: true, Reasons: errReasons}, nil
return &milvuspb.CheckHealthResponse{Status: merr.Status(nil), IsHealthy: true, Reasons: errReasons}, nil
}
func (s *Server) GcConfirm(ctx context.Context, request *datapb.GcConfirmRequest) (*datapb.GcConfirmResponse, error) {

View File

@ -973,15 +973,15 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
if err := merr.CheckHealthy(s.State()); err != nil {
reason := errorutil.UnHealthReason("querycoord", paramtable.GetNodeID(), "querycoord is unhealthy")
return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: []string{reason}}, nil
return &milvuspb.CheckHealthResponse{Status: merr.Status(err), IsHealthy: false, Reasons: []string{reason}}, nil
}
errReasons, err := s.checkNodeHealth(ctx)
if err != nil || len(errReasons) != 0 {
return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: errReasons}, nil
return &milvuspb.CheckHealthResponse{Status: merr.Status(nil), IsHealthy: false, Reasons: errReasons}, nil
}
return &milvuspb.CheckHealthResponse{IsHealthy: true, Reasons: errReasons}, nil
return &milvuspb.CheckHealthResponse{Status: merr.Status(nil), IsHealthy: true, Reasons: errReasons}, nil
}
func (s *Server) checkNodeHealth(ctx context.Context) ([]string, error) {

View File

@ -2787,9 +2787,13 @@ func (c *Core) RenameCollection(ctx context.Context, req *milvuspb.RenameCollect
}
func (c *Core) CheckHealth(ctx context.Context, in *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
if _, ok := c.checkHealthy(); !ok {
if code, ok := c.checkHealthy(); !ok {
reason := errorutil.UnHealthReason("rootcoord", c.session.ServerID, "rootcoord is unhealthy")
return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: []string{reason}}, nil
return &milvuspb.CheckHealthResponse{
Status: merr.Status(merr.WrapErrServiceNotReady(code.String())),
IsHealthy: false,
Reasons: []string{reason},
}, nil
}
mu := &sync.Mutex{}
@ -2813,8 +2817,12 @@ func (c *Core) CheckHealth(ctx context.Context, in *milvuspb.CheckHealthRequest)
err := group.Wait()
if err != nil || len(errReasons) != 0 {
return &milvuspb.CheckHealthResponse{IsHealthy: false, Reasons: errReasons}, nil
return &milvuspb.CheckHealthResponse{
Status: merr.Status(nil),
IsHealthy: false,
Reasons: errReasons,
}, nil
}
return &milvuspb.CheckHealthResponse{IsHealthy: true, Reasons: errReasons}, nil
return &milvuspb.CheckHealthResponse{Status: merr.Status(nil), IsHealthy: true, Reasons: errReasons}, nil
}

View File

@ -368,6 +368,11 @@ func (c *ClientBase[T]) call(ctx context.Context, caller func(client T) (any, er
return nil
}
if status == nil {
log.Warn("status is nil, please fix it", zap.Stack("stack"))
return nil
}
if merr.Ok(status) || !merr.IsRetryableCode(status.GetCode()) {
return nil
}

View File

@ -342,7 +342,6 @@ func TestClientBase_RetryPolicy(t *testing.T) {
State: &milvuspb.ComponentInfo{
NodeID: randID,
},
Status: merr.Status(nil),
}, nil
})
assert.NoError(t, err)