fix check grpc error logic (#28182)

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/28190/head
wei liu 2023-11-07 11:54:18 +08:00 committed by GitHub
parent 945c89f779
commit da41a5b51e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 21 deletions

View File

@ -361,21 +361,22 @@ func (c *ClientBase[T]) needResetCancel() (needReset bool) {
return false
}
func (c *ClientBase[T]) checkErr(ctx context.Context, err error) (needRetry, needReset bool, retErr error) {
func (c *ClientBase[T]) checkGrpcErr(ctx context.Context, err error) (needRetry, needReset bool, retErr error) {
log := log.Ctx(ctx).With(zap.String("clientRole", c.GetRole()))
switch {
case funcutil.IsGrpcErr(err):
// grpc err
log.Warn("call received grpc error", zap.Error(err))
if funcutil.IsGrpcErr(err, codes.Canceled, codes.DeadlineExceeded) {
// canceled or deadline exceeded
return true, c.needResetCancel(), err
}
// Unknown err
if !funcutil.IsGrpcErr(err) {
log.Warn("fail to grpc call because of unknown error", zap.Error(err))
return false, false, err
}
if funcutil.IsGrpcErr(err, codes.Unimplemented) {
return false, false, merr.WrapErrServiceUnimplemented(err)
}
return true, true, err
// grpc err
log.Warn("call received grpc error", zap.Error(err))
switch {
case funcutil.IsGrpcErr(err, codes.Canceled, codes.DeadlineExceeded):
// canceled or deadline exceeded
return true, c.needResetCancel(), err
case funcutil.IsGrpcErr(err, codes.Unimplemented):
return false, false, merr.WrapErrServiceUnimplemented(err)
case IsServerIDMismatchErr(err):
if ok, err := c.checkNodeSessionExist(ctx); !ok {
// if session doesn't exist, no need to retry for datanode/indexnode/querynode
@ -385,9 +386,7 @@ func (c *ClientBase[T]) checkErr(ctx context.Context, err error) (needRetry, nee
case IsCrossClusterRoutingErr(err):
return true, true, err
default:
log.Warn("fail to grpc call because of unknown error", zap.Error(err))
// Unknown err
return false, false, err
return true, true, err
}
}
@ -443,7 +442,7 @@ func (c *ClientBase[T]) call(ctx context.Context, caller func(client T) (any, er
ret, err = caller(client)
if err != nil {
var needRetry, needReset bool
needRetry, needReset, err = c.checkErr(ctx, err)
needRetry, needReset, err = c.checkGrpcErr(ctx, err)
if !needRetry {
// stop retry
err = retry.Unrecoverable(err)

View File

@ -121,7 +121,7 @@ func TestClientBase_NodeSessionNotExist(t *testing.T) {
base.grpcClient = &mockClient{}
base.grpcClientMtx.Unlock()
_, err = base.Call(ctx, func(client *mockClient) (any, error) {
return struct{}{}, merr.ErrNodeNotMatch
return struct{}{}, status.Errorf(codes.Unknown, merr.ErrNodeNotMatch.Error())
})
assert.True(t, errors.Is(err, merr.ErrNodeNotFound))
}
@ -343,19 +343,34 @@ func TestClientBase_Recall(t *testing.T) {
})
}
func TestClientBase_CheckError(t *testing.T) {
func TestClientBase_CheckGrpcError(t *testing.T) {
base := ClientBase[*mockClient]{}
base.grpcClient = &mockClient{}
base.MaxAttempts = 1
ctx := context.Background()
retry, reset, _ := base.checkErr(ctx, status.Errorf(codes.Canceled, "fake context canceled"))
retry, reset, _ := base.checkGrpcErr(ctx, status.Errorf(codes.Canceled, "fake context canceled"))
assert.True(t, retry)
assert.True(t, reset)
retry, reset, _ = base.checkErr(ctx, status.Errorf(codes.Unimplemented, "fake context canceled"))
retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unimplemented, "fake context canceled"))
assert.False(t, retry)
assert.False(t, reset)
// test serverId mismatch
retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unknown, merr.ErrNodeNotMatch.Error()))
assert.True(t, retry)
assert.True(t, reset)
// test cross cluster
retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unknown, merr.ErrServiceCrossClusterRouting.Error()))
assert.True(t, retry)
assert.True(t, reset)
// test default
retry, reset, _ = base.checkGrpcErr(ctx, status.Errorf(codes.Unknown, merr.ErrNodeNotFound.Error()))
assert.True(t, retry)
assert.True(t, reset)
}
type server struct {