fix: Skip unnecessary query node health check in proxy (#36491)

issue: #36490
After a query node changes from a delegator to a worker, the proxy should
skip that query node's health check.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/36604/head
wei liu 2024-09-26 18:13:15 +08:00 committed by GitHub
parent 55be814a58
commit c056620899
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 7 deletions

View File

@ -247,6 +247,7 @@ func (b *LookAsideBalancer) checkQueryNodeHealthLoop(ctx context.Context) {
qn, err := b.clientMgr.GetClient(ctx, node)
if err != nil {
// get client from clientMgr failed, which means this qn isn't a shard leader anymore, skip its health check
b.trySetQueryNodeUnReachable(node, err)
log.RatedInfo(10, "get client failed", zap.Int64("node", node), zap.Error(err))
return struct{}{}, nil
}

View File

@ -25,7 +25,6 @@ import (
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"go.uber.org/atomic"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
@ -344,12 +343,10 @@ func (suite *LookAsideBalancerSuite) TestGetClientFailed() {
// test get shard client from client mgr return nil
suite.clientMgr.ExpectedCalls = nil
suite.clientMgr.EXPECT().GetClient(mock.Anything, int64(2)).Return(nil, errors.New("shard client not found"))
failCounter := atomic.NewInt64(0)
suite.balancer.failedHeartBeatCounter.Insert(2, failCounter)
// sleep 10s, wait for checkNodeHealth to execute for more than one round
time.Sleep(10 * time.Second)
suite.True(failCounter.Load() == 0)
// expect the health check to stop once the failure count reaches the limit
suite.Eventually(func() bool {
return !suite.balancer.metricsMap.Contain(2)
}, 30*time.Second, 1*time.Second)
}
func (suite *LookAsideBalancerSuite) TestNodeRecover() {