fix: prevent GetShardLeaders from returning an empty node list (#32685)

issue: #32449

To prevent GetShardLeaders from returning an empty node list, this PR adds a
node list check on both the client side and the server side.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/32697/head
wei liu 2024-04-29 14:19:26 +08:00 committed by GitHub
parent ef4c875d4c
commit d900e68440
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 2 deletions

View File

@ -204,11 +204,16 @@ func (lb *LBPolicyImpl) Execute(ctx context.Context, workload CollectionWorkLoad
return err
}
// allow every request to retry at least twice, so that it can retry after the shard leader cache is updated
retryTimes := Params.ProxyCfg.RetryTimesOnReplica.GetAsInt()
wg, ctx := errgroup.WithContext(ctx)
for channel, nodes := range dml2leaders {
channel := channel
nodes := lo.Map(nodes, func(node nodeInfo, _ int) int64 { return node.nodeID })
retryOnReplica := Params.ProxyCfg.RetryTimesOnReplica.GetAsInt()
channelRetryTimes := retryTimes
if len(nodes) > 0 {
channelRetryTimes *= len(nodes)
}
wg.Go(func() error {
return lb.ExecuteWithRetry(ctx, ChannelWorkload{
db: workload.db,
@ -218,7 +223,7 @@ func (lb *LBPolicyImpl) Execute(ctx context.Context, workload CollectionWorkLoad
shardLeaders: nodes,
nq: workload.nq,
exec: workload.exec,
retryTimes: uint(len(nodes) * retryOnReplica),
retryTimes: uint(channelRetryTimes),
})
})
}

View File

@ -942,6 +942,16 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
}
}
// guard against nodes going down while GetShardLeaders is running
if len(ids) == 0 {
msg := fmt.Sprintf("channel %s is not available in any replica", channel.GetChannelName())
log.Warn(msg, zap.Error(channelErr))
resp.Status = merr.Status(
errors.Wrap(merr.WrapErrChannelNotAvailable(channel.GetChannelName()), channelErr.Error()))
resp.Shards = nil
return resp, nil
}
resp.Shards = append(resp.Shards, &querypb.ShardLeadersList{
ChannelName: channel.GetChannelName(),
NodeIds: ids,