mirror of https://github.com/milvus-io/milvus.git
issue: #29677 #29838 pr: #29999 During get shard leaders, if a querynode doesn't ack the heartbeat for more than 10s, querycoord treats it as unavailable and won't return the shard leader on it. But when a querynode has full CPU usage, it can easily get stuck for more than 10s without acking the heartbeat, which leaves no shard leader available for search/query. This PR removes the heartbeat lag logic during get shard leaders. Signed-off-by: Wei Liu <wei.liu@zilliz.com> pull/30116/head
parent
7f32576f36
commit
71e24f0a7f
|
@ -359,8 +359,6 @@ func (s *Server) fillReplicaInfo(replica *meta.Replica, withShardNodes bool) (*m
|
|||
func checkNodeAvailable(nodeID int64, info *session.NodeInfo) error {
|
||||
if info == nil {
|
||||
return merr.WrapErrNodeOffline(nodeID)
|
||||
} else if time.Since(info.LastHeartbeat()) > Params.QueryCoordCfg.HeartbeatAvailableInterval.GetAsDuration(time.Millisecond) {
|
||||
return merr.WrapErrNodeOffline(nodeID, fmt.Sprintf("lastHB=%v", info.LastHeartbeat()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1521,12 +1521,6 @@ func (suite *ServiceSuite) TestGetShardLeadersFailed() {
|
|||
suite.nodeMgr.Add(session.NewNodeInfo(node, "localhost"))
|
||||
}
|
||||
|
||||
// Last heartbeat response time too old
|
||||
suite.fetchHeartbeats(time.Now().Add(-Params.QueryCoordCfg.HeartbeatAvailableInterval.GetAsDuration(time.Millisecond) - 1))
|
||||
resp, err = server.GetShardLeaders(ctx, req)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_NoReplicaAvailable, resp.GetStatus().GetErrorCode())
|
||||
|
||||
// Segment not fully loaded
|
||||
for _, node := range suite.nodes {
|
||||
suite.dist.SegmentDistManager.Update(node)
|
||||
|
|
Loading…
Reference in New Issue