mirror of https://github.com/milvus-io/milvus.git
Fix recover multi collection after query node down (#13952)
Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/13980/head
parent
526715aee3
commit
d39a4a3f2d
|
@ -1735,29 +1735,23 @@ func (lbt *loadBalanceTask) execute(ctx context.Context) error {
|
|||
}
|
||||
|
||||
mergedDmChannel := mergeDmChannelInfo(dmChannelInfos)
|
||||
for channelName := range dmChannel2WatchInfo {
|
||||
vChannelInfo, ok := mergedDmChannel[channelName]
|
||||
if !ok {
|
||||
err = fmt.Errorf("loadBalanceTask: can't get recovery info from data coord, channel name = %s", channelName)
|
||||
log.Error(err.Error())
|
||||
lbt.setResultInfo(err)
|
||||
return err
|
||||
}
|
||||
for channelName, vChannelInfo := range mergedDmChannel {
|
||||
if _, ok := dmChannel2WatchInfo[channelName]; ok {
|
||||
msgBase := proto.Clone(lbt.Base).(*commonpb.MsgBase)
|
||||
msgBase.MsgType = commonpb.MsgType_WatchDmChannels
|
||||
watchRequest := &querypb.WatchDmChannelsRequest{
|
||||
Base: msgBase,
|
||||
CollectionID: collectionID,
|
||||
Infos: []*datapb.VchannelInfo{vChannelInfo},
|
||||
Schema: schema,
|
||||
}
|
||||
|
||||
msgBase := proto.Clone(lbt.Base).(*commonpb.MsgBase)
|
||||
msgBase.MsgType = commonpb.MsgType_WatchDmChannels
|
||||
watchRequest := &querypb.WatchDmChannelsRequest{
|
||||
Base: msgBase,
|
||||
CollectionID: collectionID,
|
||||
Infos: []*datapb.VchannelInfo{vChannelInfo},
|
||||
Schema: schema,
|
||||
}
|
||||
if collectionInfo.LoadType == querypb.LoadType_LoadPartition {
|
||||
watchRequest.PartitionIDs = toRecoverPartitionIDs
|
||||
}
|
||||
|
||||
if collectionInfo.LoadType == querypb.LoadType_LoadPartition {
|
||||
watchRequest.PartitionIDs = toRecoverPartitionIDs
|
||||
watchDmChannelReqs = append(watchDmChannelReqs, watchRequest)
|
||||
}
|
||||
|
||||
watchDmChannelReqs = append(watchDmChannelReqs, watchRequest)
|
||||
}
|
||||
}
|
||||
internalTasks, err := assignInternalTask(ctx, lbt, lbt.meta, lbt.cluster, loadSegmentReqs, watchDmChannelReqs, true, lbt.SourceNodeIDs, lbt.DstNodeIDs)
|
||||
|
|
|
@ -1036,6 +1036,42 @@ func TestLoadBalanceIndexedSegmentsAfterNodeDown(t *testing.T) {
|
|||
assert.Nil(t, err)
|
||||
}
|
||||
|
||||
func TestLoadBalancePartitionAfterNodeDown(t *testing.T) {
|
||||
refreshParams()
|
||||
ctx := context.Background()
|
||||
queryCoord, err := startQueryCoord(ctx)
|
||||
assert.Nil(t, err)
|
||||
|
||||
node1, err := startQueryNodeServer(ctx)
|
||||
assert.Nil(t, err)
|
||||
waitQueryNodeOnline(queryCoord.cluster, node1.queryNodeID)
|
||||
|
||||
loadPartitionTask := genLoadPartitionTask(ctx, queryCoord)
|
||||
|
||||
err = queryCoord.scheduler.Enqueue(loadPartitionTask)
|
||||
assert.Nil(t, err)
|
||||
waitTaskFinalState(loadPartitionTask, taskExpired)
|
||||
|
||||
node2, err := startQueryNodeServer(ctx)
|
||||
assert.Nil(t, err)
|
||||
waitQueryNodeOnline(queryCoord.cluster, node2.queryNodeID)
|
||||
|
||||
indexCoord := newIndexCoordMock()
|
||||
indexCoord.returnIndexFile = true
|
||||
queryCoord.indexCoordClient = indexCoord
|
||||
removeNodeSession(node1.queryNodeID)
|
||||
for {
|
||||
if len(queryCoord.meta.getSegmentInfosByNode(node1.queryNodeID)) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
node2.stop()
|
||||
queryCoord.Stop()
|
||||
err = removeAllSession()
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
|
||||
func TestMergeWatchDeltaChannelInfo(t *testing.T) {
|
||||
infos := []*datapb.VchannelInfo{
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue