Filter segments when doing FromDmlCPLoadDelete (#20388)

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/20400/head
congqixia 2022-11-08 11:47:02 +08:00 committed by GitHub
parent a7bed1c927
commit ad0cce8f70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 40 additions and 17 deletions

View File

@ -107,7 +107,10 @@ func (l *loadSegmentsTask) Execute(ctx context.Context) error {
pos := deltaPosition
runningGroup.Go(func() error {
// reload data from dml channel
return l.node.loader.FromDmlCPLoadDelete(groupCtx, l.req.CollectionID, pos)
return l.node.loader.FromDmlCPLoadDelete(groupCtx, l.req.CollectionID, pos,
lo.FilterMap(l.req.Infos, func(info *queryPb.SegmentLoadInfo, _ int) (int64, bool) {
return info.GetSegmentID(), info.GetInsertChannel() == pos.GetChannelName()
}))
})
}
err = runningGroup.Wait()

View File

@ -231,12 +231,13 @@ func TestTask_loadSegmentsTask(t *testing.T) {
Schema: schema,
Infos: []*querypb.SegmentLoadInfo{
{
SegmentID: segmentID,
PartitionID: defaultPartitionID,
CollectionID: defaultCollectionID,
BinlogPaths: fieldBinlog,
NumOfRows: defaultMsgLength,
Statslogs: statsLog,
SegmentID: segmentID,
PartitionID: defaultPartitionID,
CollectionID: defaultCollectionID,
BinlogPaths: fieldBinlog,
NumOfRows: defaultMsgLength,
Statslogs: statsLog,
InsertChannel: vDmChannel,
},
},
DeltaPositions: []*internalpb.MsgPosition{

View File

@ -50,6 +50,7 @@ import (
"github.com/milvus-io/milvus/internal/util/paramtable"
"github.com/milvus-io/milvus/internal/util/timerecord"
"github.com/milvus-io/milvus/internal/util/tsoutil"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/panjf2000/ants/v2"
)
@ -674,7 +675,8 @@ func (loader *segmentLoader) loadDeltaLogs(ctx context.Context, segment *Segment
return nil
}
func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collectionID int64, position *internalpb.MsgPosition) error {
func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collectionID int64, position *internalpb.MsgPosition,
segmentIDs []int64) error {
startTs := time.Now()
stream, err := loader.factory.NewMsgStream(ctx)
if err != nil {
@ -725,7 +727,11 @@ func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collection
}
log.Info("start read delta msg from seek position to last position",
zap.Int64("collectionID", collectionID), zap.String("channel", pChannelName), zap.Any("seekPos", position), zap.Any("lastMsg", lastMsgID))
zap.Int64("collectionID", collectionID),
zap.String("channel", pChannelName),
zap.String("seekPos", position.String()),
zap.Any("lastMsg", lastMsgID), // use any in case of nil
)
hasMore := true
for hasMore {
select {
@ -740,7 +746,7 @@ func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collection
position.GetMsgID())
log.Warn("fail to read delta msg",
zap.String("pChannelName", pChannelName),
zap.ByteString("msgID", position.GetMsgID()),
zap.Binary("msgID", position.GetMsgID()),
zap.Error(err),
)
return err
@ -760,8 +766,8 @@ func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collection
if err != nil {
// TODO: panic?
// error occurs when missing meta info or unexpected pk type, should not happen
err = fmt.Errorf("deleteNode processDeleteMessages failed, collectionID = %d, err = %s", dmsg.CollectionID, err)
log.Error(err.Error())
err = fmt.Errorf("processDeleteMessages failed, collectionID = %d, err = %s", dmsg.CollectionID, err)
log.Error("FromDmlCPLoadDelete failed to process delete message", zap.Error(err))
return err
}
}
@ -769,7 +775,10 @@ func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collection
ret, err := lastMsgID.LessOrEqualThan(tsMsg.Position().MsgID)
if err != nil {
log.Warn("check whether current MsgID less than last MsgID failed",
zap.Int64("collectionID", collectionID), zap.String("channel", pChannelName), zap.Error(err))
zap.Int64("collectionID", collectionID),
zap.String("channel", pChannelName),
zap.Error(err),
)
return err
}
@ -781,9 +790,19 @@ func (loader *segmentLoader) FromDmlCPLoadDelete(ctx context.Context, collection
}
}
log.Info("All data has been read, there is no more data", zap.Int64("collectionID", collectionID),
zap.String("channel", pChannelName), zap.Any("msgID", position.GetMsgID()))
log.Info("All data has been read, there is no more data",
zap.Int64("collectionID", collectionID),
zap.String("channel", pChannelName),
zap.Binary("msgID", position.GetMsgID()),
)
segmentIDSet := typeutil.NewUniqueSet(segmentIDs...)
for segmentID, pks := range delData.deleteIDs {
// ignore non target segment
if !segmentIDSet.Contain(segmentID) {
continue
}
segment, err := loader.metaReplica.getSegmentByID(segmentID, segmentTypeSealed)
if err != nil {
log.Warn("failed to get segment", zap.Int64("segment", segmentID), zap.Error(err))

View File

@ -767,7 +767,7 @@ func testSeekFailWhenConsumingDeltaMsg(ctx context.Context, t *testing.T, positi
loader := node.loader
assert.NotNil(t, loader)
ret := loader.FromDmlCPLoadDelete(ctx, defaultCollectionID, position)
ret := loader.FromDmlCPLoadDelete(ctx, defaultCollectionID, position, []int64{})
assert.EqualError(t, ret, errMsg)
}
@ -801,7 +801,7 @@ func testConsumingDeltaMsg(ctx context.Context, t *testing.T, position *msgstrea
loader := node.loader
assert.NotNil(t, loader)
return loader.FromDmlCPLoadDelete(ctx, defaultCollectionID, position)
return loader.FromDmlCPLoadDelete(ctx, defaultCollectionID, position, []int64{})
}
type mockMsgID struct {