// milvus/internal/querynodev2/delegator/delta_forward_test.go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package delegator

import (
	"context"
	"math/rand"
	"testing"

	"github.com/cockroachdb/errors"
	"github.com/samber/lo"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/suite"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/querynodev2/cluster"
	"github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
	"github.com/milvus-io/milvus/internal/querynodev2/segments"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/pkg/v2/common"
	"github.com/milvus-io/milvus/pkg/v2/mq/msgstream"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
	"github.com/milvus-io/milvus/pkg/v2/util/merr"
	"github.com/milvus-io/milvus/pkg/v2/util/metric"
	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
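
// StreamingForwardSuite covers the streaming delta-forward policies of the
// shard delegator: BF-based forwarding and direct forwarding.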
type StreamingForwardSuite struct {
	suite.Suite

	collectionID  int64
	partitionIDs  []int64
	replicaID     int64
	vchannelName  string
	version       int64
	workerManager *cluster.MockManager
	manager       *segments.Manager
	loader        *segments.MockLoader
	mq            *msgstream.MockMsgStream

	delegator    *shardDelegator
	chunkManager storage.ChunkManager
	rootPath     string
}

func (s *StreamingForwardSuite) SetupSuite() {
	paramtable.Init()
	paramtable.SetNodeID(1)
}

func (s *StreamingForwardSuite) SetupTest() {
	s.collectionID = 1000
	s.partitionIDs = []int64{500, 501}
	s.replicaID = 65535
	s.vchannelName = "rootcoord-dml_1000_v0"
	s.version = 2000
	s.workerManager = &cluster.MockManager{}
	s.manager = segments.NewManager()
	s.loader = &segments.MockLoader{}
	s.loader.EXPECT().
		Load(mock.Anything, s.collectionID, segments.SegmentTypeGrowing, int64(0), mock.Anything).
		Call.Return(func(ctx context.Context, collectionID int64, segmentType segments.SegmentType, version int64, infos ...*querypb.SegmentLoadInfo) []segments.Segment {
		return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) segments.Segment {
			ms := &segments.MockSegment{}
			ms.EXPECT().ID().Return(info.GetSegmentID())
			ms.EXPECT().Type().Return(segments.SegmentTypeGrowing)
			ms.EXPECT().Partition().Return(info.GetPartitionID())
			ms.EXPECT().Collection().Return(info.GetCollectionID())
			ms.EXPECT().Indexes().Return(nil)
			ms.EXPECT().RowNum().Return(info.GetNumOfRows())
			ms.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).Return(nil)
			return ms
		})
	}, nil)

	// init schema
	s.manager.Collection.PutOrRef(s.collectionID, &schemapb.CollectionSchema{
		Name: "TestCollection",
		Fields: []*schemapb.FieldSchema{
			{
				Name:         "id",
				FieldID:      100,
				IsPrimaryKey: true,
				DataType:     schemapb.DataType_Int64,
				AutoID:       true,
			},
			{
				Name:         "vector",
				FieldID:      101,
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_BinaryVector,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: "128",
					},
				},
			},
		},
	}, &segcorepb.CollectionIndexMeta{
		MaxIndexRowCount: 100,
		IndexMetas: []*segcorepb.FieldIndexMeta{
			{
				FieldID:      101,
				CollectionID: s.collectionID,
				IndexName:    "binary_index",
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: "128",
					},
				},
				IndexParams: []*commonpb.KeyValuePair{
					{
						Key:   common.IndexTypeKey,
						Value: "BIN_IVF_FLAT",
					},
					{
						Key:   common.MetricTypeKey,
						Value: metric.JACCARD,
					},
				},
			},
		},
	}, &querypb.LoadMetaInfo{
		PartitionIDs: s.partitionIDs,
	})

	s.mq = &msgstream.MockMsgStream{}
	s.rootPath = "delegator_test"

	// init chunkManager
	chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), s.rootPath)
	s.chunkManager, _ = chunkManagerFactory.NewPersistentStorageChunkManager(context.Background())

	delegator, err := NewShardDelegator(context.Background(), s.collectionID, s.replicaID, s.vchannelName, s.version, s.workerManager, s.manager, s.loader, &msgstream.MockMqFactory{
		NewMsgStreamFunc: func(_ context.Context) (msgstream.MsgStream, error) {
			return s.mq, nil
		},
	}, 10000, nil, s.chunkManager)
	s.Require().NoError(err)

	sd, ok := delegator.(*shardDelegator)
	s.Require().True(ok)
	s.delegator = sd
}
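
// TestBFStreamingForward verifies that under the BF forward policy a delete is
// forwarded only to segments whose pk oracle reports a possible match: here
// only the always-matching candidate key registered for segment 101.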
func (s *StreamingForwardSuite) TestBFStreamingForward() {
	paramtable.Get().Save(paramtable.Get().QueryNodeCfg.StreamingDeltaForwardPolicy.Key, StreamingForwardPolicyBF)
	defer paramtable.Get().Reset(paramtable.Get().QueryNodeCfg.StreamingDeltaForwardPolicy.Key)

	delegator := s.delegator

	// Setup distribution
	delegator.distribution.AddGrowing(SegmentEntry{
		NodeID:      1,
		PartitionID: 1,
		SegmentID:   100,
	})
	delegator.distribution.AddDistributions(SegmentEntry{
		NodeID:      1,
		PartitionID: 1,
		SegmentID:   101,
	})
	delegator.distribution.AddDistributions(SegmentEntry{
		NodeID:      1,
		PartitionID: 1,
		SegmentID:   102,
	})
	delegator.distribution.SyncTargetVersion(1, []int64{1}, []int64{100}, []int64{101, 102}, nil)

	// Setup pk oracle
	// empty bloom filter sets will not match
	delegator.pkOracle.Register(pkoracle.NewBloomFilterSet(100, 10, commonpb.SegmentState_Growing), 1)
	delegator.pkOracle.Register(pkoracle.NewBloomFilterSet(102, 10, commonpb.SegmentState_Sealed), 1)
	// candidate key always matches
	delegator.pkOracle.Register(pkoracle.NewCandidateKey(101, 10, commonpb.SegmentState_Sealed), 1)

	deletedSegment := typeutil.NewConcurrentSet[int64]()
	mockWorker := cluster.NewMockWorker(s.T())
	s.workerManager.EXPECT().GetWorker(mock.Anything, int64(1)).Return(mockWorker, nil)
	mockWorker.EXPECT().Delete(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, dr *querypb.DeleteRequest) error {
		s.T().Log(dr.GetSegmentId())
		deletedSegment.Insert(dr.GetSegmentId())
		s.ElementsMatch([]int64{10}, dr.GetPrimaryKeys().GetIntId().GetData())
		s.ElementsMatch([]uint64{10}, dr.GetTimestamps())
		return nil
	}).Maybe()

	delegator.ProcessDelete([]*DeleteData{
		{
			PartitionID: -1,
			PrimaryKeys: []storage.PrimaryKey{storage.NewInt64PrimaryKey(10)},
			Timestamps:  []uint64{10},
			RowCount:    1,
		},
	}, 100)
	s.ElementsMatch([]int64{101}, deletedSegment.Collect())
}
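
// TestDirectStreamingForward verifies that under the direct forward policy a
// delete is batch-forwarded to every segment in the distribution, regardless
// of pk oracle hits.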
func (s *StreamingForwardSuite) TestDirectStreamingForward() {
	paramtable.Get().Save(paramtable.Get().QueryNodeCfg.StreamingDeltaForwardPolicy.Key, StreamingForwardPolicyDirect)
	defer paramtable.Get().Reset(paramtable.Get().QueryNodeCfg.StreamingDeltaForwardPolicy.Key)

	delegator := s.delegator

	// Setup distribution
	delegator.distribution.AddGrowing(SegmentEntry{
		NodeID:      1,
		PartitionID: 1,
		SegmentID:   100,
	})
	delegator.distribution.AddDistributions(SegmentEntry{
		NodeID:      1,
		PartitionID: 1,
		SegmentID:   101,
	})
	delegator.distribution.AddDistributions(SegmentEntry{
		NodeID:      1,
		PartitionID: 1,
		SegmentID:   102,
	})
	delegator.distribution.SyncTargetVersion(1, []int64{1}, []int64{100}, []int64{101, 102}, nil)

	// Setup pk oracle
	// empty bloom filter sets will not match
	delegator.pkOracle.Register(pkoracle.NewBloomFilterSet(100, 10, commonpb.SegmentState_Growing), 1)
	delegator.pkOracle.Register(pkoracle.NewBloomFilterSet(102, 10, commonpb.SegmentState_Sealed), 1)
	// candidate key always matches
	delegator.pkOracle.Register(pkoracle.NewCandidateKey(101, 10, commonpb.SegmentState_Sealed), 1)

	deletedSegment := typeutil.NewConcurrentSet[int64]()
	mockWorker := cluster.NewMockWorker(s.T())
	s.workerManager.EXPECT().GetWorker(mock.Anything, int64(1)).Return(mockWorker, nil)
	mockWorker.EXPECT().DeleteBatch(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, dr *querypb.DeleteBatchRequest) (*querypb.DeleteBatchResponse, error) {
		deletedSegment.Upsert(dr.GetSegmentIds()...)
		s.ElementsMatch([]int64{10}, dr.GetPrimaryKeys().GetIntId().GetData())
		s.ElementsMatch([]uint64{10}, dr.GetTimestamps())
		return &querypb.DeleteBatchResponse{Status: merr.Success()}, nil
	})

	delegator.ProcessDelete([]*DeleteData{
		{
			PartitionID: -1,
			PrimaryKeys: []storage.PrimaryKey{storage.NewInt64PrimaryKey(10)},
			Timestamps:  []uint64{10},
			RowCount:    1,
		},
	}, 100)
	s.ElementsMatch([]int64{100, 101, 102}, deletedSegment.Collect())
}

func TestStreamingForward(t *testing.T) {
	suite.Run(t, new(StreamingForwardSuite))
}
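
// GrowingMergeL0Suite covers merging buffered L0 segment deletes into a
// growing segment via the delegator's addL0ForGrowing path.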
type GrowingMergeL0Suite struct {
	suite.Suite

	collectionID int64
	partitionIDs []int64
	replicaID    int64
	vchannelName string
	version      int64
	schema       *schemapb.CollectionSchema

	workerManager *cluster.MockManager
	manager       *segments.Manager
	loader        *segments.MockLoader
	mq            *msgstream.MockMsgStream

	delegator    *shardDelegator
	chunkManager storage.ChunkManager
	rootPath     string
}

func (s *GrowingMergeL0Suite) SetupSuite() {
	paramtable.Init()
	paramtable.SetNodeID(1)
}

func (s *GrowingMergeL0Suite) SetupTest() {
	s.collectionID = 1000
	s.partitionIDs = []int64{500, 501}
	s.replicaID = 65535
	s.vchannelName = "rootcoord-dml_1000v0"
	s.version = 2000
	s.workerManager = &cluster.MockManager{}
	s.manager = segments.NewManager()
	s.loader = &segments.MockLoader{}
	s.loader.EXPECT().
		Load(mock.Anything, s.collectionID, segments.SegmentTypeGrowing, int64(0), mock.Anything).
		Call.Return(func(ctx context.Context, collectionID int64, segmentType segments.SegmentType, version int64, infos ...*querypb.SegmentLoadInfo) []segments.Segment {
		return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) segments.Segment {
			ms := &segments.MockSegment{}
			ms.EXPECT().ID().Return(info.GetSegmentID())
			ms.EXPECT().Type().Return(segments.SegmentTypeGrowing)
			ms.EXPECT().Partition().Return(info.GetPartitionID())
			ms.EXPECT().Collection().Return(info.GetCollectionID())
			ms.EXPECT().Indexes().Return(nil)
			ms.EXPECT().RowNum().Return(info.GetNumOfRows())
			ms.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).Return(nil)
			return ms
		})
	}, nil)

	// init schema
	s.schema = &schemapb.CollectionSchema{
		Name: "TestCollection",
		Fields: []*schemapb.FieldSchema{
			{
				Name:         "id",
				FieldID:      100,
				IsPrimaryKey: true,
				DataType:     schemapb.DataType_Int64,
				AutoID:       true,
			},
			{
				Name:         "vector",
				FieldID:      101,
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_BinaryVector,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: "128",
					},
				},
			},
		},
	}
	s.manager.Collection.PutOrRef(s.collectionID, s.schema, &segcorepb.CollectionIndexMeta{
		MaxIndexRowCount: 100,
		IndexMetas: []*segcorepb.FieldIndexMeta{
			{
				FieldID:      101,
				CollectionID: s.collectionID,
				IndexName:    "binary_index",
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: "128",
					},
				},
				IndexParams: []*commonpb.KeyValuePair{
					{
						Key:   common.IndexTypeKey,
						Value: "BIN_IVF_FLAT",
					},
					{
						Key:   common.MetricTypeKey,
						Value: metric.JACCARD,
					},
				},
			},
		},
	}, &querypb.LoadMetaInfo{
		PartitionIDs: s.partitionIDs,
	})

	s.mq = &msgstream.MockMsgStream{}
	s.rootPath = "delegator_test"

	// init chunkManager
	chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), s.rootPath)
	s.chunkManager, _ = chunkManagerFactory.NewPersistentStorageChunkManager(context.Background())

	delegator, err := NewShardDelegator(context.Background(), s.collectionID, s.replicaID, s.vchannelName, s.version, s.workerManager, s.manager, s.loader, &msgstream.MockMqFactory{
		NewMsgStreamFunc: func(_ context.Context) (msgstream.MsgStream, error) {
			return s.mq, nil
		},
	}, 10000, nil, s.chunkManager)
	s.Require().NoError(err)

	sd, ok := delegator.(*shardDelegator)
	s.Require().True(ok)
	s.delegator = sd
}
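
// TestAddL0ForGrowingBF checks the BF policy path: buffered L0 delete records
// are applied to the growing segment through Segment.Delete, and an error from
// Delete is propagated to the caller.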
func (s *GrowingMergeL0Suite) TestAddL0ForGrowingBF() {
	sd := s.delegator
	sd.l0ForwardPolicy = L0ForwardPolicyBF

	seg := segments.NewMockSegment(s.T())
	coll := s.manager.Collection.Get(s.collectionID)
	l0Segment, err := segments.NewL0Segment(coll, segments.SegmentTypeSealed, s.version, &querypb.SegmentLoadInfo{
		SegmentID:     10001,
		CollectionID:  s.collectionID,
		PartitionID:   common.AllPartitionsID,
		InsertChannel: s.vchannelName,
	})
	s.Require().NoError(err)

	n := 10
	deltaData := storage.NewDeltaData(int64(n))
	for i := 0; i < n; i++ {
		deltaData.Append(storage.NewInt64PrimaryKey(rand.Int63()), 0)
	}
	err = l0Segment.LoadDeltaData(context.Background(), deltaData)
	s.Require().NoError(err)
	s.delegator.deleteBuffer.RegisterL0(l0Segment)

	seg.EXPECT().ID().Return(10000)
	seg.EXPECT().Partition().Return(100)
	seg.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, pk storage.PrimaryKeys, u []uint64) error {
		s.Equal(deltaData.DeletePks(), pk)
		s.Equal(deltaData.DeleteTimestamps(), u)
		return nil
	}).Once()

	err = sd.addL0ForGrowing(context.Background(), seg)
	s.NoError(err)

	seg.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, pk storage.PrimaryKeys, u []uint64) error {
		return errors.New("mocked")
	}).Once()

	err = sd.addL0ForGrowing(context.Background(), seg)
	s.Error(err)
}
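
// TestAddL0ForGrowingLoad checks the remote-load policy path: the L0 segment's
// delta logs are loaded onto the growing segment via the loader, and a loader
// error is propagated to the caller.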
func (s *GrowingMergeL0Suite) TestAddL0ForGrowingLoad() {
	sd := s.delegator
	sd.l0ForwardPolicy = L0ForwardPolicyRemoteLoad

	seg := segments.NewMockSegment(s.T())
	coll := s.manager.Collection.Get(s.collectionID)
	l0Segment, err := segments.NewL0Segment(coll, segments.SegmentTypeSealed, s.version, &querypb.SegmentLoadInfo{
		SegmentID:     10001,
		CollectionID:  s.collectionID,
		PartitionID:   common.AllPartitionsID,
		InsertChannel: s.vchannelName,
		Deltalogs: []*datapb.FieldBinlog{
			{Binlogs: []*datapb.Binlog{
				{LogPath: "mocked_log_path"},
			}},
		},
	})
	s.Require().NoError(err)

	n := 10
	deltaData := storage.NewDeltaData(int64(n))
	for i := 0; i < n; i++ {
		deltaData.Append(storage.NewInt64PrimaryKey(rand.Int63()), 0)
	}
	err = l0Segment.LoadDeltaData(context.Background(), deltaData)
	s.Require().NoError(err)
	s.delegator.deleteBuffer.RegisterL0(l0Segment)

	seg.EXPECT().ID().Return(10000)
	seg.EXPECT().Partition().Return(100)
	s.loader.EXPECT().LoadDeltaLogs(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, seg segments.Segment, fb []*datapb.FieldBinlog) error {
		s.ElementsMatch([]string{"mocked_log_path"}, lo.Flatten(lo.Map(fb, func(fbl *datapb.FieldBinlog, _ int) []string {
			return lo.Map(fbl.Binlogs, func(bl *datapb.Binlog, _ int) string { return bl.LogPath })
		})))
		return nil
	}).Once()

	err = sd.addL0ForGrowing(context.Background(), seg)
	s.NoError(err)

	s.loader.EXPECT().LoadDeltaLogs(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, seg segments.Segment, fb []*datapb.FieldBinlog) error {
		return errors.New("mocked")
	}).Once()

	err = sd.addL0ForGrowing(context.Background(), seg)
	s.Error(err)
}
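
// TestAddL0ForGrowingInvalid asserts that an unknown forward policy panics.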
func (s *GrowingMergeL0Suite) TestAddL0ForGrowingInvalid() {
	sd := s.delegator
	sd.l0ForwardPolicy = "invalid_policy"

	seg := segments.NewMockSegment(s.T())
	s.Panics(func() {
		sd.addL0ForGrowing(context.Background(), seg)
	})
}

func TestGrowingMergeL0(t *testing.T) {
	suite.Run(t, new(GrowingMergeL0Suite))
}