mirror of https://github.com/milvus-io/milvus.git
parent 8f3b8ed1da
commit 84e568ee32
@@ -190,9 +190,17 @@ func (dsService *dataSyncService) initNodes(vchanInfo *datapb.VchannelInfo) erro
 		return err
 	}
 
-	dn := newDeleteNode(dsService.replica, vchanInfo.GetChannelName(), dsService.flushChs.deleteBufferCh)
-
-	var deleteNode Node = dn
+	var deleteNode Node
+	deleteNode, err = newDeleteNode(
+		dsService.ctx,
+		dsService.replica,
+		dsService.idAllocator,
+		dsService.flushChs.deleteBufferCh,
+		vchanInfo.GetChannelName(),
+	)
+	if err != nil {
+		return err
+	}
 
 	// recover segment checkpoints
 	for _, us := range vchanInfo.GetUnflushedSegments() {
@@ -99,7 +99,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg {
 				return []Msg{}
 			}
 		case commonpb.MsgType_Insert:
-			log.Debug("DDNode with insert messages")
+			log.Debug("DDNode receive insert messages")
 			imsg := msg.(*msgstream.InsertMsg)
 			if imsg.CollectionID != ddn.collectionID {
 				//log.Debug("filter invalid InsertMsg, collection mis-match",
@@ -117,19 +117,27 @@ func (ddn *ddNode) Operate(in []Msg) []Msg {
 				}
 			}
 			fgMsg.insertMessages = append(fgMsg.insertMessages, imsg)
+		case commonpb.MsgType_Delete:
+			log.Debug("DDNode receive delete messages")
+			dmsg := msg.(*msgstream.DeleteMsg)
+			if dmsg.CollectionID != ddn.collectionID {
+				//log.Debug("filter invalid DeleteMsg, collection mis-match",
+				//	zap.Int64("Get msg collID", dmsg.CollectionID),
+				//	zap.Int64("Expected collID", ddn.collectionID))
+				continue
+			}
+			fgMsg.deleteMessages = append(fgMsg.deleteMessages, dmsg)
 		}
 	}
 
 	fgMsg.startPositions = append(fgMsg.startPositions, msMsg.StartPositions()...)
 	fgMsg.endPositions = append(fgMsg.endPositions, msMsg.EndPositions()...)
 
-	var res Msg = &fgMsg
-
 	for _, sp := range spans {
 		sp.Finish()
 	}
 
-	return []Msg{res}
+	return []Msg{&fgMsg}
 }
 
 func (ddn *ddNode) filterFlushedSegmentInsertMessages(msg *msgstream.InsertMsg) bool {
@@ -207,6 +207,44 @@ func TestFlowGraph_DDNode_Operate(to *testing.T) {
 			}
 		})
 
+	to.Run("Test DDNode Operate Delete Msg", func(te *testing.T) {
+		tests := []struct {
+			ddnCollID   UniqueID
+			inMsgCollID UniqueID
+
+			MsgEndTs Timestamp
+
+			expectedRtLen int
+			description   string
+		}{
+			{1, 1, 2000, 1, "normal"},
+			{1, 2, 4000, 0, "inMsgCollID(2) != ddnCollID"},
+		}
+
+		for _, test := range tests {
+			te.Run(test.description, func(t *testing.T) {
+				// Prepare ddNode states
+				ddn := ddNode{
+					collectionID: test.ddnCollID,
+				}
+
+				// Prepare delete messages
+				var dMsg msgstream.TsMsg = &msgstream.DeleteMsg{
+					BaseMsg: msgstream.BaseMsg{EndTimestamp: test.MsgEndTs},
+					DeleteRequest: internalpb.DeleteRequest{
+						Base:         &commonpb.MsgBase{MsgType: commonpb.MsgType_Delete},
+						CollectionID: test.inMsgCollID,
+					},
+				}
+				tsMessages := []msgstream.TsMsg{dMsg}
+				var msgStreamMsg Msg = flowgraph.GenerateMsgStreamMsg(tsMessages, 0, 0, nil, nil)
+
+				// Test
+				rt := ddn.Operate([]Msg{msgStreamMsg})
+				assert.Equal(t, test.expectedRtLen, len(rt[0].(*flowGraphMsg).deleteMessages))
+			})
+		}
+	})
 }
 
 func TestFlowGraph_DDNode_filterMessages(te *testing.T) {
@@ -12,21 +12,58 @@
 package datanode
 
 import (
+	"context"
+	"encoding/binary"
+	"path"
 	"strconv"
 	"sync"
 
 	"go.uber.org/zap"
 
+	"github.com/milvus-io/milvus/internal/kv"
+	miniokv "github.com/milvus-io/milvus/internal/kv/minio"
 	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/msgstream"
+	"github.com/milvus-io/milvus/internal/proto/etcdpb"
 	"github.com/milvus-io/milvus/internal/storage"
+	"github.com/milvus-io/milvus/internal/util/trace"
+	"github.com/opentracing/opentracing-go"
 )
 
 type (
 	// DeleteData record deleted IDs and Timestamps
 	DeleteData = storage.DeleteData
 )
 
 // DeleteNode is to process delete msg, flush delete info into storage.
 type deleteNode struct {
 	BaseNode
 
 	channelName string
 	delBuf      sync.Map // map[segmentID]*DelDataBuf
 	replica     Replica
+	idAllocator allocatorInterface
+	flushCh     <-chan *flushMsg
+	minIOKV     kv.BaseKV
+}
 
-	flushCh <-chan *flushMsg
+// BufferData buffers insert data, monitoring buffer size and limit
+// size and limit both indicate numOfRows
+type DelDataBuf struct {
+	delData *DeleteData
+	size    int64
 }
+
+func (ddb *DelDataBuf) updateSize(size int64) {
+	ddb.size += size
+}
+
+func newDelDataBuf() *DelDataBuf {
+	return &DelDataBuf{
+		delData: &DeleteData{
+			Data: make(map[string]int64),
+		},
+		size: 0,
+	}
+}
 
 func (dn *deleteNode) Name() string {
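The DelDataBuf introduced above is a per-segment buffer of deleted primary keys, each mapped to the timestamp of the delete, plus a running row count. A minimal standalone sketch of the same idea, using a plain map in place of the storage.DeleteData alias (names here are illustrative, not taken from the Milvus codebase):

package main

import (
	"fmt"
	"strconv"
)

// delDataBuf mirrors DelDataBuf above: deleted primary keys keyed as strings,
// each mapped to the delete timestamp, plus a running row count.
type delDataBuf struct {
	data map[string]int64
	size int64
}

func newBuf() *delDataBuf {
	return &delDataBuf{data: make(map[string]int64)}
}

func (b *delDataBuf) buffer(pks []int64, ts int64) {
	for _, pk := range pks {
		b.data[strconv.FormatInt(pk, 10)] = ts
	}
	b.size += int64(len(pks))
}

func main() {
	b := newBuf()
	b.buffer([]int64{3, 17, 44}, 2000)
	fmt.Println(b.size, b.data["17"]) // 3 2000
}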
@@ -37,31 +74,112 @@ func (dn *deleteNode) Close() {
 	log.Info("Flowgraph Delete Node closing")
 }
 
+func (dn *deleteNode) bufferDeleteMsg(msg *msgstream.DeleteMsg) error {
+	log.Debug("bufferDeleteMsg", zap.Any("primary keys", msg.PrimaryKeys))
+
+	segIDToPkMap := make(map[UniqueID][]int64)
+	segIDToTsMap := make(map[UniqueID][]int64)
+
+	m := dn.filterSegmentByPK(msg.PartitionID, msg.PrimaryKeys)
+	for _, pk := range msg.PrimaryKeys {
+		segIDs, ok := m[pk]
+		if !ok {
+			log.Warn("primary key not exist in all segments", zap.Int64("primary key", pk))
+			continue
+		}
+		for _, segID := range segIDs {
+			segIDToPkMap[segID] = append(segIDToPkMap[segID], pk)
+			segIDToTsMap[segID] = append(segIDToTsMap[segID], int64(msg.Timestamp))
+		}
+	}
+
+	for segID, pks := range segIDToPkMap {
+		rows := len(pks)
+		tss, ok := segIDToTsMap[segID]
+		if !ok || rows != len(tss) {
+			log.Error("primary keys and timestamp's element num mis-match")
+		}
+
+		newBuf := newDelDataBuf()
+		delDataBuf, _ := dn.delBuf.LoadOrStore(segID, newBuf)
+		delData := delDataBuf.(*DelDataBuf).delData
+
+		for i := 0; i < rows; i++ {
+			delData.Data[strconv.FormatInt(pks[i], 10)] = tss[i]
+			log.Debug("delete", zap.Int64("primary key", pks[i]), zap.Int64("ts", tss[i]))
+		}
+
+		// store
+		delDataBuf.(*DelDataBuf).updateSize(int64(rows))
+		dn.delBuf.Store(segID, delDataBuf)
+	}
+
+	return nil
+}
+
+func (dn *deleteNode) showDelBuf() {
+	segments := dn.replica.filterSegments(dn.channelName, 0)
+	for _, seg := range segments {
+		segID := seg.segmentID
+		if v, ok := dn.delBuf.Load(segID); ok {
+			delDataBuf, _ := v.(*DelDataBuf)
+			log.Debug("del data buffer status", zap.Int64("segID", segID), zap.Int64("size", delDataBuf.size))
+			for pk, ts := range delDataBuf.delData.Data {
+				log.Debug("del data", zap.String("pk", pk), zap.Int64("ts", ts))
+			}
+		} else {
+			log.Error("segment not exist", zap.Int64("segID", segID))
+		}
+	}
+}
+
 func (dn *deleteNode) Operate(in []Msg) []Msg {
-	log.Debug("deleteNode Operating")
+	//log.Debug("deleteNode Operating")
 
 	if len(in) != 1 {
-		log.Warn("Invalid operate message input in deleteNode", zap.Int("input length", len(in)))
-		return []Msg{}
+		log.Error("Invalid operate message input in deleteNode", zap.Int("input length", len(in)))
+		return nil
 	}
 
-	_, ok := in[0].(*MsgStreamMsg)
+	fgMsg, ok := in[0].(*flowGraphMsg)
 	if !ok {
-		log.Warn("type assertion failed for MsgStreamMsg")
-		return []Msg{}
+		log.Error("type assertion failed for flowGraphMsg")
+		return nil
 	}
 
+	var spans []opentracing.Span
+	for _, msg := range fgMsg.deleteMessages {
+		sp, ctx := trace.StartSpanFromContext(msg.TraceCtx())
+		spans = append(spans, sp)
+		msg.SetTraceCtx(ctx)
+	}
+
+	for _, msg := range fgMsg.deleteMessages {
+		if err := dn.bufferDeleteMsg(msg); err != nil {
+			log.Error("buffer delete msg failed", zap.Error(err))
+		}
+	}
+
+	// show all data in dn.delBuf
+	if len(fgMsg.deleteMessages) != 0 {
+		dn.showDelBuf()
+	}
+
+	// handle manual flush
 	select {
 	case fmsg := <-dn.flushCh:
-		currentSegID := fmsg.segmentID
-		log.Debug("DeleteNode receives flush message",
-			zap.Int64("segmentID", currentSegID),
-			zap.Int64("collectionID", fmsg.collectionID),
-		)
+		log.Debug("DeleteNode receives flush message", zap.Int64("collID", fmsg.collectionID))
+		dn.flushDelData(fmsg.collectionID, fgMsg.timeRange)
+
+		// clean dn.delBuf
+		dn.delBuf = sync.Map{}
 	default:
 	}
 
-	return []Msg{}
+	for _, sp := range spans {
+		sp.Finish()
+	}
+	return nil
 }
 
 // filterSegmentByPK returns the bloom filter check result.
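Two Go patterns carry the bufferDeleteMsg and Operate logic above: per-segment buffers live in a sync.Map and are fetched with LoadOrStore, and the manual-flush channel is read with a non-blocking select/default so the node never stalls when no flush is pending. A self-contained sketch of both patterns (the names are illustrative, not Milvus APIs):

package main

import (
	"fmt"
	"sync"
)

type buf struct{ rows int64 }

func main() {
	var bufs sync.Map // segmentID -> *buf

	// LoadOrStore returns the buffer already stored for the key,
	// or stores and returns the new one on first use.
	v, _ := bufs.LoadOrStore(int64(11), &buf{})
	v.(*buf).rows += 3

	flushCh := make(chan string, 1)
	flushCh <- "flush segment 11"

	// Non-blocking receive: handle a flush request if one is queued,
	// otherwise fall through immediately, as Operate does above.
	select {
	case msg := <-flushCh:
		fmt.Println("handling:", msg)
		bufs.Delete(int64(11)) // drop the buffered deletes once flushed
	default:
	}
}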
@@ -83,16 +201,91 @@ func (dn *deleteNode) filterSegmentByPK(partID UniqueID, pks []int64) map[int64]
 	return result
 }
 
-func newDeleteNode(replica Replica, channelName string, flushCh <-chan *flushMsg) *deleteNode {
-	baseNode := BaseNode{}
-	baseNode.SetMaxParallelism(Params.FlowGraphMaxQueueLength)
+func (dn *deleteNode) flushDelData(collID UniqueID, timeRange TimeRange) {
+	schema, err := dn.replica.getCollectionSchema(collID, timeRange.timestampMax)
+	if err != nil {
+		log.Error("failed to get collection schema", zap.Error(err))
+		return
+	}
 
-	return &deleteNode{
-		BaseNode: baseNode,
+	delCodec := storage.NewDeleteCodec(&etcdpb.CollectionMeta{
+		ID:     collID,
+		Schema: schema,
+	})
 
-		channelName: channelName,
-		replica:     replica,
+	kvs := make(map[string]string)
+	// buffer data to binlogs
+	dn.delBuf.Range(func(k, v interface{}) bool {
+		segID := k.(int64)
+		delDataBuf := v.(*DelDataBuf)
+		collID, partID, err := dn.replica.getCollectionAndPartitionID(segID)
+		if err != nil {
+			log.Error("failed to get collection ID and partition ID", zap.Error(err))
+			return false
+		}
 
-		flushCh: flushCh,
+		blob, err := delCodec.Serialize(partID, segID, delDataBuf.delData)
+		if err != nil {
+			log.Error("failed to serialize delete data", zap.Error(err))
+			return false
+		}
+
+		// write insert binlog
+		logID, err := dn.idAllocator.allocID()
+		if err != nil {
+			log.Error("failed to alloc ID", zap.Error(err))
+			return false
+		}
+
+		blobKey, _ := dn.idAllocator.genKey(false, collID, partID, segID, logID)
+		blobPath := path.Join(Params.DeleteBinlogRootPath, blobKey)
+		kvs[blobPath] = string(blob.Value[:])
+		log.Debug("delete blob path", zap.String("path", blobPath))
+
+		return true
+	})
+
+	if len(kvs) > 0 {
+		err = dn.minIOKV.MultiSave(kvs)
+		if err != nil {
+			log.Error("failed to save minIO ..", zap.Error(err))
+		}
+		log.Debug("save delete blobs to minIO successfully")
+	}
+}
+
+func newDeleteNode(
+	ctx context.Context,
+	replica Replica,
+	idAllocator allocatorInterface,
+	flushCh <-chan *flushMsg,
+	channelName string,
+) (*deleteNode, error) {
+	baseNode := BaseNode{}
+	baseNode.SetMaxQueueLength(Params.FlowGraphMaxQueueLength)
+	baseNode.SetMaxParallelism(Params.FlowGraphMaxParallelism)
+
+	// MinIO
+	option := &miniokv.Option{
+		Address:           Params.MinioAddress,
+		AccessKeyID:       Params.MinioAccessKeyID,
+		SecretAccessKeyID: Params.MinioSecretAccessKey,
+		UseSSL:            Params.MinioUseSSL,
+		CreateBucket:      true,
+		BucketName:        Params.MinioBucketName,
+	}
+	minIOKV, err := miniokv.NewMinIOKV(ctx, option)
+	if err != nil {
+		return nil, err
+	}
+
+	return &deleteNode{
+		BaseNode:    baseNode,
+		channelName: channelName,
+		delBuf:      sync.Map{},
+		replica:     replica,
+		idAllocator: idAllocator,
+		flushCh:     flushCh,
+		minIOKV:     minIOKV,
+	}, nil
 }
 
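flushDelData above serializes each buffered segment and saves the blobs under Params.DeleteBinlogRootPath via MultiSave. Assuming the allocator's genKey simply joins the collection, partition, segment, and log IDs (an assumption — genKey is not shown in this diff), a delete blob path would come out roughly as in this sketch:

package main

import (
	"fmt"
	"path"
	"strconv"
)

// joinIDs imitates what an ID-based key generator might produce; the real
// allocator's genKey is not part of this diff, so this is only illustrative.
func joinIDs(ids ...int64) string {
	parts := make([]string, 0, len(ids))
	for _, id := range ids {
		parts = append(parts, strconv.FormatInt(id, 10))
	}
	return path.Join(parts...)
}

func main() {
	deleteBinlogRootPath := "files/delta_log" // e.g. minio.rootPath + "/delta_log"
	collID, partID, segID, logID := int64(1), int64(2), int64(11), int64(9001)

	blobKey := joinIDs(collID, partID, segID, logID)
	blobPath := path.Join(deleteBinlogRootPath, blobKey)
	fmt.Println(blobPath) // files/delta_log/1/2/11/9001
}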
@@ -12,10 +12,14 @@
 package datanode
 
 import (
+	"context"
 	"encoding/binary"
 	"testing"
+	"time"
 
 	"github.com/bits-and-blooms/bloom/v3"
+	"github.com/milvus-io/milvus/internal/proto/schemapb"
+	"github.com/milvus-io/milvus/internal/util/flowgraph"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -27,6 +31,14 @@ type mockReplica struct {
 	flushedSegments map[UniqueID]*Segment
 }
 
+func newMockReplica() *mockReplica {
+	return &mockReplica{
+		newSegments:     make(map[int64]*Segment),
+		normalSegments:  make(map[int64]*Segment),
+		flushedSegments: make(map[int64]*Segment),
+	}
+}
+
 func (replica *mockReplica) filterSegments(channelName string, partitionID UniqueID) []*Segment {
 	results := make([]*Segment, 0)
 	for _, value := range replica.newSegments {
@@ -41,122 +53,164 @@ func (replica *mockReplica) filterSegments(channelName string, partitionID Uniqu
 	return results
 }
 
+func (replica *mockReplica) getCollectionSchema(collectionID UniqueID, ts Timestamp) (*schemapb.CollectionSchema, error) {
+	return &schemapb.CollectionSchema{}, nil
+}
+
+func (replica *mockReplica) getCollectionAndPartitionID(segID UniqueID) (collID, partitionID UniqueID, err error) {
+	return 0, 1, nil
+}
+
 func TestFlowGraphDeleteNode_newDeleteNode(te *testing.T) {
 	tests := []struct {
-		replica Replica
+		ctx         context.Context
+		replica     Replica
+		idAllocator allocatorInterface
 
 		description string
 	}{
-		{&SegmentReplica{}, "pointer of SegmentReplica"},
+		{context.Background(), &SegmentReplica{}, &allocator{}, "pointer of SegmentReplica"},
 	}
 
 	for _, test := range tests {
 		te.Run(test.description, func(t *testing.T) {
-			dn := newDeleteNode(test.replica, "", make(chan *flushMsg))
+			dn, err := newDeleteNode(test.ctx, test.replica, test.idAllocator, make(chan *flushMsg), "")
+			assert.Nil(t, err)
+
 			assert.NotNil(t, dn)
 			assert.Equal(t, "deleteNode", dn.Name())
 			dn.Close()
 		})
 	}
 
 }
 
-func TestFlowGraphDeleteNode_Operate(te *testing.T) {
-	tests := []struct {
-		invalidIn []Msg
-		validIn   []Msg
-
-		description string
-	}{
-		{[]Msg{}, nil,
-			"Invalid input length == 0"},
-		{[]Msg{&flowGraphMsg{}, &flowGraphMsg{}, &flowGraphMsg{}}, nil,
-			"Invalid input length == 3"},
-		{[]Msg{&flowGraphMsg{}}, nil,
-			"Invalid input length == 1 but input message is not msgStreamMsg"},
-		{nil, []Msg{&MsgStreamMsg{}},
-			"valid input"},
-	}
-
-	for _, test := range tests {
-		te.Run(test.description, func(t *testing.T) {
-			flushCh := make(chan *flushMsg, 10)
-			dn := deleteNode{flushCh: flushCh}
-			if test.invalidIn != nil {
-				rt := dn.Operate(test.invalidIn)
-				assert.Empty(t, rt)
-			} else {
-				flushCh <- &flushMsg{0, 100, 10, 1}
-				rt := dn.Operate(test.validIn)
-				assert.Empty(t, rt)
-			}
-		})
-	}
-}
-
-func Test_GetSegmentsByPKs(t *testing.T) {
+func genMockReplica(segIDs []int64, pks []int64, chanName string) *mockReplica {
 	buf := make([]byte, 8)
-	filter1 := bloom.NewWithEstimates(1000000, 0.01)
+	filter0 := bloom.NewWithEstimates(1000000, 0.01)
 	for i := 0; i < 3; i++ {
-		binary.BigEndian.PutUint64(buf, uint64(i))
+		binary.BigEndian.PutUint64(buf, uint64(pks[i]))
+		filter0.Add(buf)
 	}
+
+	filter1 := bloom.NewWithEstimates(1000000, 0.01)
+	for i := 3; i < 5; i++ {
+		binary.BigEndian.PutUint64(buf, uint64(pks[i]))
 		filter1.Add(buf)
 	}
-	filter2 := bloom.NewWithEstimates(1000000, 0.01)
-	for i := 3; i < 5; i++ {
-		binary.BigEndian.PutUint64(buf, uint64(i))
-		filter2.Add(buf)
+
+	seg0 := &Segment{
+		segmentID:   segIDs[0],
+		channelName: chanName,
+		pkFilter:    filter0,
 	}
-	segment1 := &Segment{
-		segmentID:   1,
-		channelName: "test",
+	seg1 := &Segment{
+		segmentID:   segIDs[1],
+		channelName: chanName,
+		pkFilter:    filter0,
 	}
+	seg2 := &Segment{
+		segmentID:   segIDs[2],
+		channelName: chanName,
+		pkFilter:    filter0,
+	}
+	seg3 := &Segment{
+		segmentID:   segIDs[3],
+		channelName: chanName,
+		pkFilter:    filter1,
+	}
-	segment2 := &Segment{
-		segmentID:   2,
-		channelName: "test",
+	seg4 := &Segment{
+		segmentID:   segIDs[4],
+		channelName: chanName,
+		pkFilter:    filter1,
+	}
-	segment3 := &Segment{
-		segmentID:   3,
-		channelName: "test",
-		pkFilter:    filter1,
-	}
-	segment4 := &Segment{
-		segmentID:   4,
-		channelName: "test",
-		pkFilter:    filter2,
-	}
-	segment5 := &Segment{
-		segmentID:   5,
-		channelName: "test",
-		pkFilter:    filter2,
-	}
-	segment6 := &Segment{
-		segmentID:   5,
+	seg5 := &Segment{
+		segmentID:   segIDs[4],
 		channelName: "test_error",
-		pkFilter:    filter2,
-	}
-	mockReplica := &mockReplica{}
-	mockReplica.newSegments = make(map[int64]*Segment)
-	mockReplica.normalSegments = make(map[int64]*Segment)
-	mockReplica.flushedSegments = make(map[int64]*Segment)
-	mockReplica.newSegments[segment1.segmentID] = segment1
-	mockReplica.newSegments[segment2.segmentID] = segment2
-	mockReplica.normalSegments[segment3.segmentID] = segment3
-	mockReplica.normalSegments[segment4.segmentID] = segment4
-	mockReplica.flushedSegments[segment5.segmentID] = segment5
-	mockReplica.flushedSegments[segment6.segmentID] = segment6
-	dn := newDeleteNode(mockReplica, "test", make(chan *flushMsg))
-	results := dn.filterSegmentByPK(0, []int64{0, 1, 2, 3, 4})
-	expected := map[int64][]int64{
-		0: {1, 2, 3},
-		1: {1, 2, 3},
-		2: {1, 2, 3},
-		3: {4, 5},
-		4: {4, 5},
-	}
-	for key, value := range expected {
-		assert.ElementsMatch(t, value, results[key])
+		pkFilter:    filter1,
 	}
+
+	replica := newMockReplica()
+	replica.newSegments[segIDs[0]] = seg0
+	replica.newSegments[segIDs[1]] = seg1
+	replica.normalSegments[segIDs[2]] = seg2
+	replica.normalSegments[segIDs[3]] = seg3
+	replica.flushedSegments[segIDs[4]] = seg4
+	replica.flushedSegments[segIDs[4]] = seg5
+
+	return replica
 }
 
+func TestFlowGraphDeleteNode_Operate(t *testing.T) {
+	t.Run("Test deleteNode Operate invalid Msg", func(te *testing.T) {
+		invalidInTests := []struct {
+			in   []Msg
+			desc string
+		}{
+			{[]Msg{},
+				"Invalid input length == 0"},
+			{[]Msg{&flowGraphMsg{}, &flowGraphMsg{}, &flowGraphMsg{}},
+				"Invalid input length == 3"},
+			{[]Msg{&flowGraphMsg{}},
+				"Invalid input length == 1 but input message is not msgStreamMsg"},
+		}
+
+		for _, test := range invalidInTests {
+			te.Run(test.desc, func(t *testing.T) {
+				flushCh := make(chan *flushMsg, 10)
+				dn := deleteNode{flushCh: flushCh}
+				rt := dn.Operate(test.in)
+				assert.Empty(t, rt)
+			})
+		}
+	})
+
+	const (
+		chanName = "channel-test"
+	)
+	var (
+		segIDs = []int64{11, 22, 33, 44, 55}
+		pks    = []int64{3, 17, 44, 190, 425}
+	)
+	replica := genMockReplica(segIDs, pks, chanName)
+	t.Run("Test get segment by primary keys", func(te *testing.T) {
+		dn, err := newDeleteNode(context.Background(), replica, &allocator{}, make(chan *flushMsg), chanName)
+		assert.Nil(t, err)
+
+		results := dn.filterSegmentByPK(0, pks)
+		expected := map[int64][]int64{
+			pks[0]: segIDs[0:3],
+			pks[1]: segIDs[0:3],
+			pks[2]: segIDs[0:3],
+			pks[3]: segIDs[3:5],
+			pks[4]: segIDs[3:5],
+		}
+		for key, value := range expected {
+			assert.ElementsMatch(t, value, results[key])
+		}
+	})
+
+	t.Run("Test deleteNode Operate valid Msg", func(te *testing.T) {
+		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+		defer cancel()
+
+		chanName := "datanode-test-FlowGraphDeletenode-operate"
+		testPath := "/test/datanode/root/meta"
+		assert.NoError(t, clearEtcd(testPath))
+		Params.MetaRootPath = testPath
+		Params.DeleteBinlogRootPath = testPath
+
+		flushChan := make(chan *flushMsg, 100)
+		delNode, err := newDeleteNode(ctx, replica, NewAllocatorFactory(), flushChan, chanName)
+		assert.Nil(te, err)
+
+		flushChan <- &flushMsg{
+			msgID:        1,
+			timestamp:    2000,
+			collectionID: UniqueID(1),
+		}
+
+		msg := GenFlowGraphDeleteMsg(pks, chanName)
+		var fgMsg flowgraph.Msg = &msg
+		delNode.Operate([]flowgraph.Msg{fgMsg})
+	})
 }
 
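The rewritten test builds per-segment bloom filters by big-endian encoding each int64 primary key into an 8-byte buffer before Add; filterSegmentByPK presumably probes the filters with the same encoding (its body is not shown in this diff). A standalone round-trip sketch using the same bloom package the test imports:

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/bits-and-blooms/bloom/v3"
)

func main() {
	filter := bloom.NewWithEstimates(1000000, 0.01)

	pks := []int64{3, 17, 44}
	buf := make([]byte, 8)
	for _, pk := range pks {
		binary.BigEndian.PutUint64(buf, uint64(pk))
		filter.Add(buf)
	}

	// Probe with the same encoding; a bloom filter may report false
	// positives but never false negatives.
	binary.BigEndian.PutUint64(buf, uint64(17))
	fmt.Println(filter.Test(buf)) // true

	binary.BigEndian.PutUint64(buf, uint64(190))
	fmt.Println(filter.Test(buf)) // almost certainly false
}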
@@ -340,11 +340,19 @@ func (ibNode *insertBufferNode) Operate(in []Msg) []Msg {
 		log.Error("send hard time tick into pulsar channel failed", zap.Error(err))
 	}
 
+	res := flowGraphMsg{
+		deleteMessages: fgMsg.deleteMessages,
+		timeRange:      fgMsg.timeRange,
+		startPositions: fgMsg.startPositions,
+		endPositions:   fgMsg.endPositions,
+	}
+
 	for _, sp := range spans {
 		sp.Finish()
 	}
 
-	return nil
+	// send delete msg to DeleteNode
+	return []Msg{&res}
 }
 
 // updateSegStatesInReplica updates statistics in replica for the segments in insertMsgs.
@@ -15,7 +15,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"math"
 	"path"
 	"sync"
 	"testing"
@@ -181,43 +180,11 @@ func TestFlowGraphInsertBufferNode_Operate(t *testing.T) {
 		collectionID: UniqueID(1),
 	}
 
-	inMsg := genFlowGraphMsg(insertChannelName)
+	inMsg := GenFlowGraphInsertMsg(insertChannelName)
 	var fgMsg flowgraph.Msg = &inMsg
 	iBNode.Operate([]flowgraph.Msg{fgMsg})
 }
 
-func genFlowGraphMsg(chanName string) flowGraphMsg {
-
-	timeRange := TimeRange{
-		timestampMin: 0,
-		timestampMax: math.MaxUint64,
-	}
-
-	startPos := []*internalpb.MsgPosition{
-		{
-			ChannelName: chanName,
-			MsgID:       make([]byte, 0),
-			Timestamp:   0,
-		},
-	}
-
-	var fgMsg = &flowGraphMsg{
-		insertMessages: make([]*msgstream.InsertMsg, 0),
-		timeRange: TimeRange{
-			timestampMin: timeRange.timestampMin,
-			timestampMax: timeRange.timestampMax,
-		},
-		startPositions: startPos,
-		endPositions:   startPos,
-	}
-
-	dataFactory := NewDataFactory()
-	fgMsg.insertMessages = append(fgMsg.insertMessages, dataFactory.GetMsgStreamInsertMsgs(2)...)
-
-	return *fgMsg
-
-}
-
 func TestFlushSegment(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
 	defer cancel()
@@ -391,7 +358,7 @@ func TestFlowGraphInsertBufferNode_AutoFlush(t *testing.T) {
 
 	// Auto flush number of rows set to 2
 
-	inMsg := genFlowGraphMsg("datanode-03-test-autoflush")
+	inMsg := GenFlowGraphInsertMsg("datanode-03-test-autoflush")
 	inMsg.insertMessages = dataFactory.GetMsgStreamInsertMsgs(2)
 	var iMsg flowgraph.Msg = &inMsg
 
@@ -673,7 +640,7 @@ func TestInsertBufferNode_bufferInsertMsg(t *testing.T) {
 	iBNode, err := newInsertBufferNode(ctx, replica, msFactory, NewAllocatorFactory(), flushChan, saveBinlog, "string", newCache())
 	require.NoError(t, err)
 
-	inMsg := genFlowGraphMsg(insertChannelName)
+	inMsg := GenFlowGraphInsertMsg(insertChannelName)
 	for _, msg := range inMsg.insertMessages {
 		msg.EndTimestamp = 101 // ts valid
 		err = iBNode.bufferInsertMsg(msg, &internalpb.MsgPosition{})
@@ -27,6 +27,7 @@ type (
 
 type flowGraphMsg struct {
 	insertMessages []*msgstream.InsertMsg
+	deleteMessages []*msgstream.DeleteMsg
 	timeRange      TimeRange
 	startPositions []*internalpb.MsgPosition
 	endPositions   []*internalpb.MsgPosition
@@ -409,14 +409,97 @@ func (df *DataFactory) GetMsgStreamTsInsertMsgs(n int, chanName string) (inMsgs
 	return
 }
 
-func (df *DataFactory) GetMsgStreamInsertMsgs(n int) (inMsgs []*msgstream.InsertMsg) {
+func (df *DataFactory) GetMsgStreamInsertMsgs(n int) (msgs []*msgstream.InsertMsg) {
 	for i := 0; i < n; i++ {
 		var msg = df.GenMsgStreamInsertMsg(i, "")
-		inMsgs = append(inMsgs, msg)
+		msgs = append(msgs, msg)
 	}
 	return
 }
 
+func (df *DataFactory) GenMsgStreamDeleteMsg(pks []int64, chanName string) *msgstream.DeleteMsg {
+	idx := 100
+	var msg = &msgstream.DeleteMsg{
+		BaseMsg: msgstream.BaseMsg{
+			HashValues: []uint32{uint32(idx)},
+		},
+		DeleteRequest: internalpb.DeleteRequest{
+			Base: &commonpb.MsgBase{
+				MsgType:   commonpb.MsgType_Delete,
+				MsgID:     0,
+				Timestamp: Timestamp(idx + 1000),
+				SourceID:  0,
+			},
+			CollectionName: "col1",
+			PartitionName:  "default",
+			ShardName:      chanName,
+			PrimaryKeys:    pks,
+			Timestamp:      Timestamp(idx + 1000),
+		},
+	}
+	return msg
+}
+
+func GenFlowGraphInsertMsg(chanName string) flowGraphMsg {
+	timeRange := TimeRange{
+		timestampMin: 0,
+		timestampMax: math.MaxUint64,
+	}
+
+	startPos := []*internalpb.MsgPosition{
+		{
+			ChannelName: chanName,
+			MsgID:       make([]byte, 0),
+			Timestamp:   0,
+		},
+	}
+
+	var fgMsg = &flowGraphMsg{
+		insertMessages: make([]*msgstream.InsertMsg, 0),
+		timeRange: TimeRange{
+			timestampMin: timeRange.timestampMin,
+			timestampMax: timeRange.timestampMax,
+		},
+		startPositions: startPos,
+		endPositions:   startPos,
+	}
+
+	dataFactory := NewDataFactory()
+	fgMsg.insertMessages = append(fgMsg.insertMessages, dataFactory.GetMsgStreamInsertMsgs(2)...)
+
+	return *fgMsg
+}
+
+func GenFlowGraphDeleteMsg(pks []int64, chanName string) flowGraphMsg {
+	timeRange := TimeRange{
+		timestampMin: 0,
+		timestampMax: math.MaxUint64,
+	}
+
+	startPos := []*internalpb.MsgPosition{
+		{
+			ChannelName: chanName,
+			MsgID:       make([]byte, 0),
+			Timestamp:   0,
+		},
+	}
+
+	var fgMsg = &flowGraphMsg{
+		insertMessages: make([]*msgstream.InsertMsg, 0),
+		timeRange: TimeRange{
+			timestampMin: timeRange.timestampMin,
+			timestampMax: timeRange.timestampMax,
+		},
+		startPositions: startPos,
+		endPositions:   startPos,
+	}
+
+	dataFactory := NewDataFactory()
+	fgMsg.deleteMessages = append(fgMsg.deleteMessages, dataFactory.GenMsgStreamDeleteMsg(pks, chanName))
+
+	return *fgMsg
+}
+
 type AllocatorFactory struct {
 	sync.Mutex
 	r *rand.Rand
@@ -38,6 +38,7 @@ type ParamTable struct {
 	FlushInsertBufferSize int64
 	InsertBinlogRootPath  string
 	StatsBinlogRootPath   string
+	DeleteBinlogRootPath  string
 	Alias                 string // Different datanode in one machine
 
 	// Pulsar address
@@ -103,6 +104,7 @@ func (p *ParamTable) Init() {
 	p.initFlushInsertBufferSize()
 	p.initInsertBinlogRootPath()
 	p.initStatsBinlogRootPath()
+	p.initDeleteBinlogRootPath()
 
 	p.initPulsarAddress()
 	p.initRocksmqPath()
@@ -153,6 +155,14 @@ func (p *ParamTable) initStatsBinlogRootPath() {
 	p.StatsBinlogRootPath = path.Join(rootPath, "stats_log")
 }
 
+func (p *ParamTable) initDeleteBinlogRootPath() {
+	rootPath, err := p.Load("minio.rootPath")
+	if err != nil {
+		panic(err)
+	}
+	p.DeleteBinlogRootPath = path.Join(rootPath, "delta_log")
+}
+
 func (p *ParamTable) initPulsarAddress() {
 	url, err := p.Load("_PulsarAddress")
 	if err != nil {
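initDeleteBinlogRootPath derives the delete-binlog root by joining the configured minio.rootPath with "delta_log", in the same way the stats root is joined with "stats_log". For an illustrative rootPath of "files":

package main

import (
	"fmt"
	"path"
)

func main() {
	// Assuming minio.rootPath is configured as "files" (illustrative value),
	// the stats and delete binlog roots from the hunk above come out as:
	rootPath := "files"
	fmt.Println(path.Join(rootPath, "stats_log")) // files/stats_log
	fmt.Println(path.Join(rootPath, "delta_log")) // files/delta_log
}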
|
@ -179,6 +179,10 @@ func (replica *SegmentReplica) getCollectionAndPartitionID(segID UniqueID) (coll
|
|||
return seg.collectionID, seg.partitionID, nil
|
||||
}
|
||||
|
||||
if seg, ok := replica.flushedSegments[segID]; ok {
|
||||
return seg.collectionID, seg.partitionID, nil
|
||||
}
|
||||
|
||||
return 0, 0, fmt.Errorf("Cannot find segment, id = %v", segID)
|
||||
}
|
||||
|
||||
|
|
|
@@ -103,9 +103,9 @@ func TestSegmentReplica_getCollectionAndPartitionID(te *testing.T) {
 			}
 
 			collID, parID, err := sr.getCollectionAndPartitionID(test.segInFlushed)
-			assert.Error(t, err)
-			assert.Zero(t, collID)
-			assert.Zero(t, parID)
+			assert.NoError(t, err)
+			assert.Equal(t, test.inCollID, collID)
+			assert.Equal(t, test.inParID, parID)
 		} else {
 			sr := &SegmentReplica{}
 			collID, parID, err := sr.getCollectionAndPartitionID(1000)
@@ -4749,40 +4749,40 @@ func (dt *deleteTask) Execute(ctx context.Context) (err error) {
 	}
 	msgPack.Msgs[0] = tsMsg
 
-	//collID := dt.DeleteRequest.CollectionID
-	//stream, err := dt.chMgr.getDMLStream(collID)
-	//if err != nil {
-	//	err = dt.chMgr.createDMLMsgStream(collID)
-	//	if err != nil {
-	//		dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
-	//		dt.result.Status.Reason = err.Error()
-	//		return err
-	//	}
-	//	channels, err := dt.chMgr.getChannels(collID)
-	//	if err == nil {
-	//		for _, pchan := range channels {
-	//			err := dt.chTicker.addPChan(pchan)
-	//			if err != nil {
-	//				log.Warn("failed to add pchan to channels time ticker",
-	//					zap.Error(err),
-	//					zap.String("pchan", pchan))
-	//			}
-	//		}
-	//	}
-	//	stream, err = dt.chMgr.getDMLStream(collID)
-	//	if err != nil {
-	//		dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
-	//		dt.result.Status.Reason = err.Error()
-	//		return err
-	//	}
-	//}
-	//
-	//err = stream.Produce(&msgPack)
-	//if err != nil {
-	//	dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
-	//	dt.result.Status.Reason = err.Error()
-	//	return err
-	//}
+	collID := dt.DeleteRequest.CollectionID
+	stream, err := dt.chMgr.getDMLStream(collID)
+	if err != nil {
+		err = dt.chMgr.createDMLMsgStream(collID)
+		if err != nil {
+			dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
+			dt.result.Status.Reason = err.Error()
+			return err
+		}
+		channels, err := dt.chMgr.getChannels(collID)
+		if err == nil {
+			for _, pchan := range channels {
+				err := dt.chTicker.addPChan(pchan)
+				if err != nil {
+					log.Warn("failed to add pchan to channels time ticker",
+						zap.Error(err),
+						zap.String("pchan", pchan))
+				}
+			}
+		}
+		stream, err = dt.chMgr.getDMLStream(collID)
+		if err != nil {
+			dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
+			dt.result.Status.Reason = err.Error()
+			return err
+		}
+	}
+
+	err = stream.Produce(&msgPack)
+	if err != nil {
+		dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
+		dt.result.Status.Reason = err.Error()
+		return err
+	}
 	return nil
 }
 
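The now-active body of deleteTask.Execute follows a get / create-on-miss / get-again shape before producing on the DML stream. A reduced standalone sketch of that shape (types and names here are illustrative only):

package main

import (
	"errors"
	"fmt"
)

type manager struct{ streams map[string]string }

func (m *manager) get(name string) (string, error) {
	s, ok := m.streams[name]
	if !ok {
		return "", errors.New("stream not found")
	}
	return s, nil
}

func (m *manager) create(name string) { m.streams[name] = "stream:" + name }

func main() {
	m := &manager{streams: map[string]string{}}

	// Same shape as deleteTask.Execute above: try to fetch the DML stream,
	// create it on a miss, then fetch again before producing.
	name := "collection-1"
	s, err := m.get(name)
	if err != nil {
		m.create(name)
		if s, err = m.get(name); err != nil {
			panic(err)
		}
	}
	fmt.Println("producing on", s)
}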