Support Delete in datanode (#8505)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
pull/9638/head
Cai Yudong 2021-10-11 16:31:44 +08:00 committed by GitHub
parent 8f3b8ed1da
commit 84e568ee32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 569 additions and 195 deletions

View File

@ -190,9 +190,17 @@ func (dsService *dataSyncService) initNodes(vchanInfo *datapb.VchannelInfo) erro
return err
}
dn := newDeleteNode(dsService.replica, vchanInfo.GetChannelName(), dsService.flushChs.deleteBufferCh)
var deleteNode Node = dn
var deleteNode Node
deleteNode, err = newDeleteNode(
dsService.ctx,
dsService.replica,
dsService.idAllocator,
dsService.flushChs.deleteBufferCh,
vchanInfo.GetChannelName(),
)
if err != nil {
return err
}
// recover segment checkpoints
for _, us := range vchanInfo.GetUnflushedSegments() {

View File

@ -99,7 +99,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg {
return []Msg{}
}
case commonpb.MsgType_Insert:
log.Debug("DDNode with insert messages")
log.Debug("DDNode receive insert messages")
imsg := msg.(*msgstream.InsertMsg)
if imsg.CollectionID != ddn.collectionID {
//log.Debug("filter invalid InsertMsg, collection mis-match",
@ -117,19 +117,27 @@ func (ddn *ddNode) Operate(in []Msg) []Msg {
}
}
fgMsg.insertMessages = append(fgMsg.insertMessages, imsg)
case commonpb.MsgType_Delete:
log.Debug("DDNode receive delete messages")
dmsg := msg.(*msgstream.DeleteMsg)
if dmsg.CollectionID != ddn.collectionID {
//log.Debug("filter invalid DeleteMsg, collection mis-match",
// zap.Int64("Get msg collID", dmsg.CollectionID),
// zap.Int64("Expected collID", ddn.collectionID))
continue
}
fgMsg.deleteMessages = append(fgMsg.deleteMessages, dmsg)
}
}
fgMsg.startPositions = append(fgMsg.startPositions, msMsg.StartPositions()...)
fgMsg.endPositions = append(fgMsg.endPositions, msMsg.EndPositions()...)
var res Msg = &fgMsg
for _, sp := range spans {
sp.Finish()
}
return []Msg{res}
return []Msg{&fgMsg}
}
func (ddn *ddNode) filterFlushedSegmentInsertMessages(msg *msgstream.InsertMsg) bool {

View File

@ -207,6 +207,44 @@ func TestFlowGraph_DDNode_Operate(to *testing.T) {
}
})
to.Run("Test DDNode Operate Delete Msg", func(te *testing.T) {
tests := []struct {
ddnCollID UniqueID
inMsgCollID UniqueID
MsgEndTs Timestamp
expectedRtLen int
description string
}{
{1, 1, 2000, 1, "normal"},
{1, 2, 4000, 0, "inMsgCollID(2) != ddnCollID"},
}
for _, test := range tests {
te.Run(test.description, func(t *testing.T) {
// Prepare ddNode states
ddn := ddNode{
collectionID: test.ddnCollID,
}
// Prepare delete messages
var dMsg msgstream.TsMsg = &msgstream.DeleteMsg{
BaseMsg: msgstream.BaseMsg{EndTimestamp: test.MsgEndTs},
DeleteRequest: internalpb.DeleteRequest{
Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Delete},
CollectionID: test.inMsgCollID,
},
}
tsMessages := []msgstream.TsMsg{dMsg}
var msgStreamMsg Msg = flowgraph.GenerateMsgStreamMsg(tsMessages, 0, 0, nil, nil)
// Test
rt := ddn.Operate([]Msg{msgStreamMsg})
assert.Equal(t, test.expectedRtLen, len(rt[0].(*flowGraphMsg).deleteMessages))
})
}
})
}
func TestFlowGraph_DDNode_filterMessages(te *testing.T) {

View File

@ -12,21 +12,58 @@
package datanode
import (
"context"
"encoding/binary"
"path"
"strconv"
"sync"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/kv"
miniokv "github.com/milvus-io/milvus/internal/kv/minio"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/msgstream"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/trace"
"github.com/opentracing/opentracing-go"
)
type (
// DeleteData record deleted IDs and Timestamps
DeleteData = storage.DeleteData
)
// DeleteNode is to process delete msg, flush delete info into storage.
type deleteNode struct {
BaseNode
channelName string
delBuf sync.Map // map[segmentID]*DelDataBuf
replica Replica
idAllocator allocatorInterface
flushCh <-chan *flushMsg
minIOKV kv.BaseKV
}
flushCh <-chan *flushMsg
// DelDataBuf buffers delete data for a single segment until it is flushed;
// size counts the number of buffered delete rows.
type DelDataBuf struct {
	// delData maps primary key (rendered as a decimal string by
	// bufferDeleteMsg) to the delete timestamp.
	delData *DeleteData
	// size is the running count of buffered delete rows.
	size int64
}
// updateSize adds the given row count to the buffer's running size.
func (ddb *DelDataBuf) updateSize(size int64) {
	ddb.size = ddb.size + size
}

// newDelDataBuf returns an empty delete buffer: no rows buffered and an
// initialized (empty) pk→timestamp map.
func newDelDataBuf() *DelDataBuf {
	buf := &DelDataBuf{size: 0}
	buf.delData = &DeleteData{
		Data: make(map[string]int64),
	}
	return buf
}
func (dn *deleteNode) Name() string {
@ -37,31 +74,112 @@ func (dn *deleteNode) Close() {
log.Info("Flowgraph Delete Node closing")
}
// bufferDeleteMsg routes the primary keys of a DeleteMsg to the segments
// whose bloom filters may contain them (via filterSegmentByPK) and
// accumulates pk→timestamp pairs into dn.delBuf, keyed by segment ID.
func (dn *deleteNode) bufferDeleteMsg(msg *msgstream.DeleteMsg) error {
	log.Debug("bufferDeleteMsg", zap.Any("primary keys", msg.PrimaryKeys))

	segIDToPkMap := make(map[UniqueID][]int64)
	segIDToTsMap := make(map[UniqueID][]int64)

	m := dn.filterSegmentByPK(msg.PartitionID, msg.PrimaryKeys)
	for _, pk := range msg.PrimaryKeys {
		segIDs, ok := m[pk]
		if !ok {
			log.Warn("primary key not exist in all segments", zap.Int64("primary key", pk))
			continue
		}
		for _, segID := range segIDs {
			segIDToPkMap[segID] = append(segIDToPkMap[segID], pk)
			segIDToTsMap[segID] = append(segIDToTsMap[segID], int64(msg.Timestamp))
		}
	}

	for segID, pks := range segIDToPkMap {
		rows := len(pks)
		tss, ok := segIDToTsMap[segID]
		if !ok || rows != len(tss) {
			// Skip this segment: indexing tss[i] below with a nil or shorter
			// tss would panic. (Both maps are appended in lockstep above, so
			// this is defensive.)
			log.Error("primary keys and timestamp's element num mis-match", zap.Int64("segmentID", segID))
			continue
		}

		newBuf := newDelDataBuf()
		delDataBuf, _ := dn.delBuf.LoadOrStore(segID, newBuf)
		delData := delDataBuf.(*DelDataBuf).delData
		for i := 0; i < rows; i++ {
			delData.Data[strconv.FormatInt(pks[i], 10)] = tss[i]
			log.Debug("delete", zap.Int64("primary key", pks[i]), zap.Int64("ts", tss[i]))
		}

		// LoadOrStore already placed the buffer in the map and delData is
		// mutated in place, so only the size bookkeeping remains; the extra
		// Store call was redundant.
		delDataBuf.(*DelDataBuf).updateSize(int64(rows))
	}
	return nil
}
// showDelBuf logs the buffered delete data for every segment of this
// node's channel (partitionID filter fixed at 0). Debug/inspection only.
func (dn *deleteNode) showDelBuf() {
	for _, seg := range dn.replica.filterSegments(dn.channelName, 0) {
		v, ok := dn.delBuf.Load(seg.segmentID)
		if !ok {
			log.Error("segment not exist", zap.Int64("segID", seg.segmentID))
			continue
		}
		buf, _ := v.(*DelDataBuf)
		log.Debug("del data buffer status", zap.Int64("segID", seg.segmentID), zap.Int64("size", buf.size))
		for pk, ts := range buf.delData.Data {
			log.Debug("del data", zap.String("pk", pk), zap.Int64("ts", ts))
		}
	}
}
func (dn *deleteNode) Operate(in []Msg) []Msg {
log.Debug("deleteNode Operating")
//log.Debug("deleteNode Operating")
if len(in) != 1 {
log.Warn("Invalid operate message input in deleteNode", zap.Int("input length", len(in)))
return []Msg{}
log.Error("Invalid operate message input in deleteNode", zap.Int("input length", len(in)))
return nil
}
_, ok := in[0].(*MsgStreamMsg)
fgMsg, ok := in[0].(*flowGraphMsg)
if !ok {
log.Warn("type assertion failed for MsgStreamMsg")
return []Msg{}
log.Error("type assertion failed for flowGraphMsg")
return nil
}
var spans []opentracing.Span
for _, msg := range fgMsg.deleteMessages {
sp, ctx := trace.StartSpanFromContext(msg.TraceCtx())
spans = append(spans, sp)
msg.SetTraceCtx(ctx)
}
for _, msg := range fgMsg.deleteMessages {
if err := dn.bufferDeleteMsg(msg); err != nil {
log.Error("buffer delete msg failed", zap.Error(err))
}
}
// show all data in dn.delBuf
if len(fgMsg.deleteMessages) != 0 {
dn.showDelBuf()
}
// handle manual flush
select {
case fmsg := <-dn.flushCh:
currentSegID := fmsg.segmentID
log.Debug("DeleteNode receives flush message",
zap.Int64("segmentID", currentSegID),
zap.Int64("collectionID", fmsg.collectionID),
)
log.Debug("DeleteNode receives flush message", zap.Int64("collID", fmsg.collectionID))
dn.flushDelData(fmsg.collectionID, fgMsg.timeRange)
// clean dn.delBuf
dn.delBuf = sync.Map{}
default:
}
return []Msg{}
for _, sp := range spans {
sp.Finish()
}
return nil
}
// filterSegmentByPK returns the bloom filter check result.
@ -83,16 +201,91 @@ func (dn *deleteNode) filterSegmentByPK(partID UniqueID, pks []int64) map[int64]
return result
}
func newDeleteNode(replica Replica, channelName string, flushCh <-chan *flushMsg) *deleteNode {
baseNode := BaseNode{}
baseNode.SetMaxParallelism(Params.FlowGraphMaxQueueLength)
func (dn *deleteNode) flushDelData(collID UniqueID, timeRange TimeRange) {
schema, err := dn.replica.getCollectionSchema(collID, timeRange.timestampMax)
if err != nil {
log.Error("failed to get collection schema", zap.Error(err))
return
}
return &deleteNode{
BaseNode: baseNode,
delCodec := storage.NewDeleteCodec(&etcdpb.CollectionMeta{
ID: collID,
Schema: schema,
})
channelName: channelName,
replica: replica,
kvs := make(map[string]string)
// buffer data to binlogs
dn.delBuf.Range(func(k, v interface{}) bool {
segID := k.(int64)
delDataBuf := v.(*DelDataBuf)
collID, partID, err := dn.replica.getCollectionAndPartitionID(segID)
if err != nil {
log.Error("failed to get collection ID and partition ID", zap.Error(err))
return false
}
flushCh: flushCh,
blob, err := delCodec.Serialize(partID, segID, delDataBuf.delData)
if err != nil {
log.Error("failed to serialize delete data", zap.Error(err))
return false
}
// write insert binlog
logID, err := dn.idAllocator.allocID()
if err != nil {
log.Error("failed to alloc ID", zap.Error(err))
return false
}
blobKey, _ := dn.idAllocator.genKey(false, collID, partID, segID, logID)
blobPath := path.Join(Params.DeleteBinlogRootPath, blobKey)
kvs[blobPath] = string(blob.Value[:])
log.Debug("delete blob path", zap.String("path", blobPath))
return true
})
if len(kvs) > 0 {
err = dn.minIOKV.MultiSave(kvs)
if err != nil {
log.Error("failed to save minIO ..", zap.Error(err))
}
log.Debug("save delete blobs to minIO successfully")
}
}
// newDeleteNode builds a deleteNode bound to one vchannel. It connects to
// MinIO (creating the bucket if needed) so flushed delete binlogs can be
// persisted; construction fails if the MinIO client cannot be created.
func newDeleteNode(
	ctx context.Context,
	replica Replica,
	idAllocator allocatorInterface,
	flushCh <-chan *flushMsg,
	channelName string,
) (*deleteNode, error) {
	base := BaseNode{}
	base.SetMaxQueueLength(Params.FlowGraphMaxQueueLength)
	base.SetMaxParallelism(Params.FlowGraphMaxParallelism)

	// MinIO client used by flushDelData to store delete binlogs.
	mkv, err := miniokv.NewMinIOKV(ctx, &miniokv.Option{
		Address:           Params.MinioAddress,
		AccessKeyID:       Params.MinioAccessKeyID,
		SecretAccessKeyID: Params.MinioSecretAccessKey,
		UseSSL:            Params.MinioUseSSL,
		CreateBucket:      true,
		BucketName:        Params.MinioBucketName,
	})
	if err != nil {
		return nil, err
	}

	return &deleteNode{
		BaseNode:    base,
		channelName: channelName,
		delBuf:      sync.Map{},
		replica:     replica,
		idAllocator: idAllocator,
		flushCh:     flushCh,
		minIOKV:     mkv,
	}, nil
}

View File

@ -12,10 +12,14 @@
package datanode
import (
"context"
"encoding/binary"
"testing"
"time"
"github.com/bits-and-blooms/bloom/v3"
"github.com/milvus-io/milvus/internal/proto/schemapb"
"github.com/milvus-io/milvus/internal/util/flowgraph"
"github.com/stretchr/testify/assert"
)
@ -27,6 +31,14 @@ type mockReplica struct {
flushedSegments map[UniqueID]*Segment
}
// newMockReplica returns a mockReplica with all three segment maps
// (new, normal, flushed) initialized and empty.
func newMockReplica() *mockReplica {
	replica := new(mockReplica)
	replica.newSegments = make(map[int64]*Segment)
	replica.normalSegments = make(map[int64]*Segment)
	replica.flushedSegments = make(map[int64]*Segment)
	return replica
}
func (replica *mockReplica) filterSegments(channelName string, partitionID UniqueID) []*Segment {
results := make([]*Segment, 0)
for _, value := range replica.newSegments {
@ -41,122 +53,164 @@ func (replica *mockReplica) filterSegments(channelName string, partitionID Uniqu
return results
}
// getCollectionSchema is a stub: it returns an empty schema and never fails.
func (replica *mockReplica) getCollectionSchema(collectionID UniqueID, ts Timestamp) (*schemapb.CollectionSchema, error) {
	schema := &schemapb.CollectionSchema{}
	return schema, nil
}
// getCollectionAndPartitionID is a stub: it always reports collection 0 and
// partition 1 for any segment, with no error.
func (replica *mockReplica) getCollectionAndPartitionID(segID UniqueID) (collID, partitionID UniqueID, err error) {
	collID, partitionID = 0, 1
	return collID, partitionID, nil
}
func TestFlowGraphDeleteNode_newDeleteNode(te *testing.T) {
tests := []struct {
replica Replica
ctx context.Context
replica Replica
idAllocator allocatorInterface
description string
}{
{&SegmentReplica{}, "pointer of SegmentReplica"},
{context.Background(), &SegmentReplica{}, &allocator{}, "pointer of SegmentReplica"},
}
for _, test := range tests {
te.Run(test.description, func(t *testing.T) {
dn := newDeleteNode(test.replica, "", make(chan *flushMsg))
dn, err := newDeleteNode(test.ctx, test.replica, test.idAllocator, make(chan *flushMsg), "")
assert.Nil(t, err)
assert.NotNil(t, dn)
assert.Equal(t, "deleteNode", dn.Name())
dn.Close()
})
}
}
func TestFlowGraphDeleteNode_Operate(te *testing.T) {
tests := []struct {
invalidIn []Msg
validIn []Msg
description string
}{
{[]Msg{}, nil,
"Invalid input length == 0"},
{[]Msg{&flowGraphMsg{}, &flowGraphMsg{}, &flowGraphMsg{}}, nil,
"Invalid input length == 3"},
{[]Msg{&flowGraphMsg{}}, nil,
"Invalid input length == 1 but input message is not msgStreamMsg"},
{nil, []Msg{&MsgStreamMsg{}},
"valid input"},
}
for _, test := range tests {
te.Run(test.description, func(t *testing.T) {
flushCh := make(chan *flushMsg, 10)
dn := deleteNode{flushCh: flushCh}
if test.invalidIn != nil {
rt := dn.Operate(test.invalidIn)
assert.Empty(t, rt)
} else {
flushCh <- &flushMsg{0, 100, 10, 1}
rt := dn.Operate(test.validIn)
assert.Empty(t, rt)
}
})
}
}
func Test_GetSegmentsByPKs(t *testing.T) {
func genMockReplica(segIDs []int64, pks []int64, chanName string) *mockReplica {
buf := make([]byte, 8)
filter1 := bloom.NewWithEstimates(1000000, 0.01)
filter0 := bloom.NewWithEstimates(1000000, 0.01)
for i := 0; i < 3; i++ {
binary.BigEndian.PutUint64(buf, uint64(i))
binary.BigEndian.PutUint64(buf, uint64(pks[i]))
filter0.Add(buf)
}
filter1 := bloom.NewWithEstimates(1000000, 0.01)
for i := 3; i < 5; i++ {
binary.BigEndian.PutUint64(buf, uint64(pks[i]))
filter1.Add(buf)
}
filter2 := bloom.NewWithEstimates(1000000, 0.01)
for i := 3; i < 5; i++ {
binary.BigEndian.PutUint64(buf, uint64(i))
filter2.Add(buf)
seg0 := &Segment{
segmentID: segIDs[0],
channelName: chanName,
pkFilter: filter0,
}
segment1 := &Segment{
segmentID: 1,
channelName: "test",
seg1 := &Segment{
segmentID: segIDs[1],
channelName: chanName,
pkFilter: filter0,
}
seg2 := &Segment{
segmentID: segIDs[2],
channelName: chanName,
pkFilter: filter0,
}
seg3 := &Segment{
segmentID: segIDs[3],
channelName: chanName,
pkFilter: filter1,
}
segment2 := &Segment{
segmentID: 2,
channelName: "test",
seg4 := &Segment{
segmentID: segIDs[4],
channelName: chanName,
pkFilter: filter1,
}
segment3 := &Segment{
segmentID: 3,
channelName: "test",
pkFilter: filter1,
}
segment4 := &Segment{
segmentID: 4,
channelName: "test",
pkFilter: filter2,
}
segment5 := &Segment{
segmentID: 5,
channelName: "test",
pkFilter: filter2,
}
segment6 := &Segment{
segmentID: 5,
seg5 := &Segment{
segmentID: segIDs[4],
channelName: "test_error",
pkFilter: filter2,
}
mockReplica := &mockReplica{}
mockReplica.newSegments = make(map[int64]*Segment)
mockReplica.normalSegments = make(map[int64]*Segment)
mockReplica.flushedSegments = make(map[int64]*Segment)
mockReplica.newSegments[segment1.segmentID] = segment1
mockReplica.newSegments[segment2.segmentID] = segment2
mockReplica.normalSegments[segment3.segmentID] = segment3
mockReplica.normalSegments[segment4.segmentID] = segment4
mockReplica.flushedSegments[segment5.segmentID] = segment5
mockReplica.flushedSegments[segment6.segmentID] = segment6
dn := newDeleteNode(mockReplica, "test", make(chan *flushMsg))
results := dn.filterSegmentByPK(0, []int64{0, 1, 2, 3, 4})
expected := map[int64][]int64{
0: {1, 2, 3},
1: {1, 2, 3},
2: {1, 2, 3},
3: {4, 5},
4: {4, 5},
}
for key, value := range expected {
assert.ElementsMatch(t, value, results[key])
pkFilter: filter1,
}
replica := newMockReplica()
replica.newSegments[segIDs[0]] = seg0
replica.newSegments[segIDs[1]] = seg1
replica.normalSegments[segIDs[2]] = seg2
replica.normalSegments[segIDs[3]] = seg3
replica.flushedSegments[segIDs[4]] = seg4
replica.flushedSegments[segIDs[4]] = seg5
return replica
}
// TestFlowGraphDeleteNode_Operate covers deleteNode.Operate with invalid
// inputs, pk-to-segment bloom-filter routing, and a full valid delete
// message flow including a queued manual flush.
//
// Fix: subtest closures previously took `te *testing.T` but asserted
// against the OUTER `t`, so failures were attributed to the parent test;
// each subtest now shadows `t` with its own *testing.T.
func TestFlowGraphDeleteNode_Operate(t *testing.T) {
	t.Run("Test deleteNode Operate invalid Msg", func(t *testing.T) {
		invalidInTests := []struct {
			in   []Msg
			desc string
		}{
			{[]Msg{},
				"Invalid input length == 0"},
			{[]Msg{&flowGraphMsg{}, &flowGraphMsg{}, &flowGraphMsg{}},
				"Invalid input length == 3"},
			{[]Msg{&flowGraphMsg{}},
				"Invalid input length == 1 but input message is not msgStreamMsg"},
		}

		for _, test := range invalidInTests {
			t.Run(test.desc, func(t *testing.T) {
				flushCh := make(chan *flushMsg, 10)
				dn := deleteNode{flushCh: flushCh}
				rt := dn.Operate(test.in)
				assert.Empty(t, rt)
			})
		}
	})

	const (
		chanName = "channel-test"
	)
	var (
		segIDs = []int64{11, 22, 33, 44, 55}
		pks    = []int64{3, 17, 44, 190, 425}
	)
	replica := genMockReplica(segIDs, pks, chanName)

	t.Run("Test get segment by primary keys", func(t *testing.T) {
		dn, err := newDeleteNode(context.Background(), replica, &allocator{}, make(chan *flushMsg), chanName)
		assert.Nil(t, err)

		results := dn.filterSegmentByPK(0, pks)
		// pks[0:3] were added to the bloom filter shared by the first three
		// segments, pks[3:5] to the filter shared by the last two.
		expected := map[int64][]int64{
			pks[0]: segIDs[0:3],
			pks[1]: segIDs[0:3],
			pks[2]: segIDs[0:3],
			pks[3]: segIDs[3:5],
			pks[4]: segIDs[3:5],
		}
		for key, value := range expected {
			assert.ElementsMatch(t, value, results[key])
		}
	})

	t.Run("Test deleteNode Operate valid Msg", func(t *testing.T) {
		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		defer cancel()

		chanName := "datanode-test-FlowGraphDeletenode-operate"
		testPath := "/test/datanode/root/meta"
		assert.NoError(t, clearEtcd(testPath))
		Params.MetaRootPath = testPath
		Params.DeleteBinlogRootPath = testPath

		flushChan := make(chan *flushMsg, 100)
		delNode, err := newDeleteNode(ctx, replica, NewAllocatorFactory(), flushChan, chanName)
		assert.Nil(t, err)

		// Queue a manual flush so Operate exercises the flush path as well.
		flushChan <- &flushMsg{
			msgID:        1,
			timestamp:    2000,
			collectionID: UniqueID(1),
		}

		msg := GenFlowGraphDeleteMsg(pks, chanName)
		var fgMsg flowgraph.Msg = &msg
		delNode.Operate([]flowgraph.Msg{fgMsg})
	})
}

View File

@ -340,11 +340,19 @@ func (ibNode *insertBufferNode) Operate(in []Msg) []Msg {
log.Error("send hard time tick into pulsar channel failed", zap.Error(err))
}
res := flowGraphMsg{
deleteMessages: fgMsg.deleteMessages,
timeRange: fgMsg.timeRange,
startPositions: fgMsg.startPositions,
endPositions: fgMsg.endPositions,
}
for _, sp := range spans {
sp.Finish()
}
return nil
// send delete msg to DeleteNode
return []Msg{&res}
}
// updateSegStatesInReplica updates statistics in replica for the segments in insertMsgs.

View File

@ -15,7 +15,6 @@ import (
"context"
"errors"
"fmt"
"math"
"path"
"sync"
"testing"
@ -181,43 +180,11 @@ func TestFlowGraphInsertBufferNode_Operate(t *testing.T) {
collectionID: UniqueID(1),
}
inMsg := genFlowGraphMsg(insertChannelName)
inMsg := GenFlowGraphInsertMsg(insertChannelName)
var fgMsg flowgraph.Msg = &inMsg
iBNode.Operate([]flowgraph.Msg{fgMsg})
}
func genFlowGraphMsg(chanName string) flowGraphMsg {
timeRange := TimeRange{
timestampMin: 0,
timestampMax: math.MaxUint64,
}
startPos := []*internalpb.MsgPosition{
{
ChannelName: chanName,
MsgID: make([]byte, 0),
Timestamp: 0,
},
}
var fgMsg = &flowGraphMsg{
insertMessages: make([]*msgstream.InsertMsg, 0),
timeRange: TimeRange{
timestampMin: timeRange.timestampMin,
timestampMax: timeRange.timestampMax,
},
startPositions: startPos,
endPositions: startPos,
}
dataFactory := NewDataFactory()
fgMsg.insertMessages = append(fgMsg.insertMessages, dataFactory.GetMsgStreamInsertMsgs(2)...)
return *fgMsg
}
func TestFlushSegment(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
@ -391,7 +358,7 @@ func TestFlowGraphInsertBufferNode_AutoFlush(t *testing.T) {
// Auto flush number of rows set to 2
inMsg := genFlowGraphMsg("datanode-03-test-autoflush")
inMsg := GenFlowGraphInsertMsg("datanode-03-test-autoflush")
inMsg.insertMessages = dataFactory.GetMsgStreamInsertMsgs(2)
var iMsg flowgraph.Msg = &inMsg
@ -673,7 +640,7 @@ func TestInsertBufferNode_bufferInsertMsg(t *testing.T) {
iBNode, err := newInsertBufferNode(ctx, replica, msFactory, NewAllocatorFactory(), flushChan, saveBinlog, "string", newCache())
require.NoError(t, err)
inMsg := genFlowGraphMsg(insertChannelName)
inMsg := GenFlowGraphInsertMsg(insertChannelName)
for _, msg := range inMsg.insertMessages {
msg.EndTimestamp = 101 // ts valid
err = iBNode.bufferInsertMsg(msg, &internalpb.MsgPosition{})

View File

@ -27,6 +27,7 @@ type (
type flowGraphMsg struct {
insertMessages []*msgstream.InsertMsg
deleteMessages []*msgstream.DeleteMsg
timeRange TimeRange
startPositions []*internalpb.MsgPosition
endPositions []*internalpb.MsgPosition

View File

@ -409,14 +409,97 @@ func (df *DataFactory) GetMsgStreamTsInsertMsgs(n int, chanName string) (inMsgs
return
}
func (df *DataFactory) GetMsgStreamInsertMsgs(n int) (inMsgs []*msgstream.InsertMsg) {
func (df *DataFactory) GetMsgStreamInsertMsgs(n int) (msgs []*msgstream.InsertMsg) {
for i := 0; i < n; i++ {
var msg = df.GenMsgStreamInsertMsg(i, "")
inMsgs = append(inMsgs, msg)
msgs = append(msgs, msg)
}
return
}
// GenMsgStreamDeleteMsg builds a DeleteMsg for the given primary keys on
// chanName, with fixed hash value 100 and timestamp 1100 (idx+1000).
func (df *DataFactory) GenMsgStreamDeleteMsg(pks []int64, chanName string) *msgstream.DeleteMsg {
	idx := 100
	ts := Timestamp(idx + 1000)
	msg := &msgstream.DeleteMsg{
		BaseMsg: msgstream.BaseMsg{
			HashValues: []uint32{uint32(idx)},
		},
		DeleteRequest: internalpb.DeleteRequest{
			Base: &commonpb.MsgBase{
				MsgType:   commonpb.MsgType_Delete,
				MsgID:     0,
				Timestamp: ts,
				SourceID:  0,
			},
			CollectionName: "col1",
			PartitionName:  "default",
			ShardName:      chanName,
			PrimaryKeys:    pks,
			Timestamp:      ts,
		},
	}
	return msg
}
// GenFlowGraphInsertMsg produces a flowGraphMsg carrying two generated
// insert messages on chanName, spanning the full [0, MaxUint64] time range.
// Start and end positions share the same slice, both at timestamp 0.
func GenFlowGraphInsertMsg(chanName string) flowGraphMsg {
	positions := []*internalpb.MsgPosition{
		{
			ChannelName: chanName,
			MsgID:       make([]byte, 0),
			Timestamp:   0,
		},
	}

	fgMsg := flowGraphMsg{
		insertMessages: make([]*msgstream.InsertMsg, 0),
		timeRange: TimeRange{
			timestampMin: 0,
			timestampMax: math.MaxUint64,
		},
		startPositions: positions,
		endPositions:   positions,
	}
	fgMsg.insertMessages = append(fgMsg.insertMessages, NewDataFactory().GetMsgStreamInsertMsgs(2)...)
	return fgMsg
}
// GenFlowGraphDeleteMsg produces a flowGraphMsg carrying one generated
// delete message for the given primary keys on chanName, spanning the full
// [0, MaxUint64] time range. insertMessages is initialized but left empty.
func GenFlowGraphDeleteMsg(pks []int64, chanName string) flowGraphMsg {
	positions := []*internalpb.MsgPosition{
		{
			ChannelName: chanName,
			MsgID:       make([]byte, 0),
			Timestamp:   0,
		},
	}

	fgMsg := flowGraphMsg{
		insertMessages: make([]*msgstream.InsertMsg, 0),
		timeRange: TimeRange{
			timestampMin: 0,
			timestampMax: math.MaxUint64,
		},
		startPositions: positions,
		endPositions:   positions,
	}
	fgMsg.deleteMessages = append(fgMsg.deleteMessages, NewDataFactory().GenMsgStreamDeleteMsg(pks, chanName))
	return fgMsg
}
type AllocatorFactory struct {
sync.Mutex
r *rand.Rand

View File

@ -38,6 +38,7 @@ type ParamTable struct {
FlushInsertBufferSize int64
InsertBinlogRootPath string
StatsBinlogRootPath string
DeleteBinlogRootPath string
Alias string // Different datanode in one machine
// Pulsar address
@ -103,6 +104,7 @@ func (p *ParamTable) Init() {
p.initFlushInsertBufferSize()
p.initInsertBinlogRootPath()
p.initStatsBinlogRootPath()
p.initDeleteBinlogRootPath()
p.initPulsarAddress()
p.initRocksmqPath()
@ -153,6 +155,14 @@ func (p *ParamTable) initStatsBinlogRootPath() {
p.StatsBinlogRootPath = path.Join(rootPath, "stats_log")
}
// initDeleteBinlogRootPath places delete binlogs under the configured MinIO
// root, in the "delta_log" subtree. Panics if minio.rootPath is not set.
func (p *ParamTable) initDeleteBinlogRootPath() {
	root, err := p.Load("minio.rootPath")
	if err != nil {
		panic(err)
	}
	p.DeleteBinlogRootPath = path.Join(root, "delta_log")
}
func (p *ParamTable) initPulsarAddress() {
url, err := p.Load("_PulsarAddress")
if err != nil {

View File

@ -179,6 +179,10 @@ func (replica *SegmentReplica) getCollectionAndPartitionID(segID UniqueID) (coll
return seg.collectionID, seg.partitionID, nil
}
if seg, ok := replica.flushedSegments[segID]; ok {
return seg.collectionID, seg.partitionID, nil
}
return 0, 0, fmt.Errorf("Cannot find segment, id = %v", segID)
}

View File

@ -103,9 +103,9 @@ func TestSegmentReplica_getCollectionAndPartitionID(te *testing.T) {
}
collID, parID, err := sr.getCollectionAndPartitionID(test.segInFlushed)
assert.Error(t, err)
assert.Zero(t, collID)
assert.Zero(t, parID)
assert.NoError(t, err)
assert.Equal(t, test.inCollID, collID)
assert.Equal(t, test.inParID, parID)
} else {
sr := &SegmentReplica{}
collID, parID, err := sr.getCollectionAndPartitionID(1000)

View File

@ -4749,40 +4749,40 @@ func (dt *deleteTask) Execute(ctx context.Context) (err error) {
}
msgPack.Msgs[0] = tsMsg
//collID := dt.DeleteRequest.CollectionID
//stream, err := dt.chMgr.getDMLStream(collID)
//if err != nil {
// err = dt.chMgr.createDMLMsgStream(collID)
// if err != nil {
// dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
// dt.result.Status.Reason = err.Error()
// return err
// }
// channels, err := dt.chMgr.getChannels(collID)
// if err == nil {
// for _, pchan := range channels {
// err := dt.chTicker.addPChan(pchan)
// if err != nil {
// log.Warn("failed to add pchan to channels time ticker",
// zap.Error(err),
// zap.String("pchan", pchan))
// }
// }
// }
// stream, err = dt.chMgr.getDMLStream(collID)
// if err != nil {
// dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
// dt.result.Status.Reason = err.Error()
// return err
// }
//}
//
//err = stream.Produce(&msgPack)
//if err != nil {
// dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
// dt.result.Status.Reason = err.Error()
// return err
//}
collID := dt.DeleteRequest.CollectionID
stream, err := dt.chMgr.getDMLStream(collID)
if err != nil {
err = dt.chMgr.createDMLMsgStream(collID)
if err != nil {
dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
dt.result.Status.Reason = err.Error()
return err
}
channels, err := dt.chMgr.getChannels(collID)
if err == nil {
for _, pchan := range channels {
err := dt.chTicker.addPChan(pchan)
if err != nil {
log.Warn("failed to add pchan to channels time ticker",
zap.Error(err),
zap.String("pchan", pchan))
}
}
}
stream, err = dt.chMgr.getDMLStream(collID)
if err != nil {
dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
dt.result.Status.Reason = err.Error()
return err
}
}
err = stream.Produce(&msgPack)
if err != nil {
dt.result.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
dt.result.Status.Reason = err.Error()
return err
}
return nil
}