data node, save binlog to minIO, and let data service save these meta (#5618)

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>
pull/5779/head
neza2017 2021-06-04 19:20:34 +08:00 committed by zhenshan.cao
parent 91ef35bad4
commit 3c70675313
2 changed files with 79 additions and 18 deletions

View File

@ -13,10 +13,13 @@ package datanode
import ( import (
"context" "context"
"fmt"
"github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/msgstream" "github.com/milvus-io/milvus/internal/msgstream"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/flowgraph" "github.com/milvus-io/milvus/internal/util/flowgraph"
"go.uber.org/zap" "go.uber.org/zap"
@ -30,6 +33,7 @@ type dataSyncService struct {
idAllocator allocatorInterface idAllocator allocatorInterface
msFactory msgstream.Factory msFactory msgstream.Factory
collectionID UniqueID collectionID UniqueID
dataService types.DataService
} }
func newDataSyncService(ctx context.Context, func newDataSyncService(ctx context.Context,
@ -83,6 +87,43 @@ func (dsService *dataSyncService) initNodes(vchanPair *datapb.VchannelInfo) {
panic(err) panic(err)
} }
saveBinlog := func(fu *autoFlushUnit) error {
id2path := []*datapb.ID2PathList{}
checkPoints := []*datapb.CheckPoint{}
for k, v := range fu.field2Path {
id2path = append(id2path, &datapb.ID2PathList{ID: k, Paths: []string{v}})
}
for k, v := range fu.openSegCheckpoints {
v := v
checkPoints = append(checkPoints, &datapb.CheckPoint{
SegmentID: k,
NumOfRows: fu.numRows[k],
Position: &v,
})
}
req := &datapb.SaveBinlogPathsRequest{
Base: &commonpb.MsgBase{
MsgType: 0, //TOD msg type
MsgID: 0, //TODO,msg id
Timestamp: 0, //TODO, time stamp
SourceID: Params.NodeID,
},
SegmentID: fu.segID,
CollectionID: 0, //TODO
Field2BinlogPaths: id2path,
CheckPoints: checkPoints,
Flushed: fu.flushed,
}
rsp, err := dsService.dataService.SaveBinlogPaths(dsService.ctx, req)
if err != nil {
return fmt.Errorf("data service save bin log path failed, err = %w", err)
}
if rsp.ErrorCode != commonpb.ErrorCode_Success {
return fmt.Errorf("data service save bin log path failed, reason = %s", rsp.Reason)
}
return nil
}
var dmStreamNode Node = newDmInputNode(dsService.ctx, dsService.msFactory, vchanPair.GetChannelName(), vchanPair.GetCheckPoints()) var dmStreamNode Node = newDmInputNode(dsService.ctx, dsService.msFactory, vchanPair.GetChannelName(), vchanPair.GetCheckPoints())
var ddNode Node = newDDNode() var ddNode Node = newDDNode()
var insertBufferNode Node = newInsertBufferNode( var insertBufferNode Node = newInsertBufferNode(
@ -91,7 +132,7 @@ func (dsService *dataSyncService) initNodes(vchanPair *datapb.VchannelInfo) {
dsService.msFactory, dsService.msFactory,
dsService.idAllocator, dsService.idAllocator,
dsService.flushChan, dsService.flushChan,
nil, //TODO,=================== call data service save binlog ========= saveBinlog,
) )
dsService.fg.AddNode(dmStreamNode) dsService.fg.AddNode(dmStreamNode)

View File

@ -64,10 +64,11 @@ type insertBufferNode struct {
} }
type autoFlushUnit struct { type autoFlushUnit struct {
collID UniqueID
segID UniqueID segID UniqueID
numRows int64
field2Path map[UniqueID]string field2Path map[UniqueID]string
openSegCheckpoints map[UniqueID]internalpb.MsgPosition openSegCheckpoints map[UniqueID]internalpb.MsgPosition
numRows map[UniqueID]int64
flushed bool flushed bool
} }
@ -503,15 +504,23 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
log.Debug("segment is empty") log.Debug("segment is empty")
continue continue
} }
segSta, err := ibNode.replica.getSegmentStatisticsUpdates(fu.segID)
if err != nil {
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
continue
}
fu.numRows = segSta.NumRows
fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint() fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint()
fu.numRows = make(map[UniqueID]int64)
for k := range fu.openSegCheckpoints {
segStat, err := ibNode.replica.getSegmentStatisticsUpdates(k)
if err != nil {
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
fu.numRows = nil
break
}
fu.numRows[k] = segStat.NumRows
}
if fu.numRows == nil {
log.Debug("failed on get segment num rows")
break
}
fu.flushed = false fu.flushed = false
if ibNode.dsSaveBinlog(&fu) != nil { if err := ibNode.dsSaveBinlog(&fu); err != nil {
log.Debug("data service save bin log path failed", zap.Error(err)) log.Debug("data service save bin log path failed", zap.Error(err))
} }
} }
@ -523,19 +532,30 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
currentSegID := fmsg.segmentID currentSegID := fmsg.segmentID
log.Debug(". Receiving flush message", zap.Int64("segmentID", currentSegID)) log.Debug(". Receiving flush message", zap.Int64("segmentID", currentSegID))
segSta, err := ibNode.replica.getSegmentStatisticsUpdates(currentSegID) checkPoints := ibNode.replica.listOpenSegmentCheckPoint()
if err != nil { numRows := make(map[UniqueID]int64)
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err)) for k := range checkPoints {
segStat, err := ibNode.replica.getSegmentStatisticsUpdates(k)
if err != nil {
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
numRows = nil
break
}
numRows[k] = segStat.NumRows
}
if numRows == nil {
log.Debug("failed on get segment num rows")
break break
} }
if ibNode.insertBuffer.size(currentSegID) <= 0 { if ibNode.insertBuffer.size(currentSegID) <= 0 {
log.Debug(".. Buffer empty ...") log.Debug(".. Buffer empty ...")
ibNode.dsSaveBinlog(&autoFlushUnit{ ibNode.dsSaveBinlog(&autoFlushUnit{
collID: fmsg.collectionID,
segID: currentSegID, segID: currentSegID,
numRows: segSta.NumRows, numRows: numRows,
field2Path: nil, field2Path: map[UniqueID]string{},
openSegCheckpoints: ibNode.replica.listOpenSegmentCheckPoint(), openSegCheckpoints: checkPoints,
flushed: true, flushed: true,
}) })
fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: []string{}}} fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: []string{}}}
@ -581,8 +601,8 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
fu := <-finishCh fu := <-finishCh
close(finishCh) close(finishCh)
if fu.field2Path != nil { if fu.field2Path != nil {
fu.numRows = segSta.NumRows fu.numRows = numRows
fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint() fu.openSegCheckpoints = checkPoints
fu.flushed = true fu.flushed = true
if ibNode.dsSaveBinlog(&fu) != nil { if ibNode.dsSaveBinlog(&fu) != nil {
log.Debug("data service save bin log path failed", zap.Error(err)) log.Debug("data service save bin log path failed", zap.Error(err))
@ -703,7 +723,7 @@ func flushSegment(collMeta *etcdpb.CollectionMeta, segID, partitionID, collID Un
} }
replica.setSegmentCheckPoint(segID) replica.setSegmentCheckPoint(segID)
flushUnit <- autoFlushUnit{segID: segID, field2Path: field2Path} flushUnit <- autoFlushUnit{collID: collID, segID: segID, field2Path: field2Path}
clearFn(true) clearFn(true)
} }