mirror of https://github.com/milvus-io/milvus.git
data node, save binlog to minIO, and let data service save these meta (#5618)
Signed-off-by: yefu.chen <yefu.chen@zilliz.com>pull/5779/head
parent
91ef35bad4
commit
3c70675313
|
@ -13,10 +13,13 @@ package datanode
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus/internal/log"
|
"github.com/milvus-io/milvus/internal/log"
|
||||||
"github.com/milvus-io/milvus/internal/msgstream"
|
"github.com/milvus-io/milvus/internal/msgstream"
|
||||||
|
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
|
"github.com/milvus-io/milvus/internal/types"
|
||||||
"github.com/milvus-io/milvus/internal/util/flowgraph"
|
"github.com/milvus-io/milvus/internal/util/flowgraph"
|
||||||
|
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
@ -30,6 +33,7 @@ type dataSyncService struct {
|
||||||
idAllocator allocatorInterface
|
idAllocator allocatorInterface
|
||||||
msFactory msgstream.Factory
|
msFactory msgstream.Factory
|
||||||
collectionID UniqueID
|
collectionID UniqueID
|
||||||
|
dataService types.DataService
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDataSyncService(ctx context.Context,
|
func newDataSyncService(ctx context.Context,
|
||||||
|
@ -83,6 +87,43 @@ func (dsService *dataSyncService) initNodes(vchanPair *datapb.VchannelInfo) {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
saveBinlog := func(fu *autoFlushUnit) error {
|
||||||
|
id2path := []*datapb.ID2PathList{}
|
||||||
|
checkPoints := []*datapb.CheckPoint{}
|
||||||
|
for k, v := range fu.field2Path {
|
||||||
|
id2path = append(id2path, &datapb.ID2PathList{ID: k, Paths: []string{v}})
|
||||||
|
}
|
||||||
|
for k, v := range fu.openSegCheckpoints {
|
||||||
|
v := v
|
||||||
|
checkPoints = append(checkPoints, &datapb.CheckPoint{
|
||||||
|
SegmentID: k,
|
||||||
|
NumOfRows: fu.numRows[k],
|
||||||
|
Position: &v,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
req := &datapb.SaveBinlogPathsRequest{
|
||||||
|
Base: &commonpb.MsgBase{
|
||||||
|
MsgType: 0, //TOD msg type
|
||||||
|
MsgID: 0, //TODO,msg id
|
||||||
|
Timestamp: 0, //TODO, time stamp
|
||||||
|
SourceID: Params.NodeID,
|
||||||
|
},
|
||||||
|
SegmentID: fu.segID,
|
||||||
|
CollectionID: 0, //TODO
|
||||||
|
Field2BinlogPaths: id2path,
|
||||||
|
CheckPoints: checkPoints,
|
||||||
|
Flushed: fu.flushed,
|
||||||
|
}
|
||||||
|
rsp, err := dsService.dataService.SaveBinlogPaths(dsService.ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("data service save bin log path failed, err = %w", err)
|
||||||
|
}
|
||||||
|
if rsp.ErrorCode != commonpb.ErrorCode_Success {
|
||||||
|
return fmt.Errorf("data service save bin log path failed, reason = %s", rsp.Reason)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
var dmStreamNode Node = newDmInputNode(dsService.ctx, dsService.msFactory, vchanPair.GetChannelName(), vchanPair.GetCheckPoints())
|
var dmStreamNode Node = newDmInputNode(dsService.ctx, dsService.msFactory, vchanPair.GetChannelName(), vchanPair.GetCheckPoints())
|
||||||
var ddNode Node = newDDNode()
|
var ddNode Node = newDDNode()
|
||||||
var insertBufferNode Node = newInsertBufferNode(
|
var insertBufferNode Node = newInsertBufferNode(
|
||||||
|
@ -91,7 +132,7 @@ func (dsService *dataSyncService) initNodes(vchanPair *datapb.VchannelInfo) {
|
||||||
dsService.msFactory,
|
dsService.msFactory,
|
||||||
dsService.idAllocator,
|
dsService.idAllocator,
|
||||||
dsService.flushChan,
|
dsService.flushChan,
|
||||||
nil, //TODO,=================== call data service save binlog =========
|
saveBinlog,
|
||||||
)
|
)
|
||||||
|
|
||||||
dsService.fg.AddNode(dmStreamNode)
|
dsService.fg.AddNode(dmStreamNode)
|
||||||
|
|
|
@ -64,10 +64,11 @@ type insertBufferNode struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type autoFlushUnit struct {
|
type autoFlushUnit struct {
|
||||||
|
collID UniqueID
|
||||||
segID UniqueID
|
segID UniqueID
|
||||||
numRows int64
|
|
||||||
field2Path map[UniqueID]string
|
field2Path map[UniqueID]string
|
||||||
openSegCheckpoints map[UniqueID]internalpb.MsgPosition
|
openSegCheckpoints map[UniqueID]internalpb.MsgPosition
|
||||||
|
numRows map[UniqueID]int64
|
||||||
flushed bool
|
flushed bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -503,15 +504,23 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
|
||||||
log.Debug("segment is empty")
|
log.Debug("segment is empty")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
segSta, err := ibNode.replica.getSegmentStatisticsUpdates(fu.segID)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fu.numRows = segSta.NumRows
|
|
||||||
fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint()
|
fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint()
|
||||||
|
fu.numRows = make(map[UniqueID]int64)
|
||||||
|
for k := range fu.openSegCheckpoints {
|
||||||
|
segStat, err := ibNode.replica.getSegmentStatisticsUpdates(k)
|
||||||
|
if err != nil {
|
||||||
|
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
|
||||||
|
fu.numRows = nil
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fu.numRows[k] = segStat.NumRows
|
||||||
|
}
|
||||||
|
if fu.numRows == nil {
|
||||||
|
log.Debug("failed on get segment num rows")
|
||||||
|
break
|
||||||
|
}
|
||||||
fu.flushed = false
|
fu.flushed = false
|
||||||
if ibNode.dsSaveBinlog(&fu) != nil {
|
if err := ibNode.dsSaveBinlog(&fu); err != nil {
|
||||||
log.Debug("data service save bin log path failed", zap.Error(err))
|
log.Debug("data service save bin log path failed", zap.Error(err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -523,19 +532,30 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
|
||||||
currentSegID := fmsg.segmentID
|
currentSegID := fmsg.segmentID
|
||||||
log.Debug(". Receiving flush message", zap.Int64("segmentID", currentSegID))
|
log.Debug(". Receiving flush message", zap.Int64("segmentID", currentSegID))
|
||||||
|
|
||||||
segSta, err := ibNode.replica.getSegmentStatisticsUpdates(currentSegID)
|
checkPoints := ibNode.replica.listOpenSegmentCheckPoint()
|
||||||
if err != nil {
|
numRows := make(map[UniqueID]int64)
|
||||||
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
|
for k := range checkPoints {
|
||||||
|
segStat, err := ibNode.replica.getSegmentStatisticsUpdates(k)
|
||||||
|
if err != nil {
|
||||||
|
log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
|
||||||
|
numRows = nil
|
||||||
|
break
|
||||||
|
}
|
||||||
|
numRows[k] = segStat.NumRows
|
||||||
|
}
|
||||||
|
if numRows == nil {
|
||||||
|
log.Debug("failed on get segment num rows")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if ibNode.insertBuffer.size(currentSegID) <= 0 {
|
if ibNode.insertBuffer.size(currentSegID) <= 0 {
|
||||||
log.Debug(".. Buffer empty ...")
|
log.Debug(".. Buffer empty ...")
|
||||||
ibNode.dsSaveBinlog(&autoFlushUnit{
|
ibNode.dsSaveBinlog(&autoFlushUnit{
|
||||||
|
collID: fmsg.collectionID,
|
||||||
segID: currentSegID,
|
segID: currentSegID,
|
||||||
numRows: segSta.NumRows,
|
numRows: numRows,
|
||||||
field2Path: nil,
|
field2Path: map[UniqueID]string{},
|
||||||
openSegCheckpoints: ibNode.replica.listOpenSegmentCheckPoint(),
|
openSegCheckpoints: checkPoints,
|
||||||
flushed: true,
|
flushed: true,
|
||||||
})
|
})
|
||||||
fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: []string{}}}
|
fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: []string{}}}
|
||||||
|
@ -581,8 +601,8 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
|
||||||
fu := <-finishCh
|
fu := <-finishCh
|
||||||
close(finishCh)
|
close(finishCh)
|
||||||
if fu.field2Path != nil {
|
if fu.field2Path != nil {
|
||||||
fu.numRows = segSta.NumRows
|
fu.numRows = numRows
|
||||||
fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint()
|
fu.openSegCheckpoints = checkPoints
|
||||||
fu.flushed = true
|
fu.flushed = true
|
||||||
if ibNode.dsSaveBinlog(&fu) != nil {
|
if ibNode.dsSaveBinlog(&fu) != nil {
|
||||||
log.Debug("data service save bin log path failed", zap.Error(err))
|
log.Debug("data service save bin log path failed", zap.Error(err))
|
||||||
|
@ -703,7 +723,7 @@ func flushSegment(collMeta *etcdpb.CollectionMeta, segID, partitionID, collID Un
|
||||||
}
|
}
|
||||||
|
|
||||||
replica.setSegmentCheckPoint(segID)
|
replica.setSegmentCheckPoint(segID)
|
||||||
flushUnit <- autoFlushUnit{segID: segID, field2Path: field2Path}
|
flushUnit <- autoFlushUnit{collID: collID, segID: segID, field2Path: field2Path}
|
||||||
clearFn(true)
|
clearFn(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue