2021-10-15 10:07:09 +00:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
2021-04-19 07:16:33 +00:00
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
2021-10-15 10:07:09 +00:00
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
2021-04-19 07:16:33 +00:00
|
|
|
//
|
2021-10-15 10:07:09 +00:00
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2021-04-19 07:16:33 +00:00
|
|
|
|
2021-01-19 03:37:16 +00:00
|
|
|
package datanode
|
|
|
|
|
|
|
|
import (
|
2021-06-07 05:58:37 +00:00
|
|
|
"sync"
|
|
|
|
|
2021-02-26 02:13:36 +00:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2021-04-22 06:45:57 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
|
|
|
"github.com/milvus-io/milvus/internal/msgstream"
|
2021-06-02 07:58:33 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
2021-06-07 05:58:37 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
2021-06-02 07:58:33 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
2021-10-07 11:54:56 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/flowgraph"
|
2021-06-30 08:18:13 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/trace"
|
|
|
|
"github.com/opentracing/opentracing-go"
|
2021-01-19 03:37:16 +00:00
|
|
|
)
|
|
|
|
|
2021-10-07 11:54:56 +00:00
|
|
|
// make sure ddNode implements flowgraph.Node
|
|
|
|
var _ flowgraph.Node = (*ddNode)(nil) // compile-time check: the build fails if *ddNode stops satisfying flowgraph.Node
|
|
|
|
|
2021-10-06 14:51:58 +00:00
|
|
|
// ddNode filter messages from message streams.
|
|
|
|
//
|
|
|
|
// ddNode recives all the messages from message stream dml channels, including insert messages,
|
|
|
|
// delete messages and ddl messages like CreateCollectionMsg.
|
|
|
|
//
|
|
|
|
// ddNode filters insert messages according to the `flushedSegment` and `FilterThreshold`.
|
|
|
|
// If the timestamp of the insert message is earlier than `FilterThreshold`, ddNode will
|
|
|
|
// filter out the insert message for those who belong to `flushedSegment`
|
|
|
|
//
|
|
|
|
// When receiving a `DropCollection` message, ddNode will send a signal to DataNode `BackgroundGC`
|
|
|
|
// goroutinue, telling DataNode to release the resources of this perticular flow graph.
|
|
|
|
//
|
|
|
|
// After the filtering process, ddNode passes all the valid insert messages and delete message
|
|
|
|
// to the following flow graph node, which in DataNode is `insertBufferNode`
|
2021-01-19 03:37:16 +00:00
|
|
|
type ddNode struct {
|
|
|
|
BaseNode
|
2021-06-07 03:25:37 +00:00
|
|
|
|
|
|
|
clearSignal chan<- UniqueID
|
|
|
|
collectionID UniqueID
|
2021-06-07 05:58:37 +00:00
|
|
|
|
2021-09-07 07:41:59 +00:00
|
|
|
segID2SegInfo sync.Map // segment ID to *SegmentInfo
|
2021-10-08 11:09:12 +00:00
|
|
|
flushedSegments []*datapb.SegmentInfo
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|
|
|
|
|
2021-10-07 11:54:56 +00:00
|
|
|
// Name returns node name, implementing flowgraph.Node
|
2021-06-02 07:58:33 +00:00
|
|
|
func (ddn *ddNode) Name() string {
|
2021-01-19 03:37:16 +00:00
|
|
|
return "ddNode"
|
|
|
|
}
|
|
|
|
|
2021-10-07 11:54:56 +00:00
|
|
|
// Operate handles input messages, implementing flowgrpah.Node
|
2021-09-07 07:41:59 +00:00
|
|
|
func (ddn *ddNode) Operate(in []Msg) []Msg {
|
2021-06-08 11:25:37 +00:00
|
|
|
// log.Debug("DDNode Operating")
|
|
|
|
|
2021-01-19 03:37:16 +00:00
|
|
|
if len(in) != 1 {
|
2021-09-07 07:41:59 +00:00
|
|
|
log.Warn("Invalid operate message input in ddNode", zap.Int("input length", len(in)))
|
|
|
|
return []Msg{}
|
2021-05-31 07:28:30 +00:00
|
|
|
}
|
|
|
|
|
2021-02-25 09:35:36 +00:00
|
|
|
msMsg, ok := in[0].(*MsgStreamMsg)
|
2021-01-19 03:37:16 +00:00
|
|
|
if !ok {
|
2021-09-07 07:41:59 +00:00
|
|
|
log.Warn("Type assertion failed for MsgStreamMsg")
|
|
|
|
return []Msg{}
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|
|
|
|
|
2021-06-30 08:18:13 +00:00
|
|
|
var spans []opentracing.Span
|
|
|
|
for _, msg := range msMsg.TsMessages() {
|
|
|
|
sp, ctx := trace.StartSpanFromContext(msg.TraceCtx())
|
|
|
|
spans = append(spans, sp)
|
|
|
|
msg.SetTraceCtx(ctx)
|
|
|
|
}
|
|
|
|
|
2021-09-26 02:43:57 +00:00
|
|
|
var fgMsg = flowGraphMsg{
|
2021-06-02 07:58:33 +00:00
|
|
|
insertMessages: make([]*msgstream.InsertMsg, 0),
|
2021-01-19 03:37:16 +00:00
|
|
|
timeRange: TimeRange{
|
|
|
|
timestampMin: msMsg.TimestampMin(),
|
|
|
|
timestampMax: msMsg.TimestampMax(),
|
|
|
|
},
|
2021-06-02 07:58:33 +00:00
|
|
|
startPositions: make([]*internalpb.MsgPosition, 0),
|
|
|
|
endPositions: make([]*internalpb.MsgPosition, 0),
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|
|
|
|
|
2021-06-02 07:58:33 +00:00
|
|
|
for _, msg := range msMsg.TsMessages() {
|
|
|
|
switch msg.Type() {
|
|
|
|
case commonpb.MsgType_DropCollection:
|
2021-06-07 03:25:37 +00:00
|
|
|
if msg.(*msgstream.DropCollectionMsg).GetCollectionID() == ddn.collectionID {
|
2021-06-09 09:31:48 +00:00
|
|
|
log.Info("Destroying current flowgraph", zap.Any("collectionID", ddn.collectionID))
|
2021-06-07 03:25:37 +00:00
|
|
|
ddn.clearSignal <- ddn.collectionID
|
2021-07-15 02:05:55 +00:00
|
|
|
return []Msg{}
|
2021-06-07 03:25:37 +00:00
|
|
|
}
|
2021-06-02 07:58:33 +00:00
|
|
|
case commonpb.MsgType_Insert:
|
2021-10-11 08:31:44 +00:00
|
|
|
log.Debug("DDNode receive insert messages")
|
2021-08-31 10:35:58 +00:00
|
|
|
imsg := msg.(*msgstream.InsertMsg)
|
2021-09-07 07:41:59 +00:00
|
|
|
if imsg.CollectionID != ddn.collectionID {
|
|
|
|
//log.Debug("filter invalid InsertMsg, collection mis-match",
|
|
|
|
// zap.Int64("Get msg collID", imsg.CollectionID),
|
|
|
|
// zap.Int64("Expected collID", ddn.collectionID))
|
|
|
|
continue
|
|
|
|
}
|
2021-06-07 05:58:37 +00:00
|
|
|
if msg.EndTs() < FilterThreshold {
|
2021-06-08 11:25:37 +00:00
|
|
|
log.Info("Filtering Insert Messages",
|
|
|
|
zap.Uint64("Message endts", msg.EndTs()),
|
|
|
|
zap.Uint64("FilterThreshold", FilterThreshold),
|
|
|
|
)
|
2021-09-18 01:13:50 +00:00
|
|
|
if ddn.filterFlushedSegmentInsertMessages(imsg) {
|
2021-06-11 01:24:52 +00:00
|
|
|
continue
|
2021-06-07 05:58:37 +00:00
|
|
|
}
|
2021-05-31 07:28:30 +00:00
|
|
|
}
|
2021-09-26 02:43:57 +00:00
|
|
|
fgMsg.insertMessages = append(fgMsg.insertMessages, imsg)
|
2021-10-11 08:31:44 +00:00
|
|
|
case commonpb.MsgType_Delete:
|
|
|
|
log.Debug("DDNode receive delete messages")
|
|
|
|
dmsg := msg.(*msgstream.DeleteMsg)
|
|
|
|
if dmsg.CollectionID != ddn.collectionID {
|
|
|
|
//log.Debug("filter invalid DeleteMsg, collection mis-match",
|
|
|
|
// zap.Int64("Get msg collID", dmsg.CollectionID),
|
|
|
|
// zap.Int64("Expected collID", ddn.collectionID))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
fgMsg.deleteMessages = append(fgMsg.deleteMessages, dmsg)
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-26 02:43:57 +00:00
|
|
|
fgMsg.startPositions = append(fgMsg.startPositions, msMsg.StartPositions()...)
|
|
|
|
fgMsg.endPositions = append(fgMsg.endPositions, msMsg.EndPositions()...)
|
2021-01-22 11:36:09 +00:00
|
|
|
|
2021-06-30 08:18:13 +00:00
|
|
|
for _, sp := range spans {
|
|
|
|
sp.Finish()
|
|
|
|
}
|
|
|
|
|
2021-10-11 08:31:44 +00:00
|
|
|
return []Msg{&fgMsg}
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|
|
|
|
|
2021-06-11 01:24:52 +00:00
|
|
|
func (ddn *ddNode) filterFlushedSegmentInsertMessages(msg *msgstream.InsertMsg) bool {
|
2021-06-07 05:58:37 +00:00
|
|
|
if ddn.isFlushed(msg.GetSegmentID()) {
|
2021-06-11 01:24:52 +00:00
|
|
|
return true
|
2021-06-07 05:58:37 +00:00
|
|
|
}
|
|
|
|
|
2021-09-07 07:41:59 +00:00
|
|
|
if si, ok := ddn.segID2SegInfo.Load(msg.GetSegmentID()); ok {
|
|
|
|
if msg.EndTs() <= si.(*datapb.SegmentInfo).GetDmlPosition().GetTimestamp() {
|
2021-06-11 01:24:52 +00:00
|
|
|
return true
|
2021-06-07 05:58:37 +00:00
|
|
|
}
|
|
|
|
|
2021-09-07 07:41:59 +00:00
|
|
|
ddn.segID2SegInfo.Delete(msg.GetSegmentID())
|
|
|
|
}
|
2021-06-11 01:24:52 +00:00
|
|
|
return false
|
2021-05-19 11:42:07 +00:00
|
|
|
}
|
|
|
|
|
2021-06-07 05:58:37 +00:00
|
|
|
func (ddn *ddNode) isFlushed(segmentID UniqueID) bool {
|
2021-10-08 11:09:12 +00:00
|
|
|
for _, s := range ddn.flushedSegments {
|
|
|
|
if s.ID == segmentID {
|
2021-06-07 05:58:37 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func newDDNode(clearSignal chan<- UniqueID, collID UniqueID, vchanInfo *datapb.VchannelInfo) *ddNode {
|
2021-01-19 03:37:16 +00:00
|
|
|
baseNode := BaseNode{}
|
2021-10-13 03:16:32 +00:00
|
|
|
baseNode.SetMaxQueueLength(Params.FlowGraphMaxQueueLength)
|
|
|
|
baseNode.SetMaxParallelism(Params.FlowGraphMaxParallelism)
|
2021-01-19 03:37:16 +00:00
|
|
|
|
2021-10-08 11:09:12 +00:00
|
|
|
fs := make([]*datapb.SegmentInfo, 0, len(vchanInfo.GetFlushedSegments()))
|
2021-09-07 07:41:59 +00:00
|
|
|
fs = append(fs, vchanInfo.GetFlushedSegments()...)
|
|
|
|
log.Debug("ddNode add flushed segment",
|
|
|
|
zap.Int64("collectionID", vchanInfo.GetCollectionID()),
|
|
|
|
zap.Int("No. Segment", len(vchanInfo.GetFlushedSegments())),
|
|
|
|
)
|
|
|
|
|
|
|
|
dd := &ddNode{
|
|
|
|
BaseNode: baseNode,
|
|
|
|
clearSignal: clearSignal,
|
|
|
|
collectionID: collID,
|
|
|
|
flushedSegments: fs,
|
2021-06-07 05:58:37 +00:00
|
|
|
}
|
|
|
|
|
2021-09-07 07:41:59 +00:00
|
|
|
for _, us := range vchanInfo.GetUnflushedSegments() {
|
|
|
|
dd.segID2SegInfo.Store(us.GetID(), us)
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|
2021-09-07 07:41:59 +00:00
|
|
|
|
|
|
|
log.Debug("ddNode add unflushed segment",
|
|
|
|
zap.Int64("collectionID", collID),
|
|
|
|
zap.Int("No. Segment", len(vchanInfo.GetUnflushedSegments())),
|
|
|
|
)
|
|
|
|
|
|
|
|
return dd
|
2021-01-19 03:37:16 +00:00
|
|
|
}
|