mirror of https://github.com/milvus-io/milvus.git
314 lines
9.1 KiB
Go
314 lines
9.1 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package datanode
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
"sync"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus/internal/common"
|
|
"github.com/milvus-io/milvus/internal/log"
|
|
"github.com/milvus-io/milvus/internal/metrics"
|
|
"github.com/milvus-io/milvus/internal/mq/msgstream"
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
"github.com/milvus-io/milvus/internal/util/trace"
|
|
"github.com/opentracing/opentracing-go"
|
|
)
|
|
|
|
type (
|
|
// DeleteData record deleted IDs and Timestamps
|
|
DeleteData = storage.DeleteData
|
|
)
|
|
|
|
// DeleteNode is to process delete msg, flush delete info into storage.
|
|
type deleteNode struct {
|
|
BaseNode
|
|
channelName string
|
|
delBuf sync.Map // map[segmentID]*DelDataBuf
|
|
replica Replica
|
|
idAllocator allocatorInterface
|
|
flushManager flushManager
|
|
|
|
clearSignal chan<- string
|
|
}
|
|
|
|
// DelDataBuf buffers insert data, monitoring buffer size and limit
|
|
// size and limit both indicate numOfRows
|
|
type DelDataBuf struct {
|
|
datapb.Binlog
|
|
delData *DeleteData
|
|
}
|
|
|
|
func (ddb *DelDataBuf) updateSize(size int64) {
|
|
ddb.EntriesNum += size
|
|
}
|
|
|
|
func (ddb *DelDataBuf) updateTimeRange(tr TimeRange) {
|
|
if tr.timestampMin < ddb.TimestampFrom {
|
|
ddb.TimestampFrom = tr.timestampMin
|
|
}
|
|
if tr.timestampMax > ddb.TimestampTo {
|
|
ddb.TimestampTo = tr.timestampMax
|
|
}
|
|
}
|
|
|
|
func (ddb *DelDataBuf) updateFromBuf(buf *DelDataBuf) {
|
|
ddb.updateSize(buf.EntriesNum)
|
|
|
|
tr := TimeRange{timestampMax: buf.TimestampTo, timestampMin: buf.TimestampFrom}
|
|
ddb.updateTimeRange(tr)
|
|
|
|
ddb.delData.Pks = append(ddb.delData.Pks, buf.delData.Pks...)
|
|
ddb.delData.Tss = append(ddb.delData.Tss, buf.delData.Tss...)
|
|
}
|
|
|
|
func newDelDataBuf() *DelDataBuf {
|
|
return &DelDataBuf{
|
|
delData: &DeleteData{},
|
|
Binlog: datapb.Binlog{
|
|
EntriesNum: 0,
|
|
TimestampFrom: math.MaxUint64,
|
|
TimestampTo: 0,
|
|
},
|
|
}
|
|
}
|
|
|
|
func (dn *deleteNode) Name() string {
|
|
return "deleteNode-" + dn.channelName
|
|
}
|
|
|
|
func (dn *deleteNode) Close() {
|
|
log.Info("Flowgraph Delete Node closing")
|
|
}
|
|
|
|
func (dn *deleteNode) bufferDeleteMsg(msg *msgstream.DeleteMsg, tr TimeRange) error {
|
|
log.Debug("bufferDeleteMsg", zap.Any("primary keys", msg.PrimaryKeys), zap.String("vChannelName", dn.channelName))
|
|
|
|
// Update delBuf for merged segments
|
|
compactedTo2From := dn.replica.listCompactedSegmentIDs()
|
|
for compactedTo, compactedFrom := range compactedTo2From {
|
|
compactToDelBuff := newDelDataBuf()
|
|
for _, segID := range compactedFrom {
|
|
value, loaded := dn.delBuf.LoadAndDelete(segID)
|
|
if loaded {
|
|
compactToDelBuff.updateFromBuf(value.(*DelDataBuf))
|
|
}
|
|
}
|
|
dn.delBuf.Store(compactedTo, compactToDelBuff)
|
|
dn.replica.removeSegments(compactedFrom...)
|
|
log.Debug("update delBuf for merged segments",
|
|
zap.Int64("compactedTo segmentID", compactedTo),
|
|
zap.Int64s("compactedFrom segmentIDs", compactedFrom),
|
|
)
|
|
}
|
|
|
|
segIDToPkMap := make(map[UniqueID][]int64)
|
|
segIDToTsMap := make(map[UniqueID][]uint64)
|
|
|
|
m := dn.filterSegmentByPK(msg.PartitionID, msg.PrimaryKeys)
|
|
for i, pk := range msg.PrimaryKeys {
|
|
segIDs, ok := m[pk]
|
|
if !ok {
|
|
log.Warn("primary key not exist in all segments",
|
|
zap.Int64("primary key", pk),
|
|
zap.String("vChannelName", dn.channelName))
|
|
continue
|
|
}
|
|
for _, segID := range segIDs {
|
|
segIDToPkMap[segID] = append(segIDToPkMap[segID], pk)
|
|
segIDToTsMap[segID] = append(segIDToTsMap[segID], msg.Timestamps[i])
|
|
}
|
|
}
|
|
|
|
for segID, pks := range segIDToPkMap {
|
|
rows := len(pks)
|
|
tss, ok := segIDToTsMap[segID]
|
|
if !ok || rows != len(tss) {
|
|
// TODO: what's the expected behavior after this Error?
|
|
log.Error("primary keys and timestamp's element num mis-match")
|
|
continue
|
|
}
|
|
|
|
var delDataBuf *DelDataBuf
|
|
value, ok := dn.delBuf.Load(segID)
|
|
if ok {
|
|
delDataBuf = value.(*DelDataBuf)
|
|
} else {
|
|
delDataBuf = newDelDataBuf()
|
|
}
|
|
delData := delDataBuf.delData
|
|
|
|
for i := 0; i < rows; i++ {
|
|
delData.Pks = append(delData.Pks, pks[i])
|
|
delData.Tss = append(delData.Tss, tss[i])
|
|
log.Debug("delete",
|
|
zap.Int64("primary key", pks[i]),
|
|
zap.Uint64("ts", tss[i]),
|
|
zap.Int64("segmentID", segID),
|
|
zap.String("vChannelName", dn.channelName))
|
|
}
|
|
|
|
// store
|
|
delDataBuf.updateSize(int64(rows))
|
|
metrics.DataNodeConsumeMsgRowsCount.WithLabelValues(metrics.DeleteLabel, fmt.Sprint(Params.DataNodeCfg.NodeID)).Add(float64(rows))
|
|
delDataBuf.updateTimeRange(tr)
|
|
dn.delBuf.Store(segID, delDataBuf)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (dn *deleteNode) showDelBuf() {
|
|
segments := dn.replica.filterSegments(dn.channelName, common.InvalidPartitionID)
|
|
for _, seg := range segments {
|
|
segID := seg.segmentID
|
|
if v, ok := dn.delBuf.Load(segID); ok {
|
|
delDataBuf, _ := v.(*DelDataBuf)
|
|
log.Debug("delta buffer status",
|
|
zap.Int64("segID", segID),
|
|
zap.Int64("size", delDataBuf.GetEntriesNum()),
|
|
zap.String("vchannel", dn.channelName))
|
|
// TODO control the printed length
|
|
length := len(delDataBuf.delData.Pks)
|
|
for i := 0; i < length; i++ {
|
|
log.Debug("del data",
|
|
zap.Int64("pk", delDataBuf.delData.Pks[i]),
|
|
zap.Uint64("ts", delDataBuf.delData.Tss[i]),
|
|
zap.Int64("segmentID", segID),
|
|
zap.String("vchannel", dn.channelName),
|
|
)
|
|
}
|
|
} else {
|
|
log.Error("segment not exist",
|
|
zap.Int64("segID", segID),
|
|
zap.String("vchannel", dn.channelName))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Operate implementing flowgraph.Node, performs delete data process
|
|
func (dn *deleteNode) Operate(in []Msg) []Msg {
|
|
//log.Debug("deleteNode Operating")
|
|
|
|
if len(in) != 1 {
|
|
log.Error("Invalid operate message input in deleteNode", zap.Int("input length", len(in)))
|
|
return nil
|
|
}
|
|
|
|
fgMsg, ok := in[0].(*flowGraphMsg)
|
|
if !ok {
|
|
log.Warn("type assertion failed for flowGraphMsg")
|
|
return nil
|
|
}
|
|
|
|
var spans []opentracing.Span
|
|
for _, msg := range fgMsg.deleteMessages {
|
|
sp, ctx := trace.StartSpanFromContext(msg.TraceCtx())
|
|
spans = append(spans, sp)
|
|
msg.SetTraceCtx(ctx)
|
|
}
|
|
|
|
for i, msg := range fgMsg.deleteMessages {
|
|
traceID, _, _ := trace.InfoFromSpan(spans[i])
|
|
log.Info("Buffer delete request in DataNode", zap.String("traceID", traceID))
|
|
|
|
if err := dn.bufferDeleteMsg(msg, fgMsg.timeRange); err != nil {
|
|
log.Error("buffer delete msg failed", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// show all data in dn.delBuf
|
|
if len(fgMsg.deleteMessages) != 0 {
|
|
dn.showDelBuf()
|
|
}
|
|
|
|
// handle flush
|
|
if len(fgMsg.segmentsToFlush) > 0 {
|
|
log.Debug("DeleteNode receives flush message",
|
|
zap.Int64s("segIDs", fgMsg.segmentsToFlush),
|
|
zap.String("vChannelName", dn.channelName))
|
|
for _, segmentToFlush := range fgMsg.segmentsToFlush {
|
|
buf, ok := dn.delBuf.Load(segmentToFlush)
|
|
if !ok {
|
|
// no related delta data to flush, send empty buf to complete flush life-cycle
|
|
dn.flushManager.flushDelData(nil, segmentToFlush, fgMsg.endPositions[0])
|
|
} else {
|
|
err := dn.flushManager.flushDelData(buf.(*DelDataBuf), segmentToFlush, fgMsg.endPositions[0])
|
|
if err != nil {
|
|
log.Warn("Failed to flush delete data", zap.Error(err))
|
|
} else {
|
|
// remove delete buf
|
|
dn.delBuf.Delete(segmentToFlush)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// drop collection signal, delete node shall notify flush manager all data are cleared and send signal to DataSyncService cleaner
|
|
if fgMsg.dropCollection {
|
|
dn.flushManager.notifyAllFlushed()
|
|
log.Debug("DeleteNode notifies BackgroundGC to release vchannel", zap.String("vChannelName", dn.channelName))
|
|
dn.clearSignal <- dn.channelName
|
|
}
|
|
|
|
for _, sp := range spans {
|
|
sp.Finish()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// filterSegmentByPK returns the bloom filter check result.
|
|
// If the key may exists in the segment, returns it in map.
|
|
// If the key not exists in the segment, the segment is filter out.
|
|
func (dn *deleteNode) filterSegmentByPK(partID UniqueID, pks []int64) map[int64][]int64 {
|
|
result := make(map[int64][]int64)
|
|
buf := make([]byte, 8)
|
|
segments := dn.replica.filterSegments(dn.channelName, partID)
|
|
for _, pk := range pks {
|
|
for _, segment := range segments {
|
|
common.Endian.PutUint64(buf, uint64(pk))
|
|
exist := segment.pkFilter.Test(buf)
|
|
if exist {
|
|
result[pk] = append(result[pk], segment.segmentID)
|
|
}
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func newDeleteNode(ctx context.Context, fm flushManager, sig chan<- string, config *nodeConfig) (*deleteNode, error) {
|
|
baseNode := BaseNode{}
|
|
baseNode.SetMaxQueueLength(config.maxQueueLength)
|
|
baseNode.SetMaxParallelism(config.maxParallelism)
|
|
|
|
return &deleteNode{
|
|
BaseNode: baseNode,
|
|
delBuf: sync.Map{},
|
|
|
|
replica: config.replica,
|
|
idAllocator: config.allocator,
|
|
channelName: config.vChannelName,
|
|
flushManager: fm,
|
|
clearSignal: sig,
|
|
}, nil
|
|
}
|