package writebuffer

import (
	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"

	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
	"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/pkg/common"
	"github.com/milvus-io/milvus/pkg/mq/msgstream"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)
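
// bfWriteBuffer is the bloom-filter-based WriteBuffer implementation. It
// routes delete records to the segments whose bloom filters report a possible
// hit for the deleted primary keys.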
type bfWriteBuffer struct {
	*writeBufferBase

	syncMgr   syncmgr.SyncManager
	metacache metacache.MetaCache
}
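
// NewBFWriteBuffer creates a bfWriteBuffer for the given channel on top of
// the shared writeBufferBase, keeping a reference to the sync manager.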
func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
	base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
	if err != nil {
		return nil, err
	}
	return &bfWriteBuffer{
		writeBufferBase: base,
		syncMgr:         syncMgr,
	}, nil
}
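
// dispatchDeleteMsgs distributes delete messages to candidate segments:
// existing growing/flushing/flushed segments are probed through their bloom
// filter sets in batches, and the current insert batch is checked directly
// for primary key existence.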
func (wb *bfWriteBuffer) dispatchDeleteMsgs(groups []*inData, deleteMsgs []*msgstream.DeleteMsg, startPos, endPos *msgpb.MsgPosition) {
	batchSize := paramtable.Get().CommonCfg.BloomFilterApplyBatchSize.GetAsInt()

	split := func(pks []storage.PrimaryKey, pkTss []uint64, segments []*metacache.SegmentInfo) {
		lc := storage.NewBatchLocationsCache(pks)
		for _, segment := range segments {
			hits := segment.GetBloomFilterSet().BatchPkExist(lc)
			var deletePks []storage.PrimaryKey
			var deleteTss []typeutil.Timestamp
			for i, hit := range hits {
				if hit {
					deletePks = append(deletePks, pks[i])
					deleteTss = append(deleteTss, pkTss[i])
				}
			}

			if len(deletePks) > 0 {
				wb.bufferDelete(segment.SegmentID(), deletePks, deleteTss, startPos, endPos)
			}
		}
	}

	// distribute delete msgs to previously written data
	for _, delMsg := range deleteMsgs {
		pks := storage.ParseIDs2PrimaryKeys(delMsg.GetPrimaryKeys())
		pkTss := delMsg.GetTimestamps()
		segments := wb.metaCache.GetSegmentsBy(metacache.WithPartitionID(delMsg.PartitionID),
			metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing, commonpb.SegmentState_Flushed))

		// probe bloom filters in batches to bound the cost of each lookup
		for idx := 0; idx < len(pks); idx += batchSize {
			endIdx := idx + batchSize
			if endIdx > len(pks) {
				endIdx = len(pks)
			}
			split(pks[idx:endIdx], pkTss[idx:endIdx], segments)
		}

		// apply deletes to the current insert batch as well
		for _, inData := range groups {
			if delMsg.GetPartitionID() == common.AllPartitionsID || delMsg.GetPartitionID() == inData.partitionID {
				var deletePks []storage.PrimaryKey
				var deleteTss []typeutil.Timestamp
				for idx, pk := range pks {
					ts := pkTss[idx]
					if inData.pkExists(pk, ts) {
						deletePks = append(deletePks, pk)
						deleteTss = append(deleteTss, ts)
					}
				}
				if len(deletePks) > 0 {
					wb.bufferDelete(inData.segmentID, deletePks, deleteTss, startPos, endPos)
				}
			}
		}
	}
}
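
// BufferData buffers insert messages, dispatches delete messages via bloom
// filter checks, updates the pk oracle of the affected segments, and advances
// the buffer checkpoint before triggering sync.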
func (wb *bfWriteBuffer) BufferData(insertMsgs []*msgstream.InsertMsg, deleteMsgs []*msgstream.DeleteMsg, startPos, endPos *msgpb.MsgPosition) error {
	wb.mut.Lock()
	defer wb.mut.Unlock()

	groups, err := wb.prepareInsert(insertMsgs)
	if err != nil {
		return err
	}

	// buffer insert data and add segment if it does not exist
	for _, inData := range groups {
		err := wb.bufferInsert(inData, startPos, endPos)
		if err != nil {
			return err
		}
	}

	// distribute delete msgs: the bf write buffer checks the bloom filters of
	// existing segments and the current insert batch to decide which segments
	// the delete data shall be written to
	wb.dispatchDeleteMsgs(groups, deleteMsgs, startPos, endPos)

	// update pk oracle
	for _, inData := range groups {
		// segment shall always exist after buffer insert
		segments := wb.metaCache.GetSegmentsBy(
			metacache.WithSegmentIDs(inData.segmentID))
		for _, segment := range segments {
			for _, fieldData := range inData.pkField {
				err := segment.GetBloomFilterSet().UpdatePKRange(fieldData)
				if err != nil {
					return err
				}
			}
		}
	}

	// update buffer last checkpoint
	wb.checkpoint = endPos

	_ = wb.triggerSync()

	return nil
}