mirror of https://github.com/milvus-io/milvus.git
Fix flush didn't respect binaryvector and other schemas (#21120)
Signed-off-by: xiaofan-luan <xiaofan.luan@zilliz.com>
pull/21134/head
parent 18ef74007f
commit e977e014a9
@@ -271,11 +271,11 @@ func (t *compactionTrigger) estimateDiskSegmentMaxNumOfRows(collectionID UniqueI
     return t.estimateDiskSegmentPolicy(collMeta.Schema)
 }
 
-func (t *compactionTrigger) updateSegmentMaxSize(segments []*SegmentInfo) error {
+func (t *compactionTrigger) updateSegmentMaxSize(segments []*SegmentInfo) (bool, error) {
     ctx := context.Background()
 
     if len(segments) == 0 {
-        return nil
+        return false, nil
     }
 
     collectionID := segments[0].GetCollectionID()
@@ -284,24 +284,26 @@ func (t *compactionTrigger) updateSegmentMaxSize(segments []*SegmentInfo) error
         IndexName: "",
     })
     if err != nil {
-        return err
+        return false, err
     }
 
+    isDiskIndex := false
     for _, indexInfo := range resp.IndexInfos {
         indexParamsMap := funcutil.KeyValuePair2Map(indexInfo.IndexParams)
         if indexType, ok := indexParamsMap["index_type"]; ok {
             if indexType == indexparamcheck.IndexDISKANN {
                 diskSegmentMaxRows, err := t.estimateDiskSegmentMaxNumOfRows(collectionID)
                 if err != nil {
-                    return err
+                    return false, err
                 }
                 for _, segment := range segments {
                     segment.MaxRowNum = int64(diskSegmentMaxRows)
                 }
+                isDiskIndex = true
             }
         }
     }
-    return nil
+    return isDiskIndex, nil
 }
 
 func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) {
@@ -336,7 +338,7 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) {
 
         group.segments = FilterInIndexedSegments(t.handler, t.indexCoord, group.segments...)
 
-        err := t.updateSegmentMaxSize(group.segments)
+        isDiskIndex, err := t.updateSegmentMaxSize(group.segments)
         if err != nil {
             log.Warn("failed to update segment max size,", zap.Error(err))
             continue
@@ -351,7 +353,7 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) {
             return
         }
 
-        plans := t.generatePlans(group.segments, signal.isForce, ct)
+        plans := t.generatePlans(group.segments, signal.isForce, isDiskIndex, ct)
         for _, plan := range plans {
             segIDs := fetchSegIDs(plan.GetSegmentBinlogs())
 
@@ -419,7 +421,7 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) {
         return
     }
 
-    err := t.updateSegmentMaxSize(segments)
+    isDiskIndex, err := t.updateSegmentMaxSize(segments)
     if err != nil {
         log.Warn("failed to update segment max size", zap.Error(err))
     }
@@ -438,7 +440,7 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) {
         return
     }
 
-    plans := t.generatePlans(segments, signal.isForce, ct)
+    plans := t.generatePlans(segments, signal.isForce, isDiskIndex, ct)
     for _, plan := range plans {
         if t.compactionHandler.isFull() {
             log.Warn("compaction plan skipped due to handler full", zap.Int64("collection", signal.collectionID), zap.Int64("planID", plan.PlanID))
@@ -467,7 +469,7 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) {
     }
 }
 
-func (t *compactionTrigger) generatePlans(segments []*SegmentInfo, force bool, compactTime *compactTime) []*datapb.CompactionPlan {
+func (t *compactionTrigger) generatePlans(segments []*SegmentInfo, force bool, isDiskIndex bool, compactTime *compactTime) []*datapb.CompactionPlan {
     // find segments need internal compaction
     // TODO add low priority candidates, for example if the segment is smaller than full 0.9 * max segment size but larger than small segment boundary, we only execute compaction when there are no compaction running actively
     var prioritizedCandidates []*SegmentInfo
@@ -478,7 +480,7 @@ func (t *compactionTrigger) generatePlans(segments []*SegmentInfo, force bool, c
     for _, segment := range segments {
         segment := segment.ShadowClone()
         // TODO should we trigger compaction periodically even if the segment has no obvious reason to be compacted?
-        if force || t.ShouldDoSingleCompaction(segment, compactTime) {
+        if force || t.ShouldDoSingleCompaction(segment, isDiskIndex, compactTime) {
             prioritizedCandidates = append(prioritizedCandidates, segment)
         } else if t.isSmallSegment(segment) {
             smallCandidates = append(smallCandidates, segment)
@@ -741,24 +743,43 @@ func (t *compactionTrigger) isStaleSegment(segment *SegmentInfo) bool {
     return time.Since(segment.lastFlushTime).Minutes() >= segmentTimedFlushDuration
 }
 
-func (t *compactionTrigger) ShouldDoSingleCompaction(segment *SegmentInfo, compactTime *compactTime) bool {
-    // count all the binlog file count
-    var totalLogNum int
+func (t *compactionTrigger) ShouldDoSingleCompaction(segment *SegmentInfo, isDiskIndex bool, compactTime *compactTime) bool {
+    // no longer restricted binlog numbers because this is now related to field numbers
+    var binLog int
     for _, binlogs := range segment.GetBinlogs() {
-        totalLogNum += len(binlogs.GetBinlogs())
+        binLog += len(binlogs.GetBinlogs())
     }
 
+    // count all the statlog file count, only for flush generated segments
+    if len(segment.CompactionFrom) == 0 {
+        var statsLog int
+        for _, statsLogs := range segment.GetStatslogs() {
+            statsLog += len(statsLogs.GetBinlogs())
+        }
+
+        var maxSize int
+        if isDiskIndex {
+            maxSize = int(Params.DataCoordCfg.DiskSegmentMaxSize.GetAsInt64() * 1024 * 1024 / Params.DataNodeCfg.BinLogMaxSize.GetAsInt64())
+        } else {
+            maxSize = int(Params.DataCoordCfg.SegmentMaxSize.GetAsInt64() * 1024 * 1024 / Params.DataNodeCfg.BinLogMaxSize.GetAsInt64())
+        }
+
+        // if stats log is more than expected, trigger compaction to reduce stats log size.
+        // TODO maybe we want to compact to single statslog to reduce watch dml channel cost
+        // TODO avoid rebuild index twice.
+        if statsLog > maxSize*2.0 {
+            log.Info("stats number is too much, trigger compaction", zap.Int64("segment", segment.ID), zap.Int("Bin logs", binLog), zap.Int("Stat logs", statsLog))
+            return true
+        }
+    }
+
+    var deltaLog int
     for _, deltaLogs := range segment.GetDeltalogs() {
-        totalLogNum += len(deltaLogs.GetBinlogs())
+        deltaLog += len(deltaLogs.GetBinlogs())
     }
 
-    for _, statsLogs := range segment.GetStatslogs() {
-        totalLogNum += len(statsLogs.GetBinlogs())
-    }
-    // avoid segment has too many bin logs and the etcd meta is too large, force trigger compaction
-    if totalLogNum > Params.DataCoordCfg.SingleCompactionBinlogMaxNum.GetAsInt() {
-        log.Info("total binlog number is too much, trigger compaction", zap.Int64("segment", segment.ID),
-            zap.Int("Delta logs", len(segment.GetDeltalogs())), zap.Int("Bin Logs", len(segment.GetBinlogs())), zap.Int("Stat logs", len(segment.GetStatslogs())))
+    if deltaLog > Params.DataCoordCfg.SingleCompactionDeltalogMaxNum.GetAsInt() {
+        log.Info("total delta number is too much, trigger compaction", zap.Int64("segment", segment.ID), zap.Int("Bin logs", binLog), zap.Int("Delta logs", deltaLog))
        return true
     }
 
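Note on the new trigger above: a flush-generated segment is expected to hold roughly `SegmentMaxSize / BinLogMaxSize` binlogs per field, and `ShouldDoSingleCompaction` now fires once the statslog count exceeds twice that expectation. The sketch below is a self-contained illustration with assumed values (512 MiB segment max size and the 64 MiB `BinLogMaxSize` default introduced in this change); the real code reads both figures from paramtable and is not reproduced here.

```go
package main

import "fmt"

// shouldCompactForStatslogs mirrors the statslog check sketched above. The parameters are
// plain numbers here; in datacoord they come from Params.DataCoordCfg.SegmentMaxSize
// (or DiskSegmentMaxSize when a DiskANN index is present) and Params.DataNodeCfg.BinLogMaxSize.
func shouldCompactForStatslogs(statsLogCount int, segmentMaxSizeMB, binLogMaxSizeBytes int64) bool {
	// expected number of binlogs for a full segment
	maxSize := int(segmentMaxSizeMB * 1024 * 1024 / binLogMaxSizeBytes)
	// compaction is triggered once the statslog count exceeds twice that expectation
	return statsLogCount > maxSize*2
}

func main() {
	// assumed defaults: 512 MiB segments, 64 MiB binlogs => maxSize = 8, threshold = 16
	fmt.Println(shouldCompactForStatslogs(20, 512, 67108864)) // true
	fmt.Println(shouldCompactForStatslogs(10, 512, 67108864)) // false
}
```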
@@ -825,6 +825,7 @@ func Test_compactionTrigger_noplan(t *testing.T) {
         compactTime *compactTime
     }
     Params.Init()
+    Params.DataCoordCfg.MinSegmentToMerge.DefaultValue = "4"
     vecFieldID := int64(201)
     tests := []struct {
         name string
@@ -1522,9 +1523,9 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
     trigger := newCompactionTrigger(&meta{}, &compactionPlanHandler{}, newMockAllocator(),
         &SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, indexCoord, newMockHandler())
 
-    // Test too many files.
+    // Test too many deltalogs.
     var binlogs []*datapb.FieldBinlog
-    for i := UniqueID(0); i < 5000; i++ {
+    for i := UniqueID(0); i < 1000; i++ {
         binlogs = append(binlogs, &datapb.FieldBinlog{
             Binlogs: []*datapb.Binlog{
                 {EntriesNum: 5, LogPath: "log1", LogSize: 100},
@@ -1541,13 +1542,46 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
             MaxRowNum: 300,
             InsertChannel: "ch1",
             State: commonpb.SegmentState_Flushed,
-            Binlogs: binlogs,
+            Deltalogs: binlogs,
         },
     }
 
-    couldDo := trigger.ShouldDoSingleCompaction(info, &compactTime{travelTime: 200, expireTime: 0})
+    couldDo := trigger.ShouldDoSingleCompaction(info, false, &compactTime{travelTime: 200, expireTime: 0})
     assert.True(t, couldDo)
 
+    //Test too many stats log
+    info = &SegmentInfo{
+        SegmentInfo: &datapb.SegmentInfo{
+            ID: 1,
+            CollectionID: 2,
+            PartitionID: 1,
+            LastExpireTime: 100,
+            NumOfRows: 100,
+            MaxRowNum: 300,
+            InsertChannel: "ch1",
+            State: commonpb.SegmentState_Flushed,
+            Statslogs: binlogs,
+        },
+    }
+
+    couldDo = trigger.ShouldDoSingleCompaction(info, false, &compactTime{travelTime: 200, expireTime: 0})
+    assert.True(t, couldDo)
+
+    couldDo = trigger.ShouldDoSingleCompaction(info, true, &compactTime{travelTime: 200, expireTime: 0})
+    assert.True(t, couldDo)
+
+    // if only 10 bin logs, then disk index won't trigger compaction
+    info.Statslogs = binlogs[0:20]
+    couldDo = trigger.ShouldDoSingleCompaction(info, false, &compactTime{travelTime: 200, expireTime: 0})
+    assert.True(t, couldDo)
+
+    couldDo = trigger.ShouldDoSingleCompaction(info, true, &compactTime{travelTime: 200, expireTime: 0})
+    assert.False(t, couldDo)
+
+    //Test too many stats log but compacted
+    info.CompactionFrom = []int64{0, 1}
+    couldDo = trigger.ShouldDoSingleCompaction(info, false, &compactTime{travelTime: 200, expireTime: 0})
+    assert.False(t, couldDo)
+
     //Test expire triggered compaction
     var binlogs2 []*datapb.FieldBinlog
     for i := UniqueID(0); i < 100; i++ {
@@ -1580,15 +1614,15 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
     }
 
     // expire time < Timestamp To
-    couldDo = trigger.ShouldDoSingleCompaction(info2, &compactTime{travelTime: 200, expireTime: 300})
+    couldDo = trigger.ShouldDoSingleCompaction(info2, false, &compactTime{travelTime: 200, expireTime: 300})
     assert.False(t, couldDo)
 
     // didn't reach single compaction size 10 * 1024 * 1024
-    couldDo = trigger.ShouldDoSingleCompaction(info2, &compactTime{travelTime: 200, expireTime: 600})
+    couldDo = trigger.ShouldDoSingleCompaction(info2, false, &compactTime{travelTime: 200, expireTime: 600})
     assert.False(t, couldDo)
 
     // expire time < Timestamp False
-    couldDo = trigger.ShouldDoSingleCompaction(info2, &compactTime{travelTime: 200, expireTime: 1200})
+    couldDo = trigger.ShouldDoSingleCompaction(info2, false, &compactTime{travelTime: 200, expireTime: 1200})
     assert.True(t, couldDo)
 
     // Test Delete triggered compaction
@@ -1623,11 +1657,11 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
     }
 
     // expire time < Timestamp To
-    couldDo = trigger.ShouldDoSingleCompaction(info3, &compactTime{travelTime: 600, expireTime: 0})
+    couldDo = trigger.ShouldDoSingleCompaction(info3, false, &compactTime{travelTime: 600, expireTime: 0})
     assert.False(t, couldDo)
 
     // deltalog is large enough, should do compaction
-    couldDo = trigger.ShouldDoSingleCompaction(info3, &compactTime{travelTime: 800, expireTime: 0})
+    couldDo = trigger.ShouldDoSingleCompaction(info3, false, &compactTime{travelTime: 800, expireTime: 0})
     assert.True(t, couldDo)
 }
 
@@ -31,6 +31,7 @@ import (
     "github.com/milvus-io/milvus/internal/proto/internalpb"
     "github.com/milvus-io/milvus/internal/storage"
     "github.com/milvus-io/milvus/internal/util/paramtable"
+    "github.com/milvus-io/milvus/internal/util/typeutil"
 )
 
 // DelBufferManager is in charge of managing insertBuf and delBuf from an overall prospect
@@ -332,32 +333,20 @@ func (ddb *DelDataBuf) updateStartAndEndPosition(startPos *internalpb.MsgPositio
 // * This need to change for string field support and multi-vector fields support.
 func newBufferData(collSchema *schemapb.CollectionSchema) (*BufferData, error) {
     // Get Dimension
-    // TODO GOOSE: under assumption that there's only 1 Vector field in one collection schema
-    var vectorSize int
-    for _, field := range collSchema.Fields {
-        if field.DataType == schemapb.DataType_FloatVector ||
-            field.DataType == schemapb.DataType_BinaryVector {
-
-            dimension, err := storage.GetDimFromParams(field.TypeParams)
-            switch field.DataType {
-            case schemapb.DataType_FloatVector:
-                vectorSize = dimension * 4
-            case schemapb.DataType_BinaryVector:
-                vectorSize = dimension / 8
-            }
-            if err != nil {
-                log.Error("failed to get dim from field", zap.Error(err))
-                return nil, err
-            }
-            break
-        }
+    size, err := typeutil.EstimateSizePerRecord(collSchema)
+    if err != nil {
+        log.Warn("failed to estimate size per record", zap.Error(err))
+        return nil, err
     }
 
-    if vectorSize == 0 {
-        return nil, errors.New("Invalid dimension")
+    if size == 0 {
+        return nil, errors.New("Invalid schema")
     }
 
-    limit := Params.DataNodeCfg.FlushInsertBufferSize.GetAsInt64() / int64(vectorSize)
+    limit := Params.DataNodeCfg.FlushInsertBufferSize.GetAsInt64() / int64(size)
+    if Params.DataNodeCfg.FlushInsertBufferSize.GetAsInt64()%int64(size) != 0 {
+        limit++
+    }
 
     //TODO::xige-16 eval vec and string field
     return &BufferData{
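The flush-buffer change above is the heart of the issue in the commit title: the row limit is now derived from the estimated size of an entire row (every field, including BinaryVector and scalar columns) rather than from a single float-vector dimension, and the division rounds up so even an oversized row yields a limit of at least one. Below is a standalone sketch of that rounding with an assumed per-row size; in the datanode the size comes from `typeutil.EstimateSizePerRecord(collSchema)` and the buffer size from `Params.DataNodeCfg.FlushInsertBufferSize`.

```go
package main

import "fmt"

// bufferRowLimit reproduces the ceiling division used in newBufferData: flush-buffer bytes
// divided by estimated bytes per row, rounded up when there is a remainder.
func bufferRowLimit(flushBufferBytes, bytesPerRow int64) int64 {
	limit := flushBufferBytes / bytesPerRow
	if flushBufferBytes%bytesPerRow != 0 {
		limit++
	}
	return limit
}

func main() {
	// assumed numbers: a 16 MiB flush buffer and a 528-byte row
	// (128-dim float vector = 512 B, plus an int64 primary key and a timestamp)
	fmt.Println(bufferRowLimit(16*1024*1024, 528)) // 31776, rounded up from 31775 with remainder 16
}
```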
@@ -240,12 +240,10 @@ func (t *compactionTask) merge(
     mergeStart := time.Now()
 
     var (
-        dim int // dimension of float/binary vector field
         maxRowsPerBinlog int // maximum rows populating one binlog
         numBinlogs int // binlog number
         numRows int64 // the number of rows uploaded
        expired int64 // the number of expired entity
-        err error
 
         // statslog generation
         pkID UniqueID
@@ -300,25 +298,25 @@ func (t *compactionTask) merge(
             pkID = fs.GetFieldID()
             pkType = fs.GetDataType()
         }
-        if fs.GetDataType() == schemapb.DataType_FloatVector ||
-            fs.GetDataType() == schemapb.DataType_BinaryVector {
-            for _, t := range fs.GetTypeParams() {
-                if t.Key == "dim" {
-                    if dim, err = strconv.Atoi(t.Value); err != nil {
-                        log.Warn("strconv wrong on get dim", zap.Error(err))
-                        return nil, nil, 0, err
-                    }
-                    break
-                }
-            }
-        }
+    }
+
+    // estimate Rows per binlog
+    // TODO should not convert size to row because we already know the size, this is especially important on varchar types.
+    size, err := typeutil.EstimateSizePerRecord(meta.GetSchema())
+    if err != nil {
+        log.Warn("failed to estimate size per record", zap.Error(err))
+        return nil, nil, 0, err
+    }
+
+    maxRowsPerBinlog = int(Params.DataNodeCfg.BinLogMaxSize.GetAsInt64() / int64(size))
+    if Params.DataNodeCfg.BinLogMaxSize.GetAsInt64()%int64(size) != 0 {
+        maxRowsPerBinlog++
     }
 
     expired = 0
     numRows = 0
     numBinlogs = 0
     currentTs := t.GetCurrentTime()
-    maxRowsPerBinlog = int(Params.DataNodeCfg.FlushInsertBufferSize.GetAsInt64() / (int64(dim) * 4))
     currentRows := 0
     downloadTimeCost := time.Duration(0)
     uploadInsertTimeCost := time.Duration(0)
@@ -327,14 +325,14 @@ func (t *compactionTask) merge(
         downloadStart := time.Now()
         data, err := t.download(ctxTimeout, path)
         if err != nil {
-            log.Warn("download insertlogs wrong")
+            log.Warn("download insertlogs wrong", zap.Error(err))
             return nil, nil, 0, err
         }
         downloadTimeCost += time.Since(downloadStart)
 
         iter, err := storage.NewInsertBinlogIterator(data, pkID, pkType)
         if err != nil {
-            log.Warn("new insert binlogs Itr wrong")
+            log.Warn("new insert binlogs Itr wrong", zap.Error(err))
             return nil, nil, 0, err
         }
         for iter.HasNext() {
@@ -370,11 +368,11 @@ func (t *compactionTask) merge(
             }
 
             currentRows++
-            if currentRows == maxRowsPerBinlog {
+            if currentRows >= maxRowsPerBinlog {
                 uploadInsertStart := time.Now()
                 inPaths, statsPaths, err := t.uploadSingleInsertLog(ctxTimeout, targetSegID, partID, meta, fID2Content, fID2Type)
                 if err != nil {
+                    log.Warn("failed to upload single insert log", zap.Error(err))
                     return nil, nil, 0, err
                 }
                 uploadInsertTimeCost += time.Since(uploadInsertStart)
@@ -392,6 +390,7 @@ func (t *compactionTask) merge(
     uploadInsertStart := time.Now()
     inPaths, statsPaths, err := t.uploadSingleInsertLog(ctxTimeout, targetSegID, partID, meta, fID2Content, fID2Type)
     if err != nil {
+        log.Warn("failed to upload single insert log", zap.Error(err))
         return nil, nil, 0, err
     }
     uploadInsertTimeCost += time.Since(uploadInsertStart)
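The compactor gets the same treatment for rows per binlog: the old formula assumed every vector field is a float vector of dim * 4 bytes and sized binlogs off FlushInsertBufferSize, while the new one divides BinLogMaxSize by the estimated size of the whole row. The comparison below uses assumed figures (a 128-dim BinaryVector plus an int64 primary key and a timestamp, a 16 MiB flush buffer, a 64 MiB binlog cap) purely to show why the old formula undercounted rows for binary vectors; it is not the datanode code itself.

```go
package main

import "fmt"

func main() {
	const (
		dim                 = 128
		flushInsertBufBytes = 16 * 1024 * 1024 // assumed FlushInsertBufferSize
		binLogMaxBytes      = 64 * 1024 * 1024 // assumed BinLogMaxSize (the new default)
	)

	// old formula: every vector treated as float32, i.e. dim * 4 bytes per row
	oldRows := flushInsertBufBytes / (dim * 4)

	// new formula: estimate the whole row; a 128-dim binary vector is dim/8 = 16 bytes,
	// plus 8 bytes each for an int64 primary key and a timestamp field
	bytesPerRow := dim/8 + 8 + 8
	newRows := binLogMaxBytes / bytesPerRow
	if binLogMaxBytes%bytesPerRow != 0 {
		newRows++
	}

	fmt.Println("old rows per binlog:", oldRows) // 32768, based on a row size 16x too large
	fmt.Println("new rows per binlog:", newRows) // 2097152, matched to the real 32-byte row
}
```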
@@ -307,11 +307,11 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
         alloc := NewAllocatorFactory(1)
         mockbIO := &binlogIO{cm, alloc}
         paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0")
-        flushInsertBufferSize := Params.DataNodeCfg.FlushInsertBufferSize
+        BinLogMaxSize := Params.DataNodeCfg.BinLogMaxSize
         defer func() {
-            Params.DataNodeCfg.FlushInsertBufferSize = flushInsertBufferSize
+            Params.DataNodeCfg.BinLogMaxSize = BinLogMaxSize
         }()
-        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "128")
+        paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "128")
         iData := genInsertDataWithExpiredTS()
         meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
 
@@ -373,7 +373,7 @@ func TestFlowGraphInsertBufferNode_AutoFlush(t *testing.T) {
     t.Run("Pure auto flush", func(t *testing.T) {
         // iBNode.insertBuffer.maxSize = 2
         tmp := Params.DataNodeCfg.FlushInsertBufferSize
-        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "16")
+        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "200")
         defer func() {
             Params.DataNodeCfg.FlushInsertBufferSize = tmp
         }()
@@ -465,7 +465,7 @@ func TestFlowGraphInsertBufferNode_AutoFlush(t *testing.T) {
 
     t.Run("Auto with manual flush", func(t *testing.T) {
         tmp := Params.DataNodeCfg.FlushInsertBufferSize
-        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "16")
+        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "200")
         defer func() {
             Params.DataNodeCfg.FlushInsertBufferSize = tmp
         }()
@@ -607,7 +607,7 @@ func TestRollBF(t *testing.T) {
 
     t.Run("Pure roll BF", func(t *testing.T) {
         tmp := Params.DataNodeCfg.FlushInsertBufferSize
-        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "16")
+        paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "200")
         defer func() {
             Params.DataNodeCfg.FlushInsertBufferSize = tmp
         }()
@@ -697,7 +697,7 @@ func (s *InsertBufferNodeSuit) SetupSuite() {
 
     s.originalConfig = Params.DataNodeCfg.FlushInsertBufferSize.GetAsInt64()
     // change flushing size to 2
-    paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "16")
+    paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, "200")
 }
 
 func (s *InsertBufferNodeSuit) TearDownSuite() {
@@ -1252,7 +1252,7 @@ type dataCoordConfig struct {
     SingleCompactionRatioThreshold ParamItem
     SingleCompactionDeltaLogMaxSize ParamItem
     SingleCompactionExpiredLogMaxSize ParamItem
-    SingleCompactionBinlogMaxNum ParamItem
+    SingleCompactionDeltalogMaxNum ParamItem
     GlobalCompactionInterval ParamItem
 
     // Garbage Collection
@@ -1338,7 +1338,7 @@ func (p *dataCoordConfig) init(base *BaseTable) {
     p.MinSegmentToMerge = ParamItem{
         Key: "dataCoord.compaction.min.segment",
         Version: "2.0.0",
-        DefaultValue: "4",
+        DefaultValue: "3",
     }
     p.MinSegmentToMerge.Init(base.mgr)
 
@@ -1405,12 +1405,12 @@ func (p *dataCoordConfig) init(base *BaseTable) {
     }
     p.SingleCompactionExpiredLogMaxSize.Init(base.mgr)
 
-    p.SingleCompactionBinlogMaxNum = ParamItem{
-        Key: "dataCoord.compaction.single.binlog.maxnum",
+    p.SingleCompactionDeltalogMaxNum = ParamItem{
+        Key: "dataCoord.compaction.single.deltalog.maxnum",
         Version: "2.0.0",
         DefaultValue: "1000",
     }
-    p.SingleCompactionBinlogMaxNum.Init(base.mgr)
+    p.SingleCompactionDeltalogMaxNum.Init(base.mgr)
 
     p.GlobalCompactionInterval = ParamItem{
         Key: "dataCoord.compaction.global.interval",
@@ -1464,6 +1464,7 @@ type dataNodeConfig struct {
     // segment
     FlushInsertBufferSize ParamItem
     FlushDeleteBufferBytes ParamItem
+    BinLogMaxSize ParamItem
     SyncPeriod ParamItem
 
     // io concurrency to fetch stats logs
@@ -1501,6 +1502,13 @@ func (p *dataNodeConfig) init(base *BaseTable) {
     }
     p.FlushDeleteBufferBytes.Init(base.mgr)
 
+    p.BinLogMaxSize = ParamItem{
+        Key: "datanode.segment.binlog.maxsize",
+        Version: "2.0.0",
+        DefaultValue: "67108864",
+    }
+    p.BinLogMaxSize.Init(base.mgr)
+
     p.SyncPeriod = ParamItem{
         Key: "datanode.segment.syncPeriod",
         Version: "2.0.0",