fix: Limit L0 Compaction segment size and count (#30374)

See also: #30191

---------

Signed-off-by: yangxuan <xuan.yang@zilliz.com>
pull/30344/head
XuanYang-cn 2024-02-01 20:39:03 +08:00 committed by GitHub
parent adaf270697
commit e0ed5647b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 107 additions and 33 deletions

View File

@ -422,7 +422,7 @@ dataCoord:
levelzero:
forceTrigger:
minSize: 8 # The minmum size in MB to force trigger a LevelZero Compaction
minSize: 8388608 # The minimum size in bytes to force trigger a LevelZero Compaction, default as 8MB
deltalogMinNum: 10 # the minimum number of deltalog files to force trigger a LevelZero Compaction
import:
filesPerPreImportTask: 2 # The maximum number of files allowed per pre-import task.

View File

@ -6,6 +6,7 @@ import (
"github.com/samber/lo"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
// The LevelZeroSegments keeps the min group
@ -74,34 +75,75 @@ func (v *LevelZeroSegmentsView) Trigger() (CompactionView, string) {
})
var (
minDeltaSize = Params.DataCoordCfg.LevelZeroCompactionTriggerMinSize.GetAsFloat()
minDeltaCount = Params.DataCoordCfg.LevelZeroCompactionTriggerDeltalogMinNum.GetAsInt()
curDeltaSize float64
curDeltaCount int
reason string
minDeltaSize = paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerMinSize.GetAsFloat()
maxDeltaSize = paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerMaxSize.GetAsFloat()
minDeltaCount = paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMinNum.GetAsInt()
maxDeltaCount = paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMaxNum.GetAsInt()
)
for _, segView := range validSegments {
curDeltaSize += segView.DeltaSize
curDeltaCount += segView.DeltalogCount
targetViews, targetSize := v.filterViewsBySizeRange(validSegments, minDeltaSize, maxDeltaSize)
if targetViews != nil {
reason := fmt.Sprintf("level zero segments size reaches compaction limit, curDeltaSize=%.2f, limitSizeRange=[%.2f, %.2f]",
targetSize, minDeltaSize, maxDeltaSize)
return &LevelZeroSegmentsView{
label: v.label,
segments: targetViews,
earliestGrowingSegmentPos: v.earliestGrowingSegmentPos,
}, reason
}
if curDeltaSize > minDeltaSize {
reason = "level zero segments size reaches compaction limit"
targetViews, targetCount := v.filterViewsByCountRange(validSegments, minDeltaCount, maxDeltaCount)
if targetViews != nil {
reason := fmt.Sprintf("level zero segments count reaches compaction limit, curDeltaCount=%d, limitCountRange=[%d, %d]",
targetCount, minDeltaCount, maxDeltaCount)
return &LevelZeroSegmentsView{
label: v.label,
segments: targetViews,
earliestGrowingSegmentPos: v.earliestGrowingSegmentPos,
}, reason
}
if curDeltaCount > minDeltaCount {
reason = "level zero segments number reaches compaction limit"
}
if curDeltaSize < minDeltaSize && curDeltaCount < minDeltaCount {
return nil, ""
}
return &LevelZeroSegmentsView{
label: v.label,
segments: validSegments,
earliestGrowingSegmentPos: v.earliestGrowingSegmentPos,
}, reason
return nil, ""
}
// filterViewsByCountRange selects a leading run of segments based on their
// accumulated deltalog count.
//
// Segments are walked in the given order and their DeltalogCount values are
// summed. The first segment is always taken, even when its count alone is
// greater than maxCount; after that, the walk stops before the first segment
// that would push the running total above maxCount.
//
// It returns the selected prefix of segments together with its total deltalog
// count. When nothing was selected, or the total is below minCount, it returns
// (nil, 0).
func (v *LevelZeroSegmentsView) filterViewsByCountRange(segments []*SegmentView, minCount, maxCount int) ([]*SegmentView, int) {
	var (
		total  int
		picked int
	)
	for _, view := range segments {
		next := total + view.DeltalogCount
		// Never reject the first segment: otherwise a single segment larger
		// than maxCount would make the selection permanently empty.
		if picked > 0 && next > maxCount {
			break
		}
		picked++
		total = next
	}

	// An empty selection must not be reported as a match. Slicing a non-nil
	// empty input yields a non-nil empty slice, which a `!= nil` check on the
	// result would otherwise read as "segments were selected".
	if picked == 0 || total < minCount {
		return nil, 0
	}
	return segments[:picked], total
}
// filterViewsBySizeRange selects a leading run of segments based on their
// accumulated delta size.
//
// Segments are walked in the given order and their DeltaSize values are
// summed. The first segment is always taken, even when its size alone is
// greater than maxSize; after that, the walk stops before the first segment
// that would push the running total above maxSize.
//
// It returns the selected prefix of segments together with its total delta
// size. When nothing was selected, or the total is below minSize, it returns
// (nil, 0).
func (v *LevelZeroSegmentsView) filterViewsBySizeRange(segments []*SegmentView, minSize, maxSize float64) ([]*SegmentView, float64) {
	var (
		total  float64
		picked int
	)
	for _, view := range segments {
		next := total + view.DeltaSize
		// Never reject the first segment: otherwise a single segment larger
		// than maxSize would make the selection permanently empty.
		if picked > 0 && next > maxSize {
			break
		}
		picked++
		total = next
	}

	// An empty selection must not be reported as a match. Slicing a non-nil
	// empty input yields a non-nil empty slice, which a `!= nil` check on the
	// result would otherwise read as "segments were selected".
	if picked == 0 || total < minSize {
		return nil, 0
	}
	return segments[:picked], total
}

View File

@ -115,7 +115,7 @@ func (s *LevelZeroSegmentsViewSuite) TestTrigger() {
},
{
"Trigger by > TriggerDeltaSize",
8,
8 * 1024 * 1024,
1,
30000,
[]UniqueID{100, 101},
@ -127,6 +127,20 @@ func (s *LevelZeroSegmentsViewSuite) TestTrigger() {
30000,
[]UniqueID{100, 101},
},
{
"Trigger by > maxDeltaSize",
128 * 1024 * 1024,
1,
30000,
[]UniqueID{100},
},
{
"Trigger by > maxDeltaCount",
1,
24,
30000,
[]UniqueID{100},
},
}
for _, test := range tests {
@ -152,7 +166,7 @@ func (s *LevelZeroSegmentsViewSuite) TestTrigger() {
return v.ID
})
s.ElementsMatch(gotSegIDs, test.expectedSegs)
log.Info("trigger reason", zap.String("trigger reason", reason))
log.Info("output view", zap.String("view", levelZeroView.String()), zap.String("trigger reason", reason))
}
})
}

View File

@ -29,7 +29,7 @@ type CompactionViewManagerSuite struct {
m *CompactionViewManager
}
const MB = 1024 * 1024 * 1024
const MB = 1024 * 1024
func genSegmentsForMeta(label *CompactionGroupLabel) map[int64]*SegmentInfo {
segArgs := []struct {

View File

@ -2334,7 +2334,9 @@ type dataCoordConfig struct {
// LevelZero Segment
EnableLevelZeroSegment ParamItem `refreshable:"false"`
LevelZeroCompactionTriggerMinSize ParamItem `refreshable:"true"`
LevelZeroCompactionTriggerMaxSize ParamItem `refreshable:"true"`
LevelZeroCompactionTriggerDeltalogMinNum ParamItem `refreshable:"true"`
LevelZeroCompactionTriggerDeltalogMaxNum ParamItem `refreshable:"true"`
// Garbage Collection
EnableGarbageCollection ParamItem `refreshable:"false"`
@ -2632,7 +2634,7 @@ During compaction, the size of segment # of rows is able to exceed segment max #
// LevelZeroCompaction
p.EnableLevelZeroSegment = ParamItem{
Key: "dataCoord.segment.enableLevelZero",
Version: "2.3.4",
Version: "2.4.0",
Doc: "Whether to enable LevelZeroCompaction",
DefaultValue: "false",
}
@ -2640,20 +2642,36 @@ During compaction, the size of segment # of rows is able to exceed segment max #
p.LevelZeroCompactionTriggerMinSize = ParamItem{
Key: "dataCoord.compaction.levelzero.forceTrigger.minSize",
Version: "2.3.4",
Doc: "The minmum size in MB to force trigger a LevelZero Compaction",
DefaultValue: "8",
Version: "2.4.0",
Doc: "The minmum size in bytes to force trigger a LevelZero Compaction, default as 8MB",
DefaultValue: "8388608",
}
p.LevelZeroCompactionTriggerMinSize.Init(base.mgr)
p.LevelZeroCompactionTriggerMaxSize = ParamItem{
Key: "dataCoord.compaction.levelzero.forceTrigger.maxSize",
Version: "2.4.0",
Doc: "The maxmum size in bytes to force trigger a LevelZero Compaction, default as 64MB",
DefaultValue: "67108864",
}
p.LevelZeroCompactionTriggerMaxSize.Init(base.mgr)
p.LevelZeroCompactionTriggerDeltalogMinNum = ParamItem{
Key: "dataCoord.compaction.levelzero.forceTrigger.deltalogMinNum",
Version: "2.3.4",
Version: "2.4.0",
Doc: "The minimum number of deltalog files to force trigger a LevelZero Compaction",
DefaultValue: "10",
}
p.LevelZeroCompactionTriggerDeltalogMinNum.Init(base.mgr)
p.LevelZeroCompactionTriggerDeltalogMaxNum = ParamItem{
Key: "dataCoord.compaction.levelzero.forceTrigger.deltalogMaxNum",
Version: "2.4.0",
Doc: "The maxmum number of deltalog files to force trigger a LevelZero Compaction, default as 20",
DefaultValue: "20",
}
p.LevelZeroCompactionTriggerDeltalogMaxNum.Init(base.mgr)
p.EnableGarbageCollection = ParamItem{
Key: "dataCoord.enableGarbageCollection",
Version: "2.0.0",