enhance: Whether to enable mergeSort mode when performing mixCompaction (#37664)

issue: #37579

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
pull/37795/head
cai.zhang 2024-11-19 11:28:31 +08:00 committed by GitHub
parent 33bfb25c73
commit dae4160466
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 17 additions and 2 deletions

View File

@ -678,6 +678,7 @@ dataNode:
compaction:
levelZeroBatchMemoryRatio: 0.5 # The minimum ratio of free memory required for level zero compaction to execute in batch mode
levelZeroMaxBatchSize: -1 # The maximum number of L1/L2 segments in a batch when executing L0 compaction. Defaults to -1; any value less than 1 means no limit. Valid range: >= 1.
useMergeSort: false # Whether to enable mergeSort mode when performing mixCompaction.
gracefulStopTimeout: 1800 # seconds. force stop node without graceful stop
slot:
slotCap: 16 # The maximum number of tasks(e.g. compaction, importing) allowed to run concurrently on a datanode

View File

@ -338,7 +338,7 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) {
}
var res []*datapb.CompactionSegment
if allSorted && len(t.plan.GetSegmentBinlogs()) > 1 {
if paramtable.Get().DataNodeCfg.UseMergeSort.GetAsBool() && allSorted && len(t.plan.GetSegmentBinlogs()) > 1 {
log.Info("all segments are sorted, use merge sort")
res, err = mergeSortMultipleSegments(ctxTimeout, t.plan, t.collectionID, t.partitionID, t.maxRows, t.binlogIO,
t.plan.GetSegmentBinlogs(), t.tr, t.currentTs, t.plan.GetCollectionTtl(), t.bm25FieldIDs)

View File

@ -283,6 +283,8 @@ func (s *MixCompactionTaskSuite) TestCompactTwoToOneWithBM25() {
}
func (s *MixCompactionTaskSuite) TestCompactSortedSegment() {
paramtable.Get().Save("dataNode.compaction.useMergeSort", "true")
defer paramtable.Get().Reset("dataNode.compaction.useMergeSort")
segments := []int64{1001, 1002, 1003}
alloc := allocator.NewLocalAllocator(100, math.MaxInt64)
s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil)

View File

@ -4247,6 +4247,7 @@ type dataNodeConfig struct {
// Compaction
L0BatchMemoryRatio ParamItem `refreshable:"true"`
L0CompactionMaxBatchSize ParamItem `refreshable:"true"`
UseMergeSort ParamItem `refreshable:"true"`
GracefulStopTimeout ParamItem `refreshable:"true"`
@ -4578,6 +4579,15 @@ if this parameter <= 0, will set it as 10`,
}
p.L0CompactionMaxBatchSize.Init(base.mgr)
p.UseMergeSort = ParamItem{
Key: "dataNode.compaction.useMergeSort",
Version: "2.5.0",
Doc: "Whether to enable mergeSort mode when performing mixCompaction.",
DefaultValue: "false",
Export: true,
}
p.UseMergeSort.Init(base.mgr)
p.GracefulStopTimeout = ParamItem{
Key: "dataNode.gracefulStopTimeout",
Version: "2.3.7",

View File

@ -599,13 +599,15 @@ class TestCompactionOperation(TestcaseBase):
collection_w.wait_for_compaction_completed()
c_plans = collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)[0]
old_segmentIDs = [c_plans.plans[0].target]
old_segmentIDs.extend(c_plans.plans[0].sources)
# waiting for handoff completed and search
cost = 180
start = time()
while True:
sleep(1)
segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
if len(segment_info) != 0 and segment_info[0].segmentID == c_plans.plans[0].target:
if len(segment_info) != 0 and segment_info[0].segmentID not in old_segmentIDs and segment_info[0].is_sorted:
log.debug(segment_info)
break
if time() - start > cost: