mirror of https://github.com/milvus-io/milvus.git
enhance: Add PreallocatedSegmentIDs for the compaction task (#36734)
Add `PreallocatedSegmentIDs` field to the compaction task, allowing the `ResultSegments` in the compaction task to represent the final segments produced by the compaction. issue: https://github.com/milvus-io/milvus/issues/36733 also related: https://github.com/milvus-io/milvus/issues/36686 --------- Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Branch: pull/36790/head
parent
383350c120
commit
d230b91bd1
|
@ -167,25 +167,22 @@ func (t *clusteringCompactionTask) BuildCompactionRequest() (*datapb.CompactionP
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
plan := &datapb.CompactionPlan{
|
plan := &datapb.CompactionPlan{
|
||||||
PlanID: t.GetPlanID(),
|
PlanID: t.GetPlanID(),
|
||||||
StartTime: t.GetStartTime(),
|
StartTime: t.GetStartTime(),
|
||||||
TimeoutInSeconds: t.GetTimeoutInSeconds(),
|
TimeoutInSeconds: t.GetTimeoutInSeconds(),
|
||||||
Type: t.GetType(),
|
Type: t.GetType(),
|
||||||
Channel: t.GetChannel(),
|
Channel: t.GetChannel(),
|
||||||
CollectionTtl: t.GetCollectionTtl(),
|
CollectionTtl: t.GetCollectionTtl(),
|
||||||
TotalRows: t.GetTotalRows(),
|
TotalRows: t.GetTotalRows(),
|
||||||
Schema: t.GetSchema(),
|
Schema: t.GetSchema(),
|
||||||
ClusteringKeyField: t.GetClusteringKeyField().GetFieldID(),
|
ClusteringKeyField: t.GetClusteringKeyField().GetFieldID(),
|
||||||
MaxSegmentRows: t.GetMaxSegmentRows(),
|
MaxSegmentRows: t.GetMaxSegmentRows(),
|
||||||
PreferSegmentRows: t.GetPreferSegmentRows(),
|
PreferSegmentRows: t.GetPreferSegmentRows(),
|
||||||
AnalyzeResultPath: path.Join(t.meta.(*meta).chunkManager.RootPath(), common.AnalyzeStatsPath, metautil.JoinIDPath(t.AnalyzeTaskID, t.AnalyzeVersion)),
|
AnalyzeResultPath: path.Join(t.meta.(*meta).chunkManager.RootPath(), common.AnalyzeStatsPath, metautil.JoinIDPath(t.AnalyzeTaskID, t.AnalyzeVersion)),
|
||||||
AnalyzeSegmentIds: t.GetInputSegments(),
|
AnalyzeSegmentIds: t.GetInputSegments(),
|
||||||
BeginLogID: beginLogID,
|
BeginLogID: beginLogID,
|
||||||
PreAllocatedSegmentIDs: &datapb.IDRange{
|
PreAllocatedSegmentIDs: t.GetPreAllocatedSegmentIDs(),
|
||||||
Begin: t.GetResultSegments()[0],
|
SlotUsage: t.GetSlotUsage(),
|
||||||
End: t.GetResultSegments()[1],
|
|
||||||
},
|
|
||||||
SlotUsage: t.GetSlotUsage(),
|
|
||||||
}
|
}
|
||||||
log := log.With(zap.Int64("taskID", t.GetTriggerID()), zap.Int64("planID", plan.GetPlanID()))
|
log := log.With(zap.Int64("taskID", t.GetTriggerID()), zap.Int64("planID", plan.GetPlanID()))
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,13 @@ func (csm *compactionTaskMeta) reloadFromKV() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, task := range compactionTasks {
|
for _, task := range compactionTasks {
|
||||||
|
// To maintain compatibility with versions ≤v2.4.12, which use `ResultSegments` as preallocate segment IDs.
|
||||||
|
if task.PreAllocatedSegmentIDs == nil && len(task.GetResultSegments()) == 2 {
|
||||||
|
task.PreAllocatedSegmentIDs = &datapb.IDRange{
|
||||||
|
Begin: task.GetResultSegments()[0],
|
||||||
|
End: task.GetResultSegments()[1],
|
||||||
|
}
|
||||||
|
}
|
||||||
csm.saveCompactionTaskMemory(task)
|
csm.saveCompactionTaskMemory(task)
|
||||||
}
|
}
|
||||||
log.Info("DataCoord compactionTaskMeta reloadFromKV done", zap.Duration("duration", record.ElapseSpan()))
|
log.Info("DataCoord compactionTaskMeta reloadFromKV done", zap.Duration("duration", record.ElapseSpan()))
|
||||||
|
|
|
@ -318,21 +318,18 @@ func (t *mixCompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, er
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
plan := &datapb.CompactionPlan{
|
plan := &datapb.CompactionPlan{
|
||||||
PlanID: t.GetPlanID(),
|
PlanID: t.GetPlanID(),
|
||||||
StartTime: t.GetStartTime(),
|
StartTime: t.GetStartTime(),
|
||||||
TimeoutInSeconds: t.GetTimeoutInSeconds(),
|
TimeoutInSeconds: t.GetTimeoutInSeconds(),
|
||||||
Type: t.GetType(),
|
Type: t.GetType(),
|
||||||
Channel: t.GetChannel(),
|
Channel: t.GetChannel(),
|
||||||
CollectionTtl: t.GetCollectionTtl(),
|
CollectionTtl: t.GetCollectionTtl(),
|
||||||
TotalRows: t.GetTotalRows(),
|
TotalRows: t.GetTotalRows(),
|
||||||
Schema: t.GetSchema(),
|
Schema: t.GetSchema(),
|
||||||
BeginLogID: beginLogID,
|
BeginLogID: beginLogID,
|
||||||
PreAllocatedSegmentIDs: &datapb.IDRange{
|
PreAllocatedSegmentIDs: t.GetPreAllocatedSegmentIDs(),
|
||||||
Begin: t.GetResultSegments()[0],
|
SlotUsage: t.GetSlotUsage(),
|
||||||
End: t.GetResultSegments()[1],
|
MaxSize: t.GetMaxSize(),
|
||||||
},
|
|
||||||
SlotUsage: t.GetSlotUsage(),
|
|
||||||
MaxSize: t.GetMaxSize(),
|
|
||||||
}
|
}
|
||||||
log := log.With(zap.Int64("taskID", t.GetTriggerID()), zap.Int64("planID", plan.GetPlanID()))
|
log := log.With(zap.Int64("taskID", t.GetTriggerID()), zap.Int64("planID", plan.GetPlanID()))
|
||||||
|
|
||||||
|
|
|
@ -385,10 +385,14 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) error {
|
||||||
PartitionID: group.partitionID,
|
PartitionID: group.partitionID,
|
||||||
Channel: group.channelName,
|
Channel: group.channelName,
|
||||||
InputSegments: inputSegmentIDs,
|
InputSegments: inputSegmentIDs,
|
||||||
ResultSegments: []int64{startID + 1, endID}, // pre-allocated target segment
|
ResultSegments: []int64{},
|
||||||
TotalRows: totalRows,
|
TotalRows: totalRows,
|
||||||
Schema: coll.Schema,
|
Schema: coll.Schema,
|
||||||
MaxSize: getExpandedSize(expectedSize),
|
MaxSize: getExpandedSize(expectedSize),
|
||||||
|
PreAllocatedSegmentIDs: &datapb.IDRange{
|
||||||
|
Begin: startID + 1,
|
||||||
|
End: endID,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
err = t.compactionHandler.enqueueCompaction(task)
|
err = t.compactionHandler.enqueueCompaction(task)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -491,10 +495,14 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) {
|
||||||
PartitionID: partitionID,
|
PartitionID: partitionID,
|
||||||
Channel: channel,
|
Channel: channel,
|
||||||
InputSegments: inputSegmentIDs,
|
InputSegments: inputSegmentIDs,
|
||||||
ResultSegments: []int64{startID + 1, endID}, // pre-allocated target segment
|
ResultSegments: []int64{},
|
||||||
TotalRows: totalRows,
|
TotalRows: totalRows,
|
||||||
Schema: coll.Schema,
|
Schema: coll.Schema,
|
||||||
MaxSize: getExpandedSize(expectedSize),
|
MaxSize: getExpandedSize(expectedSize),
|
||||||
|
PreAllocatedSegmentIDs: &datapb.IDRange{
|
||||||
|
Begin: startID + 1,
|
||||||
|
End: endID,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
if err := t.compactionHandler.enqueueCompaction(task); err != nil {
|
if err := t.compactionHandler.enqueueCompaction(task); err != nil {
|
||||||
log.Warn("failed to execute compaction task",
|
log.Warn("failed to execute compaction task",
|
||||||
|
|
|
@ -70,7 +70,6 @@ func (h *spyCompactionHandler) enqueueCompaction(task *datapb.CompactionTask) er
|
||||||
}
|
}
|
||||||
alloc := newMock0Allocator(h.t)
|
alloc := newMock0Allocator(h.t)
|
||||||
t.allocator = alloc
|
t.allocator = alloc
|
||||||
t.ResultSegments = []int64{100, 200}
|
|
||||||
plan, err := t.BuildCompactionRequest()
|
plan, err := t.BuildCompactionRequest()
|
||||||
h.spyChan <- plan
|
h.spyChan <- plan
|
||||||
return err
|
return err
|
||||||
|
@ -532,7 +531,7 @@ func Test_compactionTrigger_force(t *testing.T) {
|
||||||
},
|
},
|
||||||
[]*datapb.CompactionPlan{
|
[]*datapb.CompactionPlan{
|
||||||
{
|
{
|
||||||
PlanID: 0,
|
PlanID: 100,
|
||||||
SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
|
SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
|
||||||
{
|
{
|
||||||
SegmentID: 1,
|
SegmentID: 1,
|
||||||
|
@ -580,12 +579,13 @@ func Test_compactionTrigger_force(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
// StartTime: 0,
|
// StartTime: 0,
|
||||||
|
BeginLogID: 100,
|
||||||
TimeoutInSeconds: Params.DataCoordCfg.CompactionTimeoutInSeconds.GetAsInt32(),
|
TimeoutInSeconds: Params.DataCoordCfg.CompactionTimeoutInSeconds.GetAsInt32(),
|
||||||
Type: datapb.CompactionType_MixCompaction,
|
Type: datapb.CompactionType_MixCompaction,
|
||||||
Channel: "ch1",
|
Channel: "ch1",
|
||||||
TotalRows: 200,
|
TotalRows: 200,
|
||||||
Schema: schema,
|
Schema: schema,
|
||||||
PreAllocatedSegmentIDs: &datapb.IDRange{Begin: 100, End: 200},
|
PreAllocatedSegmentIDs: &datapb.IDRange{Begin: 101, End: 200},
|
||||||
MaxSize: 1342177280,
|
MaxSize: 1342177280,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -878,6 +878,7 @@ func Test_compactionTrigger_force_maxSegmentLimit(t *testing.T) {
|
||||||
IsSorted: true,
|
IsSorted: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
BeginLogID: 100,
|
||||||
StartTime: 3,
|
StartTime: 3,
|
||||||
TimeoutInSeconds: Params.DataCoordCfg.CompactionTimeoutInSeconds.GetAsInt32(),
|
TimeoutInSeconds: Params.DataCoordCfg.CompactionTimeoutInSeconds.GetAsInt32(),
|
||||||
Type: datapb.CompactionType_MixCompaction,
|
Type: datapb.CompactionType_MixCompaction,
|
||||||
|
|
|
@ -335,12 +335,16 @@ func (m *CompactionTriggerManager) SubmitClusteringViewToScheduler(ctx context.C
|
||||||
Schema: collection.Schema,
|
Schema: collection.Schema,
|
||||||
ClusteringKeyField: view.(*ClusteringSegmentsView).clusteringKeyField,
|
ClusteringKeyField: view.(*ClusteringSegmentsView).clusteringKeyField,
|
||||||
InputSegments: lo.Map(view.GetSegmentsView(), func(segmentView *SegmentView, _ int) int64 { return segmentView.ID }),
|
InputSegments: lo.Map(view.GetSegmentsView(), func(segmentView *SegmentView, _ int) int64 { return segmentView.ID }),
|
||||||
ResultSegments: []int64{start, end}, // pre-allocated result segments range
|
ResultSegments: []int64{},
|
||||||
MaxSegmentRows: maxSegmentRows,
|
MaxSegmentRows: maxSegmentRows,
|
||||||
PreferSegmentRows: preferSegmentRows,
|
PreferSegmentRows: preferSegmentRows,
|
||||||
TotalRows: totalRows,
|
TotalRows: totalRows,
|
||||||
AnalyzeTaskID: taskID + 1,
|
AnalyzeTaskID: taskID + 1,
|
||||||
LastStateStartTime: time.Now().Unix(),
|
LastStateStartTime: time.Now().Unix(),
|
||||||
|
PreAllocatedSegmentIDs: &datapb.IDRange{
|
||||||
|
Begin: start,
|
||||||
|
End: end,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
err = m.compactionHandler.enqueueCompaction(task)
|
err = m.compactionHandler.enqueueCompaction(task)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -391,10 +395,14 @@ func (m *CompactionTriggerManager) SubmitSingleViewToScheduler(ctx context.Conte
|
||||||
Channel: view.GetGroupLabel().Channel,
|
Channel: view.GetGroupLabel().Channel,
|
||||||
Schema: collection.Schema,
|
Schema: collection.Schema,
|
||||||
InputSegments: lo.Map(view.GetSegmentsView(), func(segmentView *SegmentView, _ int) int64 { return segmentView.ID }),
|
InputSegments: lo.Map(view.GetSegmentsView(), func(segmentView *SegmentView, _ int) int64 { return segmentView.ID }),
|
||||||
ResultSegments: []int64{startID + 1, endID},
|
ResultSegments: []int64{},
|
||||||
TotalRows: totalRows,
|
TotalRows: totalRows,
|
||||||
LastStateStartTime: time.Now().Unix(),
|
LastStateStartTime: time.Now().Unix(),
|
||||||
MaxSize: getExpandedSize(expectedSize),
|
MaxSize: getExpandedSize(expectedSize),
|
||||||
|
PreAllocatedSegmentIDs: &datapb.IDRange{
|
||||||
|
Begin: startID + 1,
|
||||||
|
End: endID,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
err = m.compactionHandler.enqueueCompaction(task)
|
err = m.compactionHandler.enqueueCompaction(task)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -108,9 +108,9 @@ func newMockAllocator(t *testing.T) *allocator.MockAllocator {
|
||||||
|
|
||||||
func newMock0Allocator(t *testing.T) *allocator.MockAllocator {
|
func newMock0Allocator(t *testing.T) *allocator.MockAllocator {
|
||||||
mock0Allocator := allocator.NewMockAllocator(t)
|
mock0Allocator := allocator.NewMockAllocator(t)
|
||||||
mock0Allocator.EXPECT().AllocID(mock.Anything).Return(0, nil).Maybe()
|
mock0Allocator.EXPECT().AllocID(mock.Anything).Return(100, nil).Maybe()
|
||||||
mock0Allocator.EXPECT().AllocTimestamp(mock.Anything).Return(0, nil).Maybe()
|
mock0Allocator.EXPECT().AllocTimestamp(mock.Anything).Return(1000, nil).Maybe()
|
||||||
mock0Allocator.EXPECT().AllocN(mock.Anything).Return(0, 0, nil).Maybe()
|
mock0Allocator.EXPECT().AllocN(mock.Anything).Return(100, 200, nil).Maybe()
|
||||||
return mock0Allocator
|
return mock0Allocator
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -609,7 +609,7 @@ message CompactionPlan {
|
||||||
repeated int64 analyze_segment_ids = 15;
|
repeated int64 analyze_segment_ids = 15;
|
||||||
int32 state = 16;
|
int32 state = 16;
|
||||||
int64 begin_logID = 17;
|
int64 begin_logID = 17;
|
||||||
IDRange pre_allocated_segmentIDs = 18; // only for clustering compaction
|
IDRange pre_allocated_segmentIDs = 18;
|
||||||
int64 slot_usage = 19;
|
int64 slot_usage = 19;
|
||||||
int64 max_size = 20;
|
int64 max_size = 20;
|
||||||
}
|
}
|
||||||
|
@ -972,6 +972,7 @@ message CompactionTask{
|
||||||
int64 lastStateStartTime = 25;
|
int64 lastStateStartTime = 25;
|
||||||
int64 max_size = 26;
|
int64 max_size = 26;
|
||||||
repeated int64 tmpSegments = 27;
|
repeated int64 tmpSegments = 27;
|
||||||
|
IDRange pre_allocated_segmentIDs = 28;
|
||||||
}
|
}
|
||||||
|
|
||||||
message PartitionStatsInfo {
|
message PartitionStatsInfo {
|
||||||
|
|
Loading…
Reference in New Issue