2021-12-29 03:38:57 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2021-11-05 14:25:00 +00:00
package datacoord
import (
"context"
2022-06-15 15:14:10 +00:00
"sort"
2021-11-05 14:25:00 +00:00
"sync"
"time"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
2022-02-09 10:55:46 +00:00
"github.com/milvus-io/milvus/internal/util/logutil"
2022-01-05 06:16:17 +00:00
"go.uber.org/zap"
2021-11-05 14:25:00 +00:00
)
2022-06-15 15:14:10 +00:00
type compactTime struct {
travelTime Timestamp
expireTime Timestamp
2021-11-05 14:25:00 +00:00
}
type trigger interface {
start ( )
stop ( )
2021-12-16 01:57:25 +00:00
// triggerCompaction triggers a compaction if any compaction condition satisfy.
2022-06-15 15:14:10 +00:00
triggerCompaction ( compactTime * compactTime ) error
2021-12-16 01:57:25 +00:00
// triggerSingleCompaction triggers a compaction bundled with collection-partition-channel-segment
2022-06-15 15:14:10 +00:00
triggerSingleCompaction ( collectionID , partitionID , segmentID int64 , channel string , compactTime * compactTime ) error
2021-11-05 14:25:00 +00:00
// forceTriggerCompaction force to start a compaction
2022-06-15 15:14:10 +00:00
forceTriggerCompaction ( collectionID int64 , compactTime * compactTime ) ( UniqueID , error )
2021-11-05 14:25:00 +00:00
}
type compactionSignal struct {
id UniqueID
isForce bool
isGlobal bool
collectionID UniqueID
partitionID UniqueID
segmentID UniqueID
channel string
2022-06-15 15:14:10 +00:00
compactTime * compactTime
2021-11-05 14:25:00 +00:00
}
var _ trigger = ( * compactionTrigger ) ( nil )
type compactionTrigger struct {
2022-06-15 15:14:10 +00:00
meta * meta
allocator allocator
signals chan * compactionSignal
compactionHandler compactionPlanContext
globalTrigger * time . Ticker
forceMu sync . Mutex
quit chan struct { }
wg sync . WaitGroup
2022-06-20 13:56:12 +00:00
segRefer * SegmentReferenceManager
2021-11-05 14:25:00 +00:00
}
2022-06-20 13:56:12 +00:00
func newCompactionTrigger ( meta * meta , compactionHandler compactionPlanContext , allocator allocator ,
segRefer * SegmentReferenceManager ) * compactionTrigger {
2021-11-05 14:25:00 +00:00
return & compactionTrigger {
2022-06-15 15:14:10 +00:00
meta : meta ,
allocator : allocator ,
signals : make ( chan * compactionSignal , 100 ) ,
compactionHandler : compactionHandler ,
2022-06-20 13:56:12 +00:00
segRefer : segRefer ,
2021-11-05 14:25:00 +00:00
}
}
func ( t * compactionTrigger ) start ( ) {
t . quit = make ( chan struct { } )
2022-06-15 15:14:10 +00:00
t . globalTrigger = time . NewTicker ( Params . DataCoordCfg . GlobalCompactionInterval )
2021-11-05 14:25:00 +00:00
t . wg . Add ( 2 )
go func ( ) {
defer logutil . LogPanic ( )
defer t . wg . Done ( )
for {
select {
case <- t . quit :
2022-03-02 07:35:55 +00:00
log . Info ( "compaction trigger quit" )
2021-11-05 14:25:00 +00:00
return
case signal := <- t . signals :
switch {
case signal . isGlobal :
t . handleGlobalSignal ( signal )
default :
t . handleSignal ( signal )
2022-06-15 15:14:10 +00:00
// shouldn't reset, otherwise a frequent flushed collection will affect other collections
// t.globalTrigger.Reset(Params.DataCoordCfg.GlobalCompactionInterval)
2021-11-05 14:25:00 +00:00
}
}
}
} ( )
go t . startGlobalCompactionLoop ( )
}
func ( t * compactionTrigger ) startGlobalCompactionLoop ( ) {
defer logutil . LogPanic ( )
defer t . wg . Done ( )
2021-12-16 01:59:12 +00:00
// If AutoCompaction disabled, global loop will not start
2022-06-06 07:10:05 +00:00
if ! Params . DataCoordCfg . GetEnableAutoCompaction ( ) {
2021-12-08 11:47:05 +00:00
return
}
2021-11-05 14:25:00 +00:00
for {
select {
case <- t . quit :
t . globalTrigger . Stop ( )
log . Info ( "global compaction loop exit" )
return
case <- t . globalTrigger . C :
cctx , cancel := context . WithTimeout ( context . Background ( ) , 5 * time . Second )
2022-06-15 15:14:10 +00:00
ct , err := getCompactTime ( cctx , t . allocator )
2021-11-05 14:25:00 +00:00
if err != nil {
2022-06-15 15:14:10 +00:00
log . Warn ( "unbale to get compaction time" , zap . Error ( err ) )
2021-11-05 14:25:00 +00:00
cancel ( )
continue
}
cancel ( )
2022-06-15 15:14:10 +00:00
err = t . triggerCompaction ( ct )
2022-01-07 09:41:40 +00:00
if err != nil {
log . Warn ( "unable to triggerCompaction" , zap . Error ( err ) )
}
2021-11-05 14:25:00 +00:00
}
}
}
// stop closes the quit channel to signal both background goroutines to exit
// and blocks until they have returned. The quit channel is created by start.
func (t *compactionTrigger) stop() {
	close(t.quit)
	t.wg.Wait()
}
// triggerCompaction trigger a compaction if any compaction condition satisfy.
2022-06-15 15:14:10 +00:00
func ( t * compactionTrigger ) triggerCompaction ( compactTime * compactTime ) error {
2021-11-05 14:25:00 +00:00
id , err := t . allocSignalID ( )
if err != nil {
return err
}
signal := & compactionSignal {
2022-06-15 15:14:10 +00:00
id : id ,
isForce : false ,
isGlobal : true ,
compactTime : compactTime ,
2021-11-05 14:25:00 +00:00
}
t . signals <- signal
return nil
}
// triggerSingleCompaction triger a compaction bundled with collection-partiiton-channel-segment
2022-06-15 15:14:10 +00:00
func ( t * compactionTrigger ) triggerSingleCompaction ( collectionID , partitionID , segmentID int64 , channel string , compactTime * compactTime ) error {
2021-12-08 11:47:05 +00:00
// If AutoCompaction diabled, flush request will not trigger compaction
2022-06-06 07:10:05 +00:00
if ! Params . DataCoordCfg . GetEnableAutoCompaction ( ) {
2021-12-08 11:47:05 +00:00
return nil
}
2021-11-05 14:25:00 +00:00
id , err := t . allocSignalID ( )
if err != nil {
return err
}
signal := & compactionSignal {
id : id ,
isForce : false ,
isGlobal : false ,
collectionID : collectionID ,
partitionID : partitionID ,
segmentID : segmentID ,
channel : channel ,
2022-06-15 15:14:10 +00:00
compactTime : compactTime ,
2021-11-05 14:25:00 +00:00
}
t . signals <- signal
return nil
}
// forceTriggerCompaction force to start a compaction
2022-02-18 06:51:49 +00:00
// invoked by user `ManualCompaction` operation
2022-06-15 15:14:10 +00:00
func ( t * compactionTrigger ) forceTriggerCompaction ( collectionID int64 , compactTime * compactTime ) ( UniqueID , error ) {
2021-11-05 14:25:00 +00:00
id , err := t . allocSignalID ( )
if err != nil {
return - 1 , err
}
signal := & compactionSignal {
id : id ,
isForce : true ,
2022-02-18 06:51:49 +00:00
isGlobal : true ,
2021-11-05 14:25:00 +00:00
collectionID : collectionID ,
2022-06-15 15:14:10 +00:00
compactTime : compactTime ,
2021-11-05 14:25:00 +00:00
}
2022-02-18 06:51:49 +00:00
t . handleGlobalSignal ( signal )
2021-11-05 14:25:00 +00:00
return id , nil
}
// allocSignalID asks the allocator for a new ID to tag a compaction signal
// with, giving up after five seconds.
func (t *compactionTrigger) allocSignalID() (UniqueID, error) {
	const allocTimeout = 5 * time.Second

	allocCtx, release := context.WithTimeout(context.Background(), allocTimeout)
	defer release()

	return t.allocator.allocID(allocCtx)
}
// getPlanIDs returns the plan ID of every plan, in input order. The result is
// always a non-nil slice with one entry per plan.
func getPlanIDs(plans []*datapb.CompactionPlan) []int64 {
	planIDs := make([]int64, len(plans))
	for i := range plans {
		planIDs[i] = plans[i].GetPlanID()
	}
	return planIDs
}
func ( t * compactionTrigger ) handleGlobalSignal ( signal * compactionSignal ) {
t . forceMu . Lock ( )
defer t . forceMu . Unlock ( )
2022-02-18 06:51:49 +00:00
m := t . meta . GetSegmentsChanPart ( func ( segment * SegmentInfo ) bool {
return ( signal . collectionID == 0 || segment . CollectionID == signal . collectionID ) &&
isSegmentHealthy ( segment ) &&
isFlush ( segment ) &&
2022-06-20 13:56:12 +00:00
! segment . isCompacting && // not compacting now
! t . segRefer . HasSegmentLock ( segment . ID ) // not reference
2022-02-18 06:51:49 +00:00
} ) // m is list of chanPartSegments, which is channel-partition organized segments
for _ , group := range m {
if ! signal . isForce && t . compactionHandler . isFull ( ) {
break
}
2021-11-05 14:25:00 +00:00
2022-06-15 15:14:10 +00:00
plans := t . generatePlans ( group . segments , signal . isForce , signal . compactTime )
if len ( plans ) != 0 {
log . Info ( "global generated plans" , zap . Int64 ( "collection" , signal . collectionID ) , zap . Int ( "plan count" , len ( plans ) ) )
}
2022-02-18 06:51:49 +00:00
for _ , plan := range plans {
if ! signal . isForce && t . compactionHandler . isFull ( ) {
log . Warn ( "compaction plan skipped due to handler full" , zap . Int64 ( "collection" , signal . collectionID ) , zap . Int64 ( "planID" , plan . PlanID ) )
break
}
start := time . Now ( )
if err := t . fillOriginPlan ( plan ) ; err != nil {
log . Warn ( "failed to fill plan" , zap . Error ( err ) )
continue
}
t . compactionHandler . execCompactionPlan ( signal , plan )
2021-11-05 14:25:00 +00:00
2022-02-18 06:51:49 +00:00
log . Info ( "time cost of generating global compaction" , zap . Int64 ( "planID" , plan . PlanID ) , zap . Any ( "time cost" , time . Since ( start ) . Milliseconds ( ) ) ,
zap . Int64 ( "collectionID" , signal . collectionID ) , zap . String ( "channel" , group . channelName ) , zap . Int64 ( "partitionID" , group . partitionID ) )
}
2021-12-29 02:06:47 +00:00
}
2021-11-05 14:25:00 +00:00
}
2022-02-18 06:51:49 +00:00
// handleSignal processes segment flush caused partition-chan level compaction signal
2021-11-05 14:25:00 +00:00
func ( t * compactionTrigger ) handleSignal ( signal * compactionSignal ) {
t . forceMu . Lock ( )
defer t . forceMu . Unlock ( )
// 1. check whether segment's binlogs should be compacted or not
if t . compactionHandler . isFull ( ) {
return
}
segment := t . meta . GetSegment ( signal . segmentID )
2021-12-22 13:23:10 +00:00
if segment == nil {
log . Warn ( "segment in compaction signal not found in meta" , zap . Int64 ( "segmentID" , signal . segmentID ) )
return
}
2021-11-05 14:25:00 +00:00
channel := segment . GetInsertChannel ( )
partitionID := segment . GetPartitionID ( )
segments := t . getCandidateSegments ( channel , partitionID )
2022-06-15 15:14:10 +00:00
plans := t . generatePlans ( segments , signal . isForce , signal . compactTime )
2022-02-18 06:51:49 +00:00
log . Info ( "single generated plans" , zap . Int64 ( "collection" , signal . collectionID ) , zap . Int ( "plan count" , len ( plans ) ) )
for _ , plan := range plans {
if t . compactionHandler . isFull ( ) {
log . Warn ( "compaction plan skipped due to handler full" , zap . Int64 ( "collection" , signal . collectionID ) , zap . Int64 ( "planID" , plan . PlanID ) )
break
}
start := time . Now ( )
if err := t . fillOriginPlan ( plan ) ; err != nil {
log . Warn ( "failed to fill plan" , zap . Error ( err ) )
continue
}
t . compactionHandler . execCompactionPlan ( signal , plan )
2021-11-05 14:25:00 +00:00
2022-02-18 06:51:49 +00:00
log . Info ( "time cost of generating compaction" , zap . Int64 ( "planID" , plan . PlanID ) , zap . Any ( "time cost" , time . Since ( start ) . Milliseconds ( ) ) ,
zap . Int64 ( "collectionID" , signal . collectionID ) , zap . String ( "channel" , channel ) , zap . Int64 ( "partitionID" , partitionID ) )
2021-11-05 14:25:00 +00:00
}
}
2022-06-15 15:14:10 +00:00
func ( t * compactionTrigger ) generatePlans ( segments [ ] * SegmentInfo , force bool , compactTime * compactTime ) [ ] * datapb . CompactionPlan {
2022-02-18 06:51:49 +00:00
// find segments need internal compaction
2022-06-15 15:14:10 +00:00
// TODO add low priority candidates, for example if the segment is smaller than full 0.9 * max segment size but larger than small segment boundary, we only execute compaction when there are no compaction running actively
var prioritizedCandidates [ ] * SegmentInfo
var smallCandidates [ ] * SegmentInfo
// TODO, currently we lack of the measurement of data distribution, there should be another compaction help on redistributing segment based on scalar/vector field distribution
2022-02-18 06:51:49 +00:00
for _ , segment := range segments {
segment := segment . ShadowClone ( )
2022-06-15 15:14:10 +00:00
// TODO should we trigger compaction periodically even if the segment has no obvious reason to be compacted?
if force || t . ShouldDoSingleCompaction ( segment , compactTime ) {
prioritizedCandidates = append ( prioritizedCandidates , segment )
} else if t . isSmallSegment ( segment ) {
smallCandidates = append ( smallCandidates , segment )
2022-02-18 06:51:49 +00:00
}
}
var plans [ ] * datapb . CompactionPlan
2022-06-15 15:14:10 +00:00
// sort segment from large to small
sort . Slice ( prioritizedCandidates , func ( i , j int ) bool {
if prioritizedCandidates [ i ] . getSegmentSize ( ) != prioritizedCandidates [ i ] . getSegmentSize ( ) {
return prioritizedCandidates [ i ] . getSegmentSize ( ) > prioritizedCandidates [ i ] . getSegmentSize ( )
}
return prioritizedCandidates [ i ] . GetID ( ) < prioritizedCandidates [ j ] . GetID ( )
} )
2022-02-18 06:51:49 +00:00
2022-06-15 15:14:10 +00:00
sort . Slice ( smallCandidates , func ( i , j int ) bool {
if smallCandidates [ i ] . getSegmentSize ( ) != smallCandidates [ i ] . getSegmentSize ( ) {
return smallCandidates [ i ] . getSegmentSize ( ) > smallCandidates [ i ] . getSegmentSize ( )
}
return smallCandidates [ i ] . GetID ( ) < smallCandidates [ j ] . GetID ( )
} )
// greedy pick from large segment to small, the goal is to fill each segment to reach 512M
// we must ensure all prioritized candidates is in a plan
//TODO the compaction policy should consider segment with similar timestamp together so timetravel and data expiration could work better.
//TODO the compaction selection policy should consider if compaction workload is high
for len ( prioritizedCandidates ) > 0 {
2022-02-18 06:51:49 +00:00
var bucket [ ] * SegmentInfo
2022-06-15 15:14:10 +00:00
// pop out the first element
segment := prioritizedCandidates [ 0 ]
2022-02-18 06:51:49 +00:00
bucket = append ( bucket , segment )
2022-06-15 15:14:10 +00:00
prioritizedCandidates = prioritizedCandidates [ 1 : ]
// only do single file compaction if segment is already large enough
if segment . getSegmentSize ( ) < int64 ( Params . DataCoordCfg . SegmentMaxSize ) * 1024 * 1024 {
var result [ ] * SegmentInfo
free := int64 ( Params . DataCoordCfg . SegmentMaxSize ) * 1024 * 1024 - segment . getSegmentSize ( )
maxNum := Params . DataCoordCfg . MaxSegmentToMerge - 1
prioritizedCandidates , result , free = greedySelect ( prioritizedCandidates , free , maxNum )
bucket = append ( bucket , result ... )
maxNum -= len ( result )
if maxNum > 0 {
smallCandidates , result , _ = greedySelect ( smallCandidates , free , maxNum )
bucket = append ( bucket , result ... )
}
}
// since this is priority compaction, we will execute even if there is only segment
plans = append ( plans , segmentsToPlan ( bucket , compactTime ) )
2022-02-18 06:51:49 +00:00
}
2022-06-15 15:14:10 +00:00
// check if there are small candidates left can be merged into large segments
for len ( smallCandidates ) > 0 {
var bucket [ ] * SegmentInfo
// pop out the first element
segment := smallCandidates [ 0 ]
bucket = append ( bucket , segment )
smallCandidates = smallCandidates [ 1 : ]
var result [ ] * SegmentInfo
free := int64 ( Params . DataCoordCfg . SegmentMaxSize * 1024 * 1024 ) - segment . getSegmentSize ( )
// for small segment merge, we pick one largest segment and merge as much as small segment together with it
// Why reverse? try to merge as many segments as expected.
// for instance, if a 255M and 255M is the largest small candidates, they will never be merged because of the MinSegmentToMerge limit.
smallCandidates , result , _ = reverseGreedySelect ( smallCandidates , free , Params . DataCoordCfg . MaxSegmentToMerge - 1 )
bucket = append ( bucket , result ... )
2022-02-18 06:51:49 +00:00
2022-06-15 15:14:10 +00:00
// only merge if candidate number is large than MinSegmentToMerge
if len ( bucket ) >= Params . DataCoordCfg . MinSegmentToMerge {
plans = append ( plans , segmentsToPlan ( bucket , compactTime ) )
}
2022-02-18 06:51:49 +00:00
}
return plans
}
2022-06-15 15:14:10 +00:00
func segmentsToPlan ( segments [ ] * SegmentInfo , compactTime * compactTime ) * datapb . CompactionPlan {
2022-02-18 06:51:49 +00:00
plan := & datapb . CompactionPlan {
2022-06-15 15:14:10 +00:00
Timetravel : compactTime . travelTime ,
2022-02-18 06:51:49 +00:00
Type : datapb . CompactionType_MixCompaction ,
Channel : segments [ 0 ] . GetInsertChannel ( ) ,
}
for _ , s := range segments {
segmentBinlogs := & datapb . CompactionSegmentBinlogs {
SegmentID : s . GetID ( ) ,
FieldBinlogs : s . GetBinlogs ( ) ,
Field2StatslogPaths : s . GetStatslogs ( ) ,
Deltalogs : s . GetDeltalogs ( ) ,
}
plan . SegmentBinlogs = append ( plan . SegmentBinlogs , segmentBinlogs )
}
return plan
}
2022-06-15 15:14:10 +00:00
// greedySelect walks candidates front to back and picks every segment whose
// size is strictly below the remaining free space, until maxSegment segments
// have been picked.
//
// It returns the candidates that were not picked (original order preserved),
// the picked segments in pick order, and the free space left. Picked entries
// are removed in place, so the caller's backing array is modified.
func greedySelect(candidates []*SegmentInfo, free int64, maxSegment int) ([]*SegmentInfo, []*SegmentInfo, int64) {
	var picked []*SegmentInfo

	pos := 0
	for pos < len(candidates) {
		current := candidates[pos]
		if len(picked) >= maxSegment || current.getSegmentSize() >= free {
			// Does not fit (or quota reached): keep it and move on.
			pos++
			continue
		}

		picked = append(picked, current)
		free -= current.getSegmentSize()
		// Removing the element shifts the next one into pos, so pos stays put.
		candidates = append(candidates[:pos], candidates[pos+1:]...)
	}

	return candidates, picked, free
}
func reverseGreedySelect ( candidates [ ] * SegmentInfo , free int64 , maxSegment int ) ( [ ] * SegmentInfo , [ ] * SegmentInfo , int64 ) {
2022-02-18 06:51:49 +00:00
var result [ ] * SegmentInfo
2022-06-15 15:14:10 +00:00
for i := len ( candidates ) - 1 ; i >= 0 ; i -- {
candidate := candidates [ i ]
if ( len ( result ) < maxSegment ) && ( candidate . getSegmentSize ( ) < free ) {
result = append ( result , candidate )
free -= candidate . getSegmentSize ( )
candidates = append ( candidates [ : i ] , candidates [ i + 1 : ] ... )
}
2022-02-18 06:51:49 +00:00
}
2022-06-15 15:14:10 +00:00
return candidates , result , free
2021-11-05 14:25:00 +00:00
}
func ( t * compactionTrigger ) getCandidateSegments ( channel string , partitionID UniqueID ) [ ] * SegmentInfo {
segments := t . meta . GetSegmentsByChannel ( channel )
2022-02-18 06:51:49 +00:00
var res [ ] * SegmentInfo
2021-11-05 14:25:00 +00:00
for _ , s := range segments {
2021-12-14 05:55:07 +00:00
if ! isFlush ( s ) || s . GetInsertChannel ( ) != channel ||
2022-06-20 13:56:12 +00:00
s . GetPartitionID ( ) != partitionID || s . isCompacting || t . segRefer . HasSegmentLock ( s . ID ) {
2021-11-05 14:25:00 +00:00
continue
}
res = append ( res , s )
}
return res
}
2022-02-18 06:51:49 +00:00
func ( t * compactionTrigger ) isSmallSegment ( segment * SegmentInfo ) bool {
2022-06-15 15:14:10 +00:00
return segment . getSegmentSize ( ) < int64 ( Params . DataCoordCfg . SegmentMaxSize * Params . DataCoordCfg . SegmentSmallProportion * 1024 * 1024 )
2021-11-05 14:25:00 +00:00
}
func ( t * compactionTrigger ) fillOriginPlan ( plan * datapb . CompactionPlan ) error {
// TODO context
2021-12-14 05:55:07 +00:00
id , err := t . allocator . allocID ( context . TODO ( ) )
if err != nil {
return err
}
ts , err := t . allocator . allocTimestamp ( context . TODO ( ) )
2021-11-05 14:25:00 +00:00
if err != nil {
return err
}
plan . PlanID = id
2021-12-14 05:55:07 +00:00
plan . StartTime = ts
2022-06-15 15:14:10 +00:00
plan . TimeoutInSeconds = Params . DataCoordCfg . CompactionTimeoutInSeconds
2021-11-05 14:25:00 +00:00
return nil
}
2022-06-15 15:14:10 +00:00
func ( t * compactionTrigger ) ShouldDoSingleCompaction ( segment * SegmentInfo , compactTime * compactTime ) bool {
// count all the binlog file count
var totalLogNum int
for _ , binlogs := range segment . GetBinlogs ( ) {
totalLogNum += len ( binlogs . GetBinlogs ( ) )
}
for _ , deltaLogs := range segment . GetDeltalogs ( ) {
totalLogNum += len ( deltaLogs . GetBinlogs ( ) )
}
for _ , statsLogs := range segment . GetStatslogs ( ) {
totalLogNum += len ( statsLogs . GetBinlogs ( ) )
}
// avoid segment has too many bin logs and the etcd meta is too large, force trigger compaction
if totalLogNum > int ( Params . DataCoordCfg . SingleCompactionBinlogMaxNum ) {
log . Info ( "total binlog number is too much, trigger compaction" , zap . Int64 ( "segment" , segment . ID ) ,
zap . Int ( "Delta logs" , len ( segment . GetDeltalogs ( ) ) ) , zap . Int ( "Bin Logs" , len ( segment . GetBinlogs ( ) ) ) , zap . Int ( "Stat logs" , len ( segment . GetStatslogs ( ) ) ) )
return true
}
// if expire time is enabled, put segment into compaction candidate
totalExpiredSize := int64 ( 0 )
for _ , binlogs := range segment . GetBinlogs ( ) {
for _ , l := range binlogs . GetBinlogs ( ) {
// TODO, we should probably estimate expired log entries by total rows in binlog and the ralationship of timeTo, timeFrom and expire time
if l . TimestampTo < compactTime . expireTime {
totalExpiredSize += l . GetLogSize ( )
}
}
}
if totalExpiredSize > Params . DataCoordCfg . SingleCompactionExpiredLogMaxSize {
log . Info ( "total expired entities is too much, trigger compation" , zap . Int64 ( "segment" , segment . ID ) , zap . Int64 ( "expired log size" , totalExpiredSize ) )
return true
}
2021-11-05 14:25:00 +00:00
// single compaction only merge insert and delta log beyond the timetravel
// segment's insert binlogs dont have time range info, so we wait until the segment's last expire time is less than timetravel
// to ensure that all insert logs is beyond the timetravel.
// TODO: add meta in insert binlog
2022-06-15 15:14:10 +00:00
if segment . LastExpireTime >= compactTime . travelTime {
log . Debug ( "compaction is not triggered" , zap . Int64 ( "segment" , segment . ID ) , zap . Int64 ( "expired log size" , totalExpiredSize ) ,
zap . Uint64 ( "Expire" , segment . LastExpireTime ) , zap . Uint64 ( "Travel" , compactTime . travelTime ) )
2021-11-05 14:25:00 +00:00
return false
}
totalDeletedRows := 0
totalDeleteLogSize := int64 ( 0 )
2022-06-15 15:14:10 +00:00
for _ , deltaLogs := range segment . GetDeltalogs ( ) {
for _ , l := range deltaLogs . GetBinlogs ( ) {
if l . TimestampTo < compactTime . travelTime {
2021-12-19 12:00:42 +00:00
totalDeletedRows += int ( l . GetEntriesNum ( ) )
totalDeleteLogSize += l . GetLogSize ( )
}
2021-11-05 14:25:00 +00:00
}
}
// currently delta log size and delete ratio policy is applied
2022-06-15 15:14:10 +00:00
if float32 ( totalDeletedRows ) / float32 ( segment . NumOfRows ) >= Params . DataCoordCfg . SingleCompactionRatioThreshold || totalDeleteLogSize > Params . DataCoordCfg . SingleCompactionDeltaLogMaxSize {
log . Info ( "total delete entities is too much, trigger compation" , zap . Int64 ( "segment" , segment . ID ) ,
zap . Int ( "deleted rows" , totalDeletedRows ) , zap . Int64 ( "delete log size" , totalDeleteLogSize ) )
return true
2022-02-18 06:51:49 +00:00
}
2022-06-15 15:14:10 +00:00
log . Debug ( "compaction is not triggered" , zap . Int64 ( "segment" , segment . ID ) , zap . Int64 ( "expired log size" , totalExpiredSize ) ,
zap . Int ( "deleted rows" , totalDeletedRows ) , zap . Int64 ( "delete log size" , totalDeleteLogSize ) )
2022-02-18 06:51:49 +00:00
return false
}
2021-12-14 05:55:07 +00:00
// isFlush reports whether the segment is in the Flushed or Flushing state.
func isFlush(segment *SegmentInfo) bool {
	switch segment.GetState() {
	case commonpb.SegmentState_Flushed, commonpb.SegmentState_Flushing:
		return true
	default:
		return false
	}
}