// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package datacoord import ( "context" "path" "sync" "time" "github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/proto/commonpb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/minio/minio-go/v7" "go.uber.org/zap" ) const ( //TODO silverxia change to configuration insertLogPrefix = `insert_log` statsLogPrefix = `stats_log` deltaLogPrefix = `delta_log` ) // GcOption garbage collection options type GcOption struct { cli *minio.Client // OSS client enabled bool // enable switch checkInterval time.Duration // each interval missingTolerance time.Duration // key missing in meta tolerace time dropTolerance time.Duration // dropped segment related key tolerance time bucketName string rootPath string } // garbageCollector handles garbage files in object storage // which could be dropped collection remanent or data node failure traces type garbageCollector struct { option GcOption meta *meta startOnce sync.Once stopOnce sync.Once wg sync.WaitGroup closeCh chan struct{} } // newGarbageCollector create garbage collector with meta and option func newGarbageCollector(meta *meta, opt GcOption) *garbageCollector { log.Info("GC with option", zap.Bool("enabled", opt.enabled), zap.Duration("interval", opt.checkInterval), zap.Duration("missingTolerance", opt.missingTolerance), zap.Duration("dropTolerance", opt.dropTolerance)) return &garbageCollector{ meta: meta, option: opt, closeCh: make(chan struct{}), } } // start a goroutine and perform gc check every `checkInterval` func (gc *garbageCollector) start() { if gc.option.enabled { if gc.option.cli == nil { log.Warn("DataCoord gc enabled, but SSO client is not provided") return } gc.startOnce.Do(func() { gc.wg.Add(1) go gc.work() }) } } // work contains actual looping check logic func (gc *garbageCollector) work() { defer gc.wg.Done() ticker := time.Tick(gc.option.checkInterval) for { select { case <-ticker: gc.clearEtcd() gc.scan() case <-gc.closeCh: log.Warn("garbage collector quit") return } } } func (gc *garbageCollector) close() { gc.stopOnce.Do(func() { close(gc.closeCh) gc.wg.Wait() }) } // scan load meta file info and compares OSS keys // if missing found, performs gc cleanup func (gc *garbageCollector) scan() { var v, m, e int valid := gc.meta.ListSegmentFiles() vm := make(map[string]struct{}) for _, k := range valid { vm[k.GetLogPath()] = struct{}{} } // walk only data cluster related prefixes prefixes := make([]string, 0, 3) prefixes = append(prefixes, path.Join(gc.option.rootPath, insertLogPrefix)) prefixes = append(prefixes, path.Join(gc.option.rootPath, statsLogPrefix)) prefixes = append(prefixes, path.Join(gc.option.rootPath, deltaLogPrefix)) for _, prefix := range prefixes { for info := range gc.option.cli.ListObjects(context.TODO(), gc.option.bucketName, minio.ListObjectsOptions{ Prefix: prefix, Recursive: true, }) { _, has := vm[info.Key] if has { v++ continue } m++ // not found in meta, check last modified time exceeds tolerance duration if time.Since(info.LastModified) > gc.option.missingTolerance { e++ // ignore error since it could be cleaned up next time _ = gc.option.cli.RemoveObject(context.TODO(), gc.option.bucketName, info.Key, minio.RemoveObjectOptions{}) } } } log.Warn("scan result", zap.Int("valid", v), zap.Int("missing", m), zap.Int("removed", e)) } func (gc *garbageCollector) clearEtcd() { drops := gc.meta.SelectSegments(func(segment *SegmentInfo) bool { return segment.GetState() == commonpb.SegmentState_Dropped }) for _, sinfo := range drops { if !gc.isExpire(sinfo.GetDroppedAt()) { continue } logs := getLogs(sinfo) if gc.removeLogs(logs) { _ = gc.meta.DropSegment(sinfo.GetID()) } } } func (gc *garbageCollector) isExpire(dropts Timestamp) bool { droptime := time.Unix(0, int64(dropts)) return time.Since(droptime) > gc.option.dropTolerance } func getLogs(sinfo *SegmentInfo) []*datapb.Binlog { var logs []*datapb.Binlog for _, flog := range sinfo.GetBinlogs() { logs = append(logs, flog.GetBinlogs()...) } for _, flog := range sinfo.GetStatslogs() { logs = append(logs, flog.GetBinlogs()...) } for _, flog := range sinfo.GetDeltalogs() { logs = append(logs, flog.GetBinlogs()...) } return logs } func (gc *garbageCollector) removeLogs(logs []*datapb.Binlog) bool { delFlag := true for _, l := range logs { err := gc.option.cli.RemoveObject(context.TODO(), gc.option.bucketName, l.GetLogPath(), minio.RemoveObjectOptions{}) errResp := minio.ToErrorResponse(err) if errResp.Code != "" && errResp.Code != "NoSuchKey" { delFlag = false } } return delFlag }