2021-11-10 11:03:38 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
2022-05-31 08:36:03 +00:00
2021-11-10 11:03:38 +00:00
2023-01-04 11:37:36 +00:00
2021-11-10 11:03:38 +00:00
2023-02-26 03:31:49 +00:00
2023-03-04 15:21:50 +00:00
minio "github.com/minio/minio-go/v7"
2023-01-06 06:33:36 +00:00
2023-03-04 15:21:50 +00:00
2023-01-06 06:33:36 +00:00
2023-01-04 11:37:36 +00:00
2023-01-06 06:33:36 +00:00
2023-03-04 15:21:50 +00:00
2023-01-04 11:37:36 +00:00
2023-01-06 06:33:36 +00:00
2023-03-04 15:21:50 +00:00
2023-01-06 06:33:36 +00:00
kvmocks "github.com/milvus-io/milvus/internal/kv/mocks"
2023-01-04 11:37:36 +00:00
2023-01-06 06:33:36 +00:00
catalogmocks "github.com/milvus-io/milvus/internal/metastore/mocks"
2023-01-04 11:37:36 +00:00
2023-03-04 15:21:50 +00:00
2021-11-10 11:03:38 +00:00
2023-03-04 15:21:50 +00:00
2022-07-22 14:10:28 +00:00
2021-11-10 11:03:38 +00:00
2023-03-02 03:49:47 +00:00
type GarbageCollectorSuite struct {
mockChunkManager *mocks.ChunkManager
gc *garbageCollector
func (s *GarbageCollectorSuite) SetupTest() {
meta, err := newMemoryMeta()
s.mockChunkManager = &mocks.ChunkManager{}
s.gc = newGarbageCollector(
meta, newMockHandler(), GcOption{
cli: s.mockChunkManager,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
func (s *GarbageCollectorSuite) TearDownTest() {
s.mockChunkManager = nil
s.gc = nil
func (s *GarbageCollectorSuite) TestBasicOperation() {
s.Run("normal_gc", func() {
gc := s.gc
s.mockChunkManager.EXPECT().ListWithPrefix(mock.Anything, mock.AnythingOfType("string"), mock.AnythingOfType("bool")).
Return([]string{}, []time.Time{}, nil)
// make ticker run at least once
time.Sleep(time.Millisecond * 20)
s.NotPanics(func() {
s.Run("nil_client", func() {
// initialize a new garbageCollector here
gc := newGarbageCollector(nil, newMockHandler(), GcOption{
cli: nil,
enabled: true,
s.NotPanics(func() {
s.NotPanics(func() {
func (s *GarbageCollectorSuite) TestScan() {
s.Run("listCollectionPrefix_fails", func() {
s.mockChunkManager.ExpectedCalls = nil
s.mockChunkManager.EXPECT().ListWithPrefix(mock.Anything, mock.AnythingOfType("string"), mock.AnythingOfType("bool")).
Return(nil, nil, errors.New("mocked"))
s.mockChunkManager.AssertNotCalled(s.T(), "Remove", mock.Anything, mock.Anything)
s.Run("collectionPrefix_invalid", func() {
s.mockChunkManager.ExpectedCalls = nil
s.mockChunkManager.EXPECT().ListWithPrefix(mock.Anything, mock.AnythingOfType("string"), mock.AnythingOfType("bool")).
Return([]string{"files/insert_log/1/", "files/bad_prefix", "files/insert_log/string/"}, lo.RepeatBy(3, func(_ int) time.Time {
return time.Now().Add(-time.Hour)
}), nil)*/
logTypes := []string{"files/insert_log/", "files/stats_log/", "files/delta_log/"}
for _, logType := range logTypes {
validSubPath := "1/2/3/100/2000"
if logType == "files/delta_log/" {
validSubPath = "1/2/3/2000"
s.mockChunkManager.EXPECT().ListWithPrefix(mock.Anything, logType, false).
Return([]string{path.Join(logType, "1") + "/", path.Join(logType, "2") + "/", path.Join(logType, "string") + "/", "files/badprefix/"}, lo.RepeatBy(4, func(_ int) time.Time { return time.Now() }), nil)
s.mockChunkManager.EXPECT().ListWithPrefix(mock.Anything, path.Join(logType, "1")+"/", true).
Return([]string{path.Join(logType, validSubPath)}, []time.Time{time.Now().Add(time.Hour * -48)}, nil)
s.mockChunkManager.EXPECT().Remove(mock.Anything, path.Join(logType, validSubPath)).Return(nil)
s.gc.option.collValidator = func(collID int64) bool {
return collID == 1
//s.mockChunkManager.AssertNotCalled(s.T(), "Remove", mock.Anything, mock.Anything)
s.Run("fileScan_fails", func() {
s.mockChunkManager.ExpectedCalls = nil
s.mockChunkManager.Calls = nil
isCollPrefix := func(prefix string) bool {
return lo.Contains([]string{"files/insert_log/", "files/stats_log/", "files/delta_log/"}, prefix)
s.mockChunkManager.EXPECT().ListWithPrefix(mock.Anything, mock.AnythingOfType("string"), mock.AnythingOfType("bool")).Call.Return(
func(_ context.Context, prefix string, recursive bool) []string {
if isCollPrefix(prefix) {
return []string{path.Join(prefix, "1")}
return nil
func(_ context.Context, prefix string, recursive bool) []time.Time {
if isCollPrefix(prefix) {
return []time.Time{time.Now()}
return nil
func(_ context.Context, prefix string, recursive bool) error {
if isCollPrefix(prefix) {
return nil
return errors.New("mocked")
s.gc.option.collValidator = func(collID int64) bool {
return true
s.mockChunkManager.AssertNotCalled(s.T(), "Remove", mock.Anything, mock.Anything)
func TestGarbageCollectorSuite(t *testing.T) {
suite.Run(t, new(GarbageCollectorSuite))
2021-11-10 11:03:38 +00:00
func Test_garbageCollector_basic(t *testing.T) {
bucketName := `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
rootPath := `gc` + funcutil.RandomString(8)
//TODO change to Params
2021-11-24 01:55:15 +00:00
cli, _, _, _, _, err := initUtOSSEnv(bucketName, rootPath, 0)
2021-11-10 11:03:38 +00:00
require.NoError(t, err)
2022-11-03 06:41:35 +00:00
meta, err := newMemoryMeta()
2021-11-10 11:03:38 +00:00
assert.Nil(t, err)
t.Run("normal gc", func(t *testing.T) {
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2021-11-10 11:03:38 +00:00
cli: cli,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
time.Sleep(time.Millisecond * 20)
assert.NotPanics(t, func() {
t.Run("with nil cli", func(t *testing.T) {
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2021-11-10 11:03:38 +00:00
cli: nil,
enabled: true,
checkInterval: time.Millisecond * 10,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
assert.NotPanics(t, func() {
assert.NotPanics(t, func() {
2023-03-02 03:49:47 +00:00
2021-11-10 11:03:38 +00:00
2021-11-24 01:55:15 +00:00
func validateMinioPrefixElements(t *testing.T, cli *minio.Client, bucketName string, prefix string, elements []string) {
var current []string
for info := range cli.ListObjects(context.TODO(), bucketName, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) {
current = append(current, info.Key)
assert.ElementsMatch(t, elements, current)
2021-11-10 11:03:38 +00:00
func Test_garbageCollector_scan(t *testing.T) {
bucketName := `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
rootPath := `gc` + funcutil.RandomString(8)
//TODO change to Params
2021-11-24 01:55:15 +00:00
cli, inserts, stats, delta, others, err := initUtOSSEnv(bucketName, rootPath, 4)
2021-11-10 11:03:38 +00:00
require.NoError(t, err)
2022-11-03 06:41:35 +00:00
meta, err := newMemoryMeta()
2021-11-10 11:03:38 +00:00
assert.Nil(t, err)
2022-05-31 08:36:03 +00:00
t.Run("key is reference", func(t *testing.T) {
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2022-05-31 08:36:03 +00:00
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
2022-07-22 14:10:28 +00:00
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
2022-05-31 08:36:03 +00:00
2021-11-10 11:03:38 +00:00
t.Run("missing all but save tolerance", func(t *testing.T) {
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2021-11-10 11:03:38 +00:00
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
2022-07-22 14:10:28 +00:00
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
2021-11-24 01:55:15 +00:00
2021-11-10 11:03:38 +00:00
2021-11-16 06:23:21 +00:00
t.Run("hit, no gc", func(t *testing.T) {
2022-09-26 10:06:54 +00:00
segment := buildSegment(1, 10, 100, "ch", false)
2021-11-10 11:03:38 +00:00
segment.State = commonpb.SegmentState_Flushed
2021-12-19 12:00:42 +00:00
segment.Binlogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, inserts[0])}
segment.Statslogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, stats[0])}
segment.Deltalogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, delta[0])}
2021-11-10 11:03:38 +00:00
err = meta.AddSegment(segment)
require.NoError(t, err)
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2021-11-10 11:03:38 +00:00
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: time.Hour * 24,
2022-07-22 14:10:28 +00:00
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
2021-11-10 11:03:38 +00:00
t.Run("dropped gc one", func(t *testing.T) {
2022-09-26 10:06:54 +00:00
segment := buildSegment(1, 10, 100, "ch", false)
2021-11-10 11:03:38 +00:00
segment.State = commonpb.SegmentState_Dropped
2021-11-16 06:23:21 +00:00
segment.DroppedAt = uint64(time.Now().Add(-time.Hour).UnixNano())
2021-12-19 12:00:42 +00:00
segment.Binlogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, inserts[0])}
segment.Statslogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, stats[0])}
segment.Deltalogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, delta[0])}
2021-11-10 11:03:38 +00:00
err = meta.AddSegment(segment)
require.NoError(t, err)
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2021-11-10 11:03:38 +00:00
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: time.Hour * 24,
dropTolerance: 0,
2021-11-23 03:23:15 +00:00
2022-07-22 14:10:28 +00:00
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
2021-11-10 11:03:38 +00:00
t.Run("missing gc all", func(t *testing.T) {
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2021-11-10 11:03:38 +00:00
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: 0,
dropTolerance: 0,
2021-11-24 01:55:15 +00:00
2022-09-26 10:06:54 +00:00
// bad paths shall remain, since datacoord cannot determine whether a file is garbage when its path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
t.Run("list object with error", func(t *testing.T) {
2023-01-04 11:37:36 +00:00
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
2022-09-26 10:06:54 +00:00
cli: cli,
enabled: true,
checkInterval: time.Minute * 30,
missingTolerance: 0,
dropTolerance: 0,
2022-07-26 11:32:30 +00:00
// bad paths shall remain, since datacoord cannot determine whether a file is garbage when its path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2])
2022-07-22 14:10:28 +00:00
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
2021-11-10 11:03:38 +00:00
2022-07-22 14:10:28 +00:00
cleanupOSS(cli.Client, bucketName, rootPath)
2021-11-10 11:03:38 +00:00
// initialize unit test OSS (object storage) env
2022-07-22 14:10:28 +00:00
func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, inserts []string, stats []string, delta []string, other []string, err error) {
2021-11-10 11:03:38 +00:00
2022-11-17 10:59:09 +00:00
cli, err := minio.New(Params.MinioCfg.Address.GetValue(), &minio.Options{
Creds: credentials.NewStaticV4(Params.MinioCfg.AccessKeyID.GetValue(), Params.MinioCfg.SecretAccessKey.GetValue(), ""),
Secure: Params.MinioCfg.UseSSL.GetAsBool(),
2021-11-10 11:03:38 +00:00
if err != nil {
2021-11-24 01:55:15 +00:00
return nil, nil, nil, nil, nil, err
2021-11-10 11:03:38 +00:00
has, err := cli.BucketExists(context.TODO(), bucket)
if err != nil {
2021-11-24 01:55:15 +00:00
return nil, nil, nil, nil, nil, err
2021-11-10 11:03:38 +00:00
if !has {
err = cli.MakeBucket(context.TODO(), bucket, minio.MakeBucketOptions{})
if err != nil {
2021-11-24 01:55:15 +00:00
return nil, nil, nil, nil, nil, err
2021-11-10 11:03:38 +00:00
2021-11-24 01:55:15 +00:00
inserts = make([]string, 0, n)
stats = make([]string, 0, n)
delta = make([]string, 0, n)
other = make([]string, 0, n)
2021-11-10 11:03:38 +00:00
content := []byte("test")
for i := 0; i < n; i++ {
reader := bytes.NewReader(content)
2022-07-26 11:32:30 +00:00
// collID/partID/segID/fieldID/fileName
// [str]/id/id/string/string
2022-09-30 06:18:55 +00:00
var token string
2022-05-31 08:36:03 +00:00
if i == 1 {
2022-09-30 06:18:55 +00:00
token = path.Join(strconv.Itoa(i), strconv.Itoa(i), "error-seg-id", funcutil.RandomString(8), funcutil.RandomString(8))
} else {
2022-11-18 07:35:09 +00:00
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), funcutil.RandomString(8), funcutil.RandomString(8))
2022-05-31 08:36:03 +00:00
2021-11-24 01:55:15 +00:00
// insert
filePath := path.Join(root, insertLogPrefix, token)
info, err := cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
inserts = append(inserts, info.Key)
// stats
filePath = path.Join(root, statsLogPrefix, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
stats = append(stats, info.Key)
// delta
2022-09-30 06:18:55 +00:00
if i == 1 {
token = path.Join(strconv.Itoa(i), strconv.Itoa(i), "error-seg-id", funcutil.RandomString(8))
} else {
2022-11-18 07:35:09 +00:00
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), funcutil.RandomString(8))
2022-09-30 06:18:55 +00:00
2021-11-24 01:55:15 +00:00
filePath = path.Join(root, deltaLogPrefix, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
delta = append(delta, info.Key)
// other
filePath = path.Join(root, `indexes`, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
2021-11-10 11:03:38 +00:00
if err != nil {
2021-11-24 01:55:15 +00:00
return nil, nil, nil, nil, nil, err
2021-11-10 11:03:38 +00:00
2021-11-24 01:55:15 +00:00
other = append(other, info.Key)
2021-11-10 11:03:38 +00:00
2022-07-22 14:10:28 +00:00
mcm = &storage.MinioChunkManager{
Client: cli,
2022-09-29 08:18:56 +00:00
mcm.SetVar(bucket, root)
2022-07-22 14:10:28 +00:00
return mcm, inserts, stats, delta, other, nil
2021-11-10 11:03:38 +00:00
func cleanupOSS(cli *minio.Client, bucket, root string) {
ch := cli.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: root, Recursive: true})
cli.RemoveObjects(context.TODO(), bucket, ch, minio.RemoveObjectsOptions{})
cli.RemoveBucket(context.TODO(), bucket)
2023-01-04 11:37:36 +00:00
2023-01-06 06:33:36 +00:00
func createMetaForRecycleUnusedIndexes(catalog metastore.DataCoordCatalog) *meta {
2023-01-04 11:37:36 +00:00
var (
ctx = context.Background()
collID = UniqueID(100)
//partID = UniqueID(200)
fieldID = UniqueID(300)
indexID = UniqueID(400)
return &meta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
collections: nil,
segments: nil,
channelCPs: nil,
chunkManager: nil,
indexes: map[UniqueID]map[UniqueID]*model.Index{
collID: {
indexID: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID,
IndexID: indexID,
IndexName: "_default_idx",
IsDeleted: false,
CreateTime: 10,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
indexID + 1: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID + 1,
IndexID: indexID + 1,
IndexName: "_default_idx_101",
IsDeleted: true,
CreateTime: 0,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
collID + 1: {
indexID + 10: {
TenantID: "",
CollectionID: collID + 1,
FieldID: fieldID + 10,
IndexID: indexID + 10,
IndexName: "index",
IsDeleted: true,
CreateTime: 10,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
buildID2SegmentIndex: nil,
func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
t.Run("success", func(t *testing.T) {
2023-01-06 06:33:36 +00:00
catalog := catalogmocks.NewDataCoordCatalog(t)
2023-01-04 11:37:36 +00:00
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaForRecycleUnusedIndexes(catalog),
2023-01-04 11:37:36 +00:00
t.Run("fail", func(t *testing.T) {
2023-01-06 06:33:36 +00:00
catalog := catalogmocks.NewDataCoordCatalog(t)
2023-01-04 11:37:36 +00:00
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaForRecycleUnusedIndexes(catalog),
2023-01-04 11:37:36 +00:00
2023-01-06 06:33:36 +00:00
func createMetaForRecycleUnusedSegIndexes(catalog metastore.DataCoordCatalog) *meta {
2023-01-04 11:37:36 +00:00
var (
ctx = context.Background()
collID = UniqueID(100)
partID = UniqueID(200)
//fieldID = UniqueID(300)
indexID = UniqueID(400)
segID = UniqueID(500)
return &meta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
collections: nil,
segments: &SegmentsInfo{
segments: map[UniqueID]*SegmentInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 1026,
State: commonpb.SegmentState_Flushed,
segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
segID + 1: {
SegmentInfo: nil,
segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
channelCPs: nil,
chunkManager: nil,
indexes: map[UniqueID]map[UniqueID]*model.Index{},
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
buildID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
buildID + 1: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
t.Run("success", func(t *testing.T) {
2023-01-06 06:33:36 +00:00
catalog := catalogmocks.NewDataCoordCatalog(t)
2023-01-04 11:37:36 +00:00
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaForRecycleUnusedSegIndexes(catalog),
2023-01-04 11:37:36 +00:00
t.Run("fail", func(t *testing.T) {
2023-01-06 06:33:36 +00:00
catalog := catalogmocks.NewDataCoordCatalog(t)
2023-01-04 11:37:36 +00:00
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaForRecycleUnusedSegIndexes(catalog),
2023-01-04 11:37:36 +00:00
func createMetaTableForRecycleUnusedIndexFiles(catalog *datacoord.Catalog) *meta {
var (
ctx = context.Background()
collID = UniqueID(100)
partID = UniqueID(200)
//fieldID = UniqueID(300)
indexID = UniqueID(400)
segID = UniqueID(500)
buildID = UniqueID(600)
return &meta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
collections: nil,
segments: &SegmentsInfo{
segments: map[UniqueID]*SegmentInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 1026,
State: commonpb.SegmentState_Flushed,
segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
segID + 1: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 1,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 1026,
State: commonpb.SegmentState_Flushed,
segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_InProgress,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: nil,
IndexSize: 0,
WriteHandoff: false,
indexes: map[UniqueID]map[UniqueID]*model.Index{
collID: {
indexID: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID,
IndexID: indexID,
IndexName: "_default_idx",
IsDeleted: false,
CreateTime: 10,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
buildID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 0,
WriteHandoff: false,
buildID + 1: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 1026,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 1,
IndexVersion: 1,
IndexState: commonpb.IndexState_InProgress,
FailReason: "",
IsDeleted: false,
CreateTime: 10,
IndexFileKeys: nil,
IndexSize: 0,
WriteHandoff: false,
func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
t.Run("success", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
2023-01-04 11:37:36 +00:00
option: GcOption{
cli: cm,
t.Run("list fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, errors.New("error"))
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
2023-01-04 11:37:36 +00:00
option: GcOption{
cli: cm,
t.Run("remove fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
2023-01-04 11:37:36 +00:00
option: GcOption{
cli: cm,
t.Run("remove with prefix fail", func(t *testing.T) {
cm := &mocks.ChunkManager{}
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return([]string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"}, nil, nil)
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(errors.New("error"))
gc := &garbageCollector{
2023-01-06 06:33:36 +00:00
meta: createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
2023-01-04 11:37:36 +00:00
option: GcOption{
cli: cm,
func TestGarbageCollector_clearETCD(t *testing.T) {
2023-01-06 06:33:36 +00:00
catalog := catalogmocks.NewDataCoordCatalog(t)
2023-03-09 06:13:52 +00:00
2023-01-06 06:33:36 +00:00
2023-01-04 11:37:36 +00:00
m := &meta{
2023-01-06 06:33:36 +00:00
catalog: catalog,
2023-03-04 15:21:50 +00:00
channelCPs: map[string]*msgpb.MsgPosition{
2023-01-06 13:33:36 +00:00
"dmlChannel": {
Timestamp: 1000,
2023-01-04 11:37:36 +00:00
segments: &SegmentsInfo{
segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 5000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 0,
segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 1024,
WriteHandoff: false,
segID + 1: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 1,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 5000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 0,
segmentIndexes: map[UniqueID]*model.SegmentIndex{
indexID: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file3", "file4"},
IndexSize: 1024,
WriteHandoff: false,
segID + 2: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 2,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 10000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 10,
CompactionFrom: []int64{segID, segID + 1},
segmentIndexes: map[UniqueID]*model.SegmentIndex{},
segID + 3: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 3,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65536,
DroppedAt: 10,
CompactionFrom: nil,
segmentIndexes: map[UniqueID]*model.SegmentIndex{},
segID + 4: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 4,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "",
NumOfRows: 12000,
State: commonpb.SegmentState_Flushed,
MaxRowNum: 65536,
DroppedAt: 10,
CompactionFrom: []int64{segID + 2, segID + 3},
segmentIndexes: map[UniqueID]*model.SegmentIndex{},
2023-01-06 13:33:36 +00:00
// before channel cp,
segID + 5: {
SegmentInfo: &datapb.SegmentInfo{
ID: segID + 5,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "dmlChannel",
NumOfRows: 2000,
State: commonpb.SegmentState_Dropped,
MaxRowNum: 65535,
DroppedAt: 0,
CompactionFrom: nil,
2023-03-04 15:21:50 +00:00
DmlPosition: &msgpb.MsgPosition{
2023-01-06 13:33:36 +00:00
Timestamp: 1200,
2023-01-04 11:37:36 +00:00
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
buildID: {
SegmentID: segID,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file1", "file2"},
IndexSize: 1024,
WriteHandoff: false,
buildID + 1: {
SegmentID: segID + 1,
CollectionID: collID,
PartitionID: partID,
NumRows: 5000,
IndexID: indexID,
BuildID: buildID + 1,
NodeID: 0,
IndexVersion: 1,
IndexState: commonpb.IndexState_Finished,
FailReason: "",
IsDeleted: false,
CreateTime: 0,
IndexFileKeys: []string{"file3", "file4"},
IndexSize: 1024,
WriteHandoff: false,
indexes: map[UniqueID]map[UniqueID]*model.Index{
collID: {
indexID: {
TenantID: "",
CollectionID: collID,
FieldID: fieldID,
IndexID: indexID,
IndexName: indexName,
IsDeleted: false,
CreateTime: 0,
TypeParams: nil,
IndexParams: nil,
IsAutoIndex: false,
UserIndexParams: nil,
collections: map[UniqueID]*collectionInfo{
collID: {
ID: collID,
Schema: &schemapb.CollectionSchema{
Name: "",
Description: "",
AutoID: false,
Fields: []*schemapb.FieldSchema{
FieldID: fieldID,
Name: "",
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_FloatVector,
TypeParams: nil,
IndexParams: nil,
AutoID: false,
State: 0,
Partitions: nil,
StartPositions: nil,
Properties: nil,
cm := &mocks.ChunkManager{}
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
gc := &garbageCollector{
option: GcOption{
cli: &mocks.ChunkManager{},
dropTolerance: 1,
meta: m,
handler: newMockHandlerWithMeta(m),
2023-03-03 06:13:49 +00:00
segA := gc.meta.GetSegment(segID)
2023-01-04 11:37:36 +00:00
assert.NotNil(t, segA)
2023-03-03 06:13:49 +00:00
segB := gc.meta.GetSegment(segID + 1)
2023-01-04 11:37:36 +00:00
assert.NotNil(t, segB)
2023-03-03 06:13:49 +00:00
segC := gc.meta.GetSegment(segID + 2)
2023-01-04 11:37:36 +00:00
assert.NotNil(t, segC)
2023-03-03 06:13:49 +00:00
segD := gc.meta.GetSegment(segID + 3)
2023-01-04 11:37:36 +00:00
assert.NotNil(t, segD)
2023-03-03 06:13:49 +00:00
segE := gc.meta.GetSegment(segID + 4)
2023-01-04 11:37:36 +00:00
assert.NotNil(t, segE)
2023-03-03 06:13:49 +00:00
segF := gc.meta.GetSegment(segID + 5)
2023-03-09 06:13:52 +00:00
assert.Nil(t, segF)
2023-01-04 11:37:36 +00:00
err := gc.meta.AddSegmentIndex(&model.SegmentIndex{
SegmentID: segID + 4,
CollectionID: collID,
PartitionID: partID,
NumRows: 12000,
IndexID: indexID,
BuildID: buildID + 4,
assert.NoError(t, err)
err = gc.meta.FinishTask(&indexpb.IndexTaskInfo{
BuildID: buildID + 4,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2", "file3", "file4"},
SerializedSize: 10240,
FailReason: "",
assert.NoError(t, err)
//segA := gc.meta.GetSegmentUnsafe(segID)
//assert.NotNil(t, segA)
//segB := gc.meta.GetSegmentUnsafe(segID + 1)
//assert.NotNil(t, segB)
2023-03-03 06:13:49 +00:00
segC = gc.meta.GetSegment(segID + 2)
2023-01-04 11:37:36 +00:00
assert.Nil(t, segC)
2023-03-03 06:13:49 +00:00
segD = gc.meta.GetSegment(segID + 3)
2023-01-04 11:37:36 +00:00
assert.Nil(t, segD)
2023-03-03 06:13:49 +00:00
segE = gc.meta.GetSegment(segID + 4)
2023-01-04 11:37:36 +00:00
assert.NotNil(t, segE)
2023-03-03 06:13:49 +00:00
segF = gc.meta.GetSegment(segID + 5)
2023-03-09 06:13:52 +00:00
assert.Nil(t, segF)
2023-01-04 11:37:36 +00:00
2023-03-03 06:13:49 +00:00
segA = gc.meta.GetSegment(segID)
2023-01-04 11:37:36 +00:00
assert.Nil(t, segA)
2023-03-03 06:13:49 +00:00
segB = gc.meta.GetSegment(segID + 1)
2023-01-04 11:37:36 +00:00
assert.Nil(t, segB)
2023-03-03 06:13:49 +00:00
segF = gc.meta.GetSegment(segID + 5)
2023-03-09 06:13:52 +00:00
assert.Nil(t, segF)
2023-01-06 13:33:36 +00:00
2023-01-04 11:37:36 +00:00