mirror of https://github.com/milvus-io/milvus.git
1677 lines
47 KiB
Go
1677 lines
47 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package datacoord
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"math/rand"
|
|
"os"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"github.com/minio/minio-go/v7"
|
|
"github.com/minio/minio-go/v7/pkg/credentials"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/mock"
|
|
"github.com/stretchr/testify/require"
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
kvmocks "github.com/milvus-io/milvus/internal/kv/mocks"
|
|
"github.com/milvus-io/milvus/internal/metastore"
|
|
"github.com/milvus-io/milvus/internal/metastore/kv/datacoord"
|
|
catalogmocks "github.com/milvus-io/milvus/internal/metastore/mocks"
|
|
"github.com/milvus-io/milvus/internal/metastore/model"
|
|
"github.com/milvus-io/milvus/internal/mocks"
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
|
"github.com/milvus-io/milvus/pkg/util/lock"
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
)
|
|
|
|
func Test_garbageCollector_basic(t *testing.T) {
|
|
bucketName := `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
|
|
rootPath := `gc` + funcutil.RandomString(8)
|
|
// TODO change to Params
|
|
cli, _, _, _, _, err := initUtOSSEnv(bucketName, rootPath, 0)
|
|
require.NoError(t, err)
|
|
|
|
meta, err := newMemoryMeta()
|
|
assert.NoError(t, err)
|
|
|
|
t.Run("normal gc", func(t *testing.T) {
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
gc.start()
|
|
|
|
time.Sleep(time.Millisecond * 20)
|
|
assert.NotPanics(t, func() {
|
|
gc.close()
|
|
})
|
|
})
|
|
|
|
t.Run("with nil cli", func(t *testing.T) {
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: nil,
|
|
enabled: true,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
assert.NotPanics(t, func() {
|
|
gc.start()
|
|
})
|
|
|
|
assert.NotPanics(t, func() {
|
|
gc.close()
|
|
})
|
|
})
|
|
}
|
|
|
|
func validateMinioPrefixElements(t *testing.T, manager *storage.RemoteChunkManager, bucketName string, prefix string, elements []string) {
|
|
cli := manager.UnderlyingObjectStorage().(*storage.MinioObjectStorage).Client
|
|
var current []string
|
|
for info := range cli.ListObjects(context.TODO(), bucketName, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) {
|
|
current = append(current, info.Key)
|
|
}
|
|
assert.ElementsMatch(t, elements, current)
|
|
}
|
|
|
|
func Test_garbageCollector_scan(t *testing.T) {
|
|
bucketName := `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
|
|
rootPath := paramtable.Get().MinioCfg.RootPath.GetValue()
|
|
// TODO change to Params
|
|
cli, inserts, stats, delta, others, err := initUtOSSEnv(bucketName, rootPath, 4)
|
|
require.NoError(t, err)
|
|
|
|
meta, err := newMemoryMeta()
|
|
assert.NoError(t, err)
|
|
|
|
t.Run("key is reference", func(t *testing.T) {
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Minute * 30,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
gc.recycleUnusedBinlogFiles(context.TODO())
|
|
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
|
|
gc.close()
|
|
})
|
|
|
|
t.Run("missing all but save tolerance", func(t *testing.T) {
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Minute * 30,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
gc.recycleUnusedBinlogFiles(context.TODO())
|
|
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
|
|
|
|
gc.close()
|
|
})
|
|
t.Run("hit, no gc", func(t *testing.T) {
|
|
segment := buildSegment(1, 10, 100, "ch")
|
|
segment.State = commonpb.SegmentState_Flushed
|
|
segment.Binlogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, inserts[0])}
|
|
segment.Statslogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, stats[0])}
|
|
segment.Deltalogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, delta[0])}
|
|
err = meta.AddSegment(context.TODO(), segment)
|
|
require.NoError(t, err)
|
|
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Minute * 30,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
gc.start()
|
|
gc.recycleUnusedBinlogFiles(context.TODO())
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
|
|
|
|
gc.close()
|
|
})
|
|
|
|
t.Run("dropped gc one", func(t *testing.T) {
|
|
segment := buildSegment(1, 10, 100, "ch")
|
|
segment.State = commonpb.SegmentState_Dropped
|
|
segment.DroppedAt = uint64(time.Now().Add(-time.Hour).UnixNano())
|
|
segment.Binlogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, inserts[0])}
|
|
segment.Statslogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, stats[0])}
|
|
segment.Deltalogs = []*datapb.FieldBinlog{getFieldBinlogPaths(0, delta[0])}
|
|
|
|
err = meta.AddSegment(context.TODO(), segment)
|
|
require.NoError(t, err)
|
|
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Minute * 30,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: 0,
|
|
})
|
|
gc.recycleDroppedSegments(context.TODO())
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
|
|
|
|
gc.close()
|
|
})
|
|
t.Run("missing gc all", func(t *testing.T) {
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Minute * 30,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: 0,
|
|
dropTolerance: 0,
|
|
})
|
|
gc.start()
|
|
gc.recycleUnusedBinlogFiles(context.TODO())
|
|
gc.recycleDroppedSegments(context.TODO())
|
|
|
|
// bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
|
|
|
|
gc.close()
|
|
})
|
|
|
|
t.Run("list object with error", func(t *testing.T) {
|
|
gc := newGarbageCollector(meta, newMockHandler(), GcOption{
|
|
cli: cli,
|
|
enabled: true,
|
|
checkInterval: time.Minute * 30,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: 0,
|
|
dropTolerance: 0,
|
|
})
|
|
gc.start()
|
|
gc.recycleUnusedBinlogFiles(context.TODO())
|
|
|
|
// bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
|
|
validateMinioPrefixElements(t, cli, bucketName, path.Join(rootPath, `indexes`), others)
|
|
|
|
gc.close()
|
|
})
|
|
|
|
cleanupOSS(cli, bucketName, rootPath)
|
|
}
|
|
|
|
// initialize unit test sso env
|
|
func initUtOSSEnv(bucket, root string, n int) (mcm *storage.RemoteChunkManager, inserts []string, stats []string, delta []string, other []string, err error) {
|
|
paramtable.Init()
|
|
|
|
if Params.MinioCfg.UseSSL.GetAsBool() && len(Params.MinioCfg.SslCACert.GetValue()) > 0 {
|
|
err := os.Setenv("SSL_CERT_FILE", Params.MinioCfg.SslCACert.GetValue())
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
}
|
|
|
|
cli, err := minio.New(Params.MinioCfg.Address.GetValue(), &minio.Options{
|
|
Creds: credentials.NewStaticV4(Params.MinioCfg.AccessKeyID.GetValue(), Params.MinioCfg.SecretAccessKey.GetValue(), ""),
|
|
Secure: Params.MinioCfg.UseSSL.GetAsBool(),
|
|
})
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
has, err := cli.BucketExists(context.TODO(), bucket)
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
if !has {
|
|
err = cli.MakeBucket(context.TODO(), bucket, minio.MakeBucketOptions{})
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
}
|
|
inserts = make([]string, 0, n)
|
|
stats = make([]string, 0, n)
|
|
delta = make([]string, 0, n)
|
|
other = make([]string, 0, n)
|
|
|
|
content := []byte("test")
|
|
for i := 0; i < n; i++ {
|
|
reader := bytes.NewReader(content)
|
|
// collID/partID/segID/fieldID/fileName
|
|
// [str]/id/id/string/string
|
|
|
|
var token string
|
|
if i == 1 {
|
|
token = path.Join(strconv.Itoa(i), strconv.Itoa(i), "error-seg-id", strconv.Itoa(i), fmt.Sprint(rand.Int63()))
|
|
} else {
|
|
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), strconv.Itoa(i), fmt.Sprint(rand.Int63()))
|
|
}
|
|
// insert
|
|
filePath := path.Join(root, common.SegmentInsertLogPath, token)
|
|
info, err := cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
inserts = append(inserts, info.Key)
|
|
// stats
|
|
filePath = path.Join(root, common.SegmentStatslogPath, token)
|
|
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
stats = append(stats, info.Key)
|
|
|
|
// delta
|
|
if i == 1 {
|
|
token = path.Join(strconv.Itoa(i), strconv.Itoa(i), "error-seg-id", fmt.Sprint(rand.Int63()))
|
|
} else {
|
|
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), fmt.Sprint(rand.Int63()))
|
|
}
|
|
filePath = path.Join(root, common.SegmentDeltaLogPath, token)
|
|
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
delta = append(delta, info.Key)
|
|
|
|
// other
|
|
filePath = path.Join(root, `indexes`, token)
|
|
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
|
|
if err != nil {
|
|
return nil, nil, nil, nil, nil, err
|
|
}
|
|
other = append(other, info.Key)
|
|
}
|
|
mcm = storage.NewRemoteChunkManagerForTesting(
|
|
cli,
|
|
bucket,
|
|
root,
|
|
)
|
|
return mcm, inserts, stats, delta, other, nil
|
|
}
|
|
|
|
func cleanupOSS(chunkManager *storage.RemoteChunkManager, bucket, root string) {
|
|
cli := chunkManager.UnderlyingObjectStorage().(*storage.MinioObjectStorage).Client
|
|
ch := cli.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: root, Recursive: true})
|
|
cli.RemoveObjects(context.TODO(), bucket, ch, minio.RemoveObjectsOptions{})
|
|
cli.RemoveBucket(context.TODO(), bucket)
|
|
}
|
|
|
|
func createMetaForRecycleUnusedIndexes(catalog metastore.DataCoordCatalog) *meta {
|
|
var (
|
|
ctx = context.Background()
|
|
collID = UniqueID(100)
|
|
// partID = UniqueID(200)
|
|
fieldID = UniqueID(300)
|
|
indexID = UniqueID(400)
|
|
)
|
|
return &meta{
|
|
RWMutex: lock.RWMutex{},
|
|
ctx: ctx,
|
|
catalog: catalog,
|
|
collections: nil,
|
|
segments: nil,
|
|
channelCPs: newChannelCps(),
|
|
chunkManager: nil,
|
|
indexMeta: &indexMeta{
|
|
catalog: catalog,
|
|
indexes: map[UniqueID]map[UniqueID]*model.Index{
|
|
collID: {
|
|
indexID: {
|
|
TenantID: "",
|
|
CollectionID: collID,
|
|
FieldID: fieldID,
|
|
IndexID: indexID,
|
|
IndexName: "_default_idx",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
TypeParams: nil,
|
|
IndexParams: nil,
|
|
IsAutoIndex: false,
|
|
UserIndexParams: nil,
|
|
},
|
|
indexID + 1: {
|
|
TenantID: "",
|
|
CollectionID: collID,
|
|
FieldID: fieldID + 1,
|
|
IndexID: indexID + 1,
|
|
IndexName: "_default_idx_101",
|
|
IsDeleted: true,
|
|
CreateTime: 0,
|
|
TypeParams: nil,
|
|
IndexParams: nil,
|
|
IsAutoIndex: false,
|
|
UserIndexParams: nil,
|
|
},
|
|
},
|
|
collID + 1: {
|
|
indexID + 10: {
|
|
TenantID: "",
|
|
CollectionID: collID + 1,
|
|
FieldID: fieldID + 10,
|
|
IndexID: indexID + 10,
|
|
IndexName: "index",
|
|
IsDeleted: true,
|
|
CreateTime: 10,
|
|
TypeParams: nil,
|
|
IndexParams: nil,
|
|
IsAutoIndex: false,
|
|
UserIndexParams: nil,
|
|
},
|
|
},
|
|
},
|
|
buildID2SegmentIndex: nil,
|
|
},
|
|
}
|
|
}
|
|
|
|
func TestGarbageCollector_recycleUnusedIndexes(t *testing.T) {
|
|
t.Run("success", func(t *testing.T) {
|
|
catalog := catalogmocks.NewDataCoordCatalog(t)
|
|
catalog.On("DropIndex",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(nil)
|
|
gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
|
|
gc.recycleUnusedIndexes(context.TODO())
|
|
})
|
|
|
|
t.Run("fail", func(t *testing.T) {
|
|
catalog := catalogmocks.NewDataCoordCatalog(t)
|
|
catalog.On("DropIndex",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(errors.New("fail"))
|
|
gc := newGarbageCollector(createMetaForRecycleUnusedIndexes(catalog), nil, GcOption{})
|
|
gc.recycleUnusedIndexes(context.TODO())
|
|
})
|
|
}
|
|
|
|
func createMetaForRecycleUnusedSegIndexes(catalog metastore.DataCoordCatalog) *meta {
|
|
var (
|
|
ctx = context.Background()
|
|
collID = UniqueID(100)
|
|
partID = UniqueID(200)
|
|
// fieldID = UniqueID(300)
|
|
indexID = UniqueID(400)
|
|
segID = UniqueID(500)
|
|
)
|
|
segments := map[int64]*SegmentInfo{
|
|
segID: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "",
|
|
NumOfRows: 1026,
|
|
State: commonpb.SegmentState_Flushed,
|
|
},
|
|
},
|
|
segID + 1: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "",
|
|
NumOfRows: 1026,
|
|
State: commonpb.SegmentState_Dropped,
|
|
},
|
|
},
|
|
}
|
|
meta := &meta{
|
|
RWMutex: lock.RWMutex{},
|
|
ctx: ctx,
|
|
catalog: catalog,
|
|
collections: nil,
|
|
segments: NewSegmentsInfo(),
|
|
indexMeta: &indexMeta{
|
|
catalog: catalog,
|
|
segmentIndexes: map[UniqueID]map[UniqueID]*model.SegmentIndex{
|
|
segID: {
|
|
indexID: {
|
|
SegmentID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
segID + 1: {
|
|
indexID: {
|
|
SegmentID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 1,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
},
|
|
indexes: map[UniqueID]map[UniqueID]*model.Index{},
|
|
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
|
|
buildID: {
|
|
SegmentID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
buildID + 1: {
|
|
SegmentID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 1,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
},
|
|
channelCPs: nil,
|
|
chunkManager: nil,
|
|
}
|
|
for id, segment := range segments {
|
|
meta.segments.SetSegment(id, segment)
|
|
}
|
|
return meta
|
|
}
|
|
|
|
func TestGarbageCollector_recycleUnusedSegIndexes(t *testing.T) {
|
|
t.Run("success", func(t *testing.T) {
|
|
mockChunkManager := mocks.NewChunkManager(t)
|
|
mockChunkManager.EXPECT().RootPath().Return("root")
|
|
mockChunkManager.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
|
|
catalog := catalogmocks.NewDataCoordCatalog(t)
|
|
catalog.On("DropSegmentIndex",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(nil)
|
|
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{
|
|
cli: mockChunkManager,
|
|
})
|
|
gc.recycleUnusedSegIndexes(context.TODO())
|
|
})
|
|
|
|
t.Run("fail", func(t *testing.T) {
|
|
catalog := catalogmocks.NewDataCoordCatalog(t)
|
|
mockChunkManager := mocks.NewChunkManager(t)
|
|
mockChunkManager.EXPECT().RootPath().Return("root")
|
|
mockChunkManager.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
|
|
catalog.On("DropSegmentIndex",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(errors.New("fail"))
|
|
gc := newGarbageCollector(createMetaForRecycleUnusedSegIndexes(catalog), nil, GcOption{
|
|
cli: mockChunkManager,
|
|
})
|
|
gc.recycleUnusedSegIndexes(context.TODO())
|
|
})
|
|
}
|
|
|
|
func createMetaTableForRecycleUnusedIndexFiles(catalog *datacoord.Catalog) *meta {
|
|
var (
|
|
ctx = context.Background()
|
|
collID = UniqueID(100)
|
|
partID = UniqueID(200)
|
|
// fieldID = UniqueID(300)
|
|
indexID = UniqueID(400)
|
|
segID = UniqueID(500)
|
|
buildID = UniqueID(600)
|
|
)
|
|
segments := map[UniqueID]*SegmentInfo{
|
|
segID: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "",
|
|
NumOfRows: 1026,
|
|
State: commonpb.SegmentState_Flushed,
|
|
},
|
|
},
|
|
segID + 1: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "",
|
|
NumOfRows: 1026,
|
|
State: commonpb.SegmentState_Flushed,
|
|
},
|
|
},
|
|
}
|
|
meta := &meta{
|
|
RWMutex: lock.RWMutex{},
|
|
ctx: ctx,
|
|
catalog: catalog,
|
|
collections: nil,
|
|
segments: NewSegmentsInfo(),
|
|
indexMeta: &indexMeta{
|
|
catalog: catalog,
|
|
segmentIndexes: map[UniqueID]map[UniqueID]*model.SegmentIndex{
|
|
segID: {
|
|
indexID: {
|
|
SegmentID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
segID + 1: {
|
|
indexID: {
|
|
SegmentID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 1,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_InProgress,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: nil,
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
},
|
|
indexes: map[UniqueID]map[UniqueID]*model.Index{
|
|
collID: {
|
|
indexID: {
|
|
TenantID: "",
|
|
CollectionID: collID,
|
|
FieldID: fieldID,
|
|
IndexID: indexID,
|
|
IndexName: "_default_idx",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
TypeParams: nil,
|
|
IndexParams: nil,
|
|
IsAutoIndex: false,
|
|
UserIndexParams: nil,
|
|
},
|
|
},
|
|
},
|
|
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
|
|
buildID: {
|
|
SegmentID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
buildID + 1: {
|
|
SegmentID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 1026,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 1,
|
|
NodeID: 1,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_InProgress,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 10,
|
|
IndexFileKeys: nil,
|
|
IndexSize: 0,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for id, segment := range segments {
|
|
meta.segments.SetSegment(id, segment)
|
|
}
|
|
|
|
return meta
|
|
}
|
|
|
|
func TestGarbageCollector_recycleUnusedIndexFiles(t *testing.T) {
|
|
t.Run("success", func(t *testing.T) {
|
|
cm := &mocks.ChunkManager{}
|
|
cm.EXPECT().RootPath().Return("root")
|
|
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
|
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
|
|
for _, file := range []string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"} {
|
|
cowf(&storage.ChunkObjectInfo{FilePath: file})
|
|
}
|
|
return nil
|
|
})
|
|
|
|
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
|
|
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
|
|
gc := newGarbageCollector(
|
|
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
|
|
nil,
|
|
GcOption{
|
|
cli: cm,
|
|
})
|
|
|
|
gc.recycleUnusedIndexFiles(context.TODO())
|
|
})
|
|
|
|
t.Run("list fail", func(t *testing.T) {
|
|
cm := &mocks.ChunkManager{}
|
|
cm.EXPECT().RootPath().Return("root")
|
|
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
|
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
|
|
return errors.New("error")
|
|
})
|
|
gc := newGarbageCollector(
|
|
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
|
|
nil,
|
|
GcOption{
|
|
cli: cm,
|
|
})
|
|
gc.recycleUnusedIndexFiles(context.TODO())
|
|
})
|
|
|
|
t.Run("remove fail", func(t *testing.T) {
|
|
cm := &mocks.ChunkManager{}
|
|
cm.EXPECT().RootPath().Return("root")
|
|
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
|
|
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
|
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
|
|
for _, file := range []string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"} {
|
|
cowf(&storage.ChunkObjectInfo{FilePath: file})
|
|
}
|
|
return nil
|
|
})
|
|
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(nil)
|
|
gc := newGarbageCollector(
|
|
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
|
|
nil,
|
|
GcOption{
|
|
cli: cm,
|
|
})
|
|
gc.recycleUnusedIndexFiles(context.TODO())
|
|
})
|
|
|
|
t.Run("remove with prefix fail", func(t *testing.T) {
|
|
cm := &mocks.ChunkManager{}
|
|
cm.EXPECT().RootPath().Return("root")
|
|
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("error"))
|
|
cm.EXPECT().WalkWithPrefix(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn(
|
|
func(ctx context.Context, s string, b bool, cowf storage.ChunkObjectWalkFunc) error {
|
|
for _, file := range []string{"a/b/c/", "a/b/600/", "a/b/601/", "a/b/602/"} {
|
|
cowf(&storage.ChunkObjectInfo{FilePath: file})
|
|
}
|
|
return nil
|
|
})
|
|
cm.EXPECT().RemoveWithPrefix(mock.Anything, mock.Anything).Return(errors.New("error"))
|
|
gc := newGarbageCollector(
|
|
createMetaTableForRecycleUnusedIndexFiles(&datacoord.Catalog{MetaKv: kvmocks.NewMetaKv(t)}),
|
|
nil,
|
|
GcOption{
|
|
cli: cm,
|
|
})
|
|
gc.recycleUnusedIndexFiles(context.TODO())
|
|
})
|
|
}
|
|
|
|
func TestGarbageCollector_clearETCD(t *testing.T) {
|
|
catalog := catalogmocks.NewDataCoordCatalog(t)
|
|
catalog.On("ChannelExists",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(true)
|
|
catalog.On("DropChannelCheckpoint",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(nil).Maybe()
|
|
catalog.On("CreateSegmentIndex",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(nil)
|
|
catalog.On("AlterSegmentIndexes",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(nil)
|
|
catalog.On("DropSegment",
|
|
mock.Anything,
|
|
mock.Anything,
|
|
).Return(nil)
|
|
|
|
channelCPs := newChannelCps()
|
|
channelCPs.checkpoints["dmlChannel"] = &msgpb.MsgPosition{
|
|
Timestamp: 1000,
|
|
}
|
|
|
|
segments := map[UniqueID]*SegmentInfo{
|
|
segID: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 5000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 0,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
Binlogs: []*datapb.FieldBinlog{
|
|
{
|
|
FieldID: 1,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "log1",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
FieldID: 2,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "log2",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Deltalogs: []*datapb.FieldBinlog{
|
|
{
|
|
FieldID: 1,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "del_log1",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
FieldID: 2,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "del_log2",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Statslogs: []*datapb.FieldBinlog{
|
|
{
|
|
FieldID: 1,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "stats_log1",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
segID + 1: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 5000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 0,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
},
|
|
},
|
|
segID + 2: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 2,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 10000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 10,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
CompactionFrom: []int64{segID, segID + 1},
|
|
},
|
|
},
|
|
segID + 3: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 3,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 10,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
CompactionFrom: nil,
|
|
},
|
|
},
|
|
segID + 4: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 4,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 12000,
|
|
State: commonpb.SegmentState_Flushed,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 10,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
CompactionFrom: []int64{segID + 2, segID + 3},
|
|
},
|
|
},
|
|
segID + 5: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 5,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: 0,
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 1200,
|
|
},
|
|
},
|
|
},
|
|
segID + 6: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 6,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: uint64(time.Now().Add(time.Hour).UnixNano()),
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
Compacted: true,
|
|
},
|
|
},
|
|
// compacted and child is GCed, dml pos is big than channel cp
|
|
segID + 7: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 7,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: 0,
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 1200,
|
|
},
|
|
Compacted: true,
|
|
},
|
|
},
|
|
}
|
|
m := &meta{
|
|
catalog: catalog,
|
|
channelCPs: channelCPs,
|
|
segments: NewSegmentsInfo(),
|
|
indexMeta: &indexMeta{
|
|
catalog: catalog,
|
|
segmentIndexes: map[UniqueID]map[UniqueID]*model.SegmentIndex{
|
|
segID: {
|
|
indexID: {
|
|
SegmentID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 5000,
|
|
IndexID: indexID,
|
|
BuildID: buildID,
|
|
NodeID: 0,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 0,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 1024,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
segID + 1: {
|
|
indexID: {
|
|
SegmentID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 5000,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 1,
|
|
NodeID: 0,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 0,
|
|
IndexFileKeys: []string{"file3", "file4"},
|
|
IndexSize: 1024,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
},
|
|
|
|
buildID2SegmentIndex: map[UniqueID]*model.SegmentIndex{
|
|
buildID: {
|
|
SegmentID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 5000,
|
|
IndexID: indexID,
|
|
BuildID: buildID,
|
|
NodeID: 0,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 0,
|
|
IndexFileKeys: []string{"file1", "file2"},
|
|
IndexSize: 1024,
|
|
WriteHandoff: false,
|
|
},
|
|
buildID + 1: {
|
|
SegmentID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 5000,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 1,
|
|
NodeID: 0,
|
|
IndexVersion: 1,
|
|
IndexState: commonpb.IndexState_Finished,
|
|
FailReason: "",
|
|
IsDeleted: false,
|
|
CreateTime: 0,
|
|
IndexFileKeys: []string{"file3", "file4"},
|
|
IndexSize: 1024,
|
|
WriteHandoff: false,
|
|
},
|
|
},
|
|
indexes: map[UniqueID]map[UniqueID]*model.Index{
|
|
collID: {
|
|
indexID: {
|
|
TenantID: "",
|
|
CollectionID: collID,
|
|
FieldID: fieldID,
|
|
IndexID: indexID,
|
|
IndexName: indexName,
|
|
IsDeleted: false,
|
|
CreateTime: 0,
|
|
TypeParams: nil,
|
|
IndexParams: nil,
|
|
IsAutoIndex: false,
|
|
UserIndexParams: nil,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
|
|
collections: map[UniqueID]*collectionInfo{
|
|
collID: {
|
|
ID: collID,
|
|
Schema: &schemapb.CollectionSchema{
|
|
Name: "",
|
|
Description: "",
|
|
AutoID: false,
|
|
Fields: []*schemapb.FieldSchema{
|
|
{
|
|
FieldID: fieldID,
|
|
Name: "",
|
|
IsPrimaryKey: false,
|
|
Description: "",
|
|
DataType: schemapb.DataType_FloatVector,
|
|
TypeParams: nil,
|
|
IndexParams: nil,
|
|
AutoID: false,
|
|
State: 0,
|
|
},
|
|
},
|
|
},
|
|
Partitions: nil,
|
|
StartPositions: nil,
|
|
Properties: nil,
|
|
},
|
|
},
|
|
}
|
|
|
|
for id, segment := range segments {
|
|
m.segments.SetSegment(id, segment)
|
|
}
|
|
|
|
for segID, segment := range map[UniqueID]*SegmentInfo{
|
|
segID: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 5000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 0,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
Binlogs: []*datapb.FieldBinlog{
|
|
{
|
|
FieldID: 1,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "log1",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
FieldID: 2,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "log2",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Deltalogs: []*datapb.FieldBinlog{
|
|
{
|
|
FieldID: 1,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "del_log1",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
FieldID: 2,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "del_log2",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Statslogs: []*datapb.FieldBinlog{
|
|
{
|
|
FieldID: 1,
|
|
Binlogs: []*datapb.Binlog{
|
|
{
|
|
LogPath: "stats_log1",
|
|
LogSize: 1024,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
segID + 1: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 1,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 5000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 0,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
},
|
|
},
|
|
segID + 2: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 2,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 10000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 10,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
CompactionFrom: []int64{segID, segID + 1},
|
|
},
|
|
},
|
|
segID + 3: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 3,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 10,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
CompactionFrom: nil,
|
|
},
|
|
},
|
|
segID + 4: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 4,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 12000,
|
|
State: commonpb.SegmentState_Flushed,
|
|
MaxRowNum: 65536,
|
|
DroppedAt: 10,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
CompactionFrom: []int64{segID + 2, segID + 3},
|
|
},
|
|
},
|
|
segID + 5: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 5,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: 0,
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 1200,
|
|
},
|
|
},
|
|
},
|
|
// cannot dropped for not expired.
|
|
segID + 6: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 6,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: uint64(time.Now().Add(time.Hour).UnixNano()),
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
Compacted: true,
|
|
},
|
|
},
|
|
// compacted and child is GCed, dml pos is big than channel cp
|
|
segID + 7: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 7,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: 0,
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 1200,
|
|
},
|
|
Compacted: true,
|
|
},
|
|
},
|
|
// can be dropped for expired and compacted
|
|
segID + 8: {
|
|
SegmentInfo: &datapb.SegmentInfo{
|
|
ID: segID + 8,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
InsertChannel: "dmlChannel",
|
|
NumOfRows: 2000,
|
|
State: commonpb.SegmentState_Dropped,
|
|
MaxRowNum: 65535,
|
|
DroppedAt: uint64(time.Now().Add(-7 * 24 * time.Hour).UnixNano()),
|
|
CompactionFrom: nil,
|
|
DmlPosition: &msgpb.MsgPosition{
|
|
Timestamp: 900,
|
|
},
|
|
Compacted: true,
|
|
},
|
|
},
|
|
} {
|
|
m.segments.SetSegment(segID, segment)
|
|
}
|
|
|
|
cm := &mocks.ChunkManager{}
|
|
cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
|
|
gc := newGarbageCollector(
|
|
m,
|
|
newMockHandlerWithMeta(m),
|
|
GcOption{
|
|
cli: cm,
|
|
dropTolerance: 1,
|
|
})
|
|
gc.recycleDroppedSegments(context.TODO())
|
|
|
|
/*
|
|
A B
|
|
\ /
|
|
C D
|
|
\ /
|
|
E
|
|
|
|
E: flushed, not indexed, should not be GCed
|
|
D: dropped, not indexed, should not be GCed, since E is not GCed
|
|
C: dropped, not indexed, should not be GCed, since E is not GCed
|
|
A: dropped, indexed, should not be GCed, since C is not indexed
|
|
B: dropped, indexed, should not be GCed, since C is not indexed
|
|
|
|
F: dropped, compcated is false, should not be GCed, since dml position is larger than channel cp
|
|
G: dropped, compacted is true, missing child info, should be GCed since dml pos is less than channel cp, FAST GC do not wait drop tolerance
|
|
H: dropped, compacted is true, missing child info, should not be GCed since dml pos is larger than channel cp
|
|
|
|
conclusion: only G is GCed.
|
|
*/
|
|
segA := gc.meta.GetSegment(segID)
|
|
assert.NotNil(t, segA)
|
|
segB := gc.meta.GetSegment(segID + 1)
|
|
assert.NotNil(t, segB)
|
|
segC := gc.meta.GetSegment(segID + 2)
|
|
assert.NotNil(t, segC)
|
|
segD := gc.meta.GetSegment(segID + 3)
|
|
assert.NotNil(t, segD)
|
|
segE := gc.meta.GetSegment(segID + 4)
|
|
assert.NotNil(t, segE)
|
|
segF := gc.meta.GetSegment(segID + 5)
|
|
assert.NotNil(t, segF)
|
|
segG := gc.meta.GetSegment(segID + 6)
|
|
assert.NotNil(t, segG)
|
|
segH := gc.meta.GetSegment(segID + 7)
|
|
assert.NotNil(t, segH)
|
|
segG = gc.meta.GetSegment(segID + 8)
|
|
assert.Nil(t, segG)
|
|
err := gc.meta.indexMeta.AddSegmentIndex(&model.SegmentIndex{
|
|
SegmentID: segID + 4,
|
|
CollectionID: collID,
|
|
PartitionID: partID,
|
|
NumRows: 12000,
|
|
IndexID: indexID,
|
|
BuildID: buildID + 4,
|
|
})
|
|
assert.NoError(t, err)
|
|
|
|
err = gc.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
|
|
BuildID: buildID + 4,
|
|
State: commonpb.IndexState_Finished,
|
|
IndexFileKeys: []string{"file1", "file2", "file3", "file4"},
|
|
SerializedSize: 10240,
|
|
FailReason: "",
|
|
})
|
|
assert.NoError(t, err)
|
|
|
|
gc.recycleDroppedSegments(context.TODO())
|
|
/*
|
|
|
|
A: processed prior to C, C is not GCed yet and C is not indexed, A is not GCed in this turn
|
|
B: processed prior to C, C is not GCed yet and C is not indexed, B is not GCed in this turn
|
|
|
|
E: flushed, indexed, should not be GCed
|
|
C: dropped, not indexed, should be GCed since E is indexed
|
|
D: dropped, not indexed, should be GCed since E is indexed
|
|
*/
|
|
|
|
segC = gc.meta.GetSegment(segID + 2)
|
|
assert.Nil(t, segC)
|
|
segD = gc.meta.GetSegment(segID + 3)
|
|
assert.Nil(t, segD)
|
|
|
|
gc.recycleDroppedSegments(context.TODO())
|
|
/*
|
|
A: compacted became false due to C is GCed already, A should be GCed since dropTolernace is meet
|
|
B: compacted became false due to C is GCed already, B should be GCed since dropTolerance is meet
|
|
*/
|
|
segA = gc.meta.GetSegment(segID)
|
|
assert.Nil(t, segA)
|
|
segB = gc.meta.GetSegment(segID + 1)
|
|
assert.Nil(t, segB)
|
|
}
|
|
|
|
func TestGarbageCollector_removeObjectPool(t *testing.T) {
|
|
paramtable.Init()
|
|
cm := mocks.NewChunkManager(t)
|
|
gc := newGarbageCollector(
|
|
nil,
|
|
nil,
|
|
GcOption{
|
|
cli: cm,
|
|
dropTolerance: 1,
|
|
})
|
|
logs := make(map[string]struct{})
|
|
for i := 0; i < 50; i++ {
|
|
logs[fmt.Sprintf("log%d", i)] = struct{}{}
|
|
}
|
|
|
|
t.Run("success", func(t *testing.T) {
|
|
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(nil)
|
|
defer call.Unset()
|
|
b := gc.removeObjectFiles(context.TODO(), logs)
|
|
assert.NoError(t, b)
|
|
})
|
|
|
|
t.Run("oss not found error", func(t *testing.T) {
|
|
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(merr.WrapErrIoKeyNotFound("not found"))
|
|
defer call.Unset()
|
|
b := gc.removeObjectFiles(context.TODO(), logs)
|
|
assert.NoError(t, b)
|
|
})
|
|
|
|
t.Run("oss server error", func(t *testing.T) {
|
|
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(merr.WrapErrIoFailed("server error", errors.New("err")))
|
|
defer call.Unset()
|
|
b := gc.removeObjectFiles(context.TODO(), logs)
|
|
assert.Error(t, b)
|
|
})
|
|
|
|
t.Run("other type error", func(t *testing.T) {
|
|
call := cm.EXPECT().Remove(mock.Anything, mock.Anything).Return(errors.New("other error"))
|
|
defer call.Unset()
|
|
b := gc.removeObjectFiles(context.TODO(), logs)
|
|
assert.Error(t, b)
|
|
})
|
|
}
|
|
|
|
type GarbageCollectorSuite struct {
|
|
suite.Suite
|
|
|
|
bucketName string
|
|
rootPath string
|
|
|
|
cli *storage.RemoteChunkManager
|
|
inserts []string
|
|
stats []string
|
|
delta []string
|
|
others []string
|
|
|
|
meta *meta
|
|
}
|
|
|
|
func (s *GarbageCollectorSuite) SetupTest() {
|
|
s.bucketName = `datacoord-ut` + strings.ToLower(funcutil.RandomString(8))
|
|
s.rootPath = `gc` + funcutil.RandomString(8)
|
|
|
|
var err error
|
|
s.cli, s.inserts, s.stats, s.delta, s.others, err = initUtOSSEnv(s.bucketName, s.rootPath, 4)
|
|
s.Require().NoError(err)
|
|
|
|
s.meta, err = newMemoryMeta()
|
|
s.Require().NoError(err)
|
|
}
|
|
|
|
func (s *GarbageCollectorSuite) TearDownTest() {
|
|
cleanupOSS(s.cli, s.bucketName, s.rootPath)
|
|
}
|
|
|
|
func (s *GarbageCollectorSuite) TestPauseResume() {
|
|
s.Run("not_enabled", func() {
|
|
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
|
|
cli: s.cli,
|
|
enabled: false,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 24 * 7,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
|
|
gc.start()
|
|
defer gc.close()
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
err := gc.Pause(ctx, time.Second)
|
|
s.NoError(err)
|
|
|
|
err = gc.Resume(ctx)
|
|
s.Error(err)
|
|
})
|
|
|
|
s.Run("pause_then_resume", func() {
|
|
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
|
|
cli: s.cli,
|
|
enabled: true,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
|
|
gc.start()
|
|
defer gc.close()
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
err := gc.Pause(ctx, time.Minute)
|
|
s.NoError(err)
|
|
|
|
s.NotZero(gc.pauseUntil.Load())
|
|
|
|
err = gc.Resume(ctx)
|
|
s.NoError(err)
|
|
|
|
s.Zero(gc.pauseUntil.Load())
|
|
})
|
|
|
|
s.Run("pause_before_until", func() {
|
|
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
|
|
cli: s.cli,
|
|
enabled: true,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
|
|
gc.start()
|
|
defer gc.close()
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
err := gc.Pause(ctx, time.Minute)
|
|
s.NoError(err)
|
|
|
|
until := gc.pauseUntil.Load()
|
|
s.NotZero(until)
|
|
|
|
err = gc.Pause(ctx, time.Second)
|
|
s.NoError(err)
|
|
|
|
second := gc.pauseUntil.Load()
|
|
|
|
s.Equal(until, second)
|
|
})
|
|
|
|
s.Run("pause_resume_timeout", func() {
|
|
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
|
|
cli: s.cli,
|
|
enabled: true,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
|
|
defer cancel()
|
|
err := gc.Pause(ctx, time.Minute)
|
|
s.Error(err)
|
|
|
|
s.Zero(gc.pauseUntil.Load())
|
|
|
|
err = gc.Resume(ctx)
|
|
s.Error(err)
|
|
|
|
s.Zero(gc.pauseUntil.Load())
|
|
})
|
|
}
|
|
|
|
func (s *GarbageCollectorSuite) TestRunRecycleTaskWithPauser() {
|
|
gc := newGarbageCollector(s.meta, newMockHandler(), GcOption{
|
|
cli: s.cli,
|
|
enabled: true,
|
|
checkInterval: time.Millisecond * 10,
|
|
scanInterval: time.Hour * 7 * 24,
|
|
missingTolerance: time.Hour * 24,
|
|
dropTolerance: time.Hour * 24,
|
|
})
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*2500)
|
|
defer cancel()
|
|
|
|
cnt := 0
|
|
gc.runRecycleTaskWithPauser(ctx, "test", time.Second, func(ctx context.Context) {
|
|
cnt++
|
|
})
|
|
s.Equal(cnt, 2)
|
|
}
|
|
|
|
func TestGarbageCollector(t *testing.T) {
|
|
suite.Run(t, new(GarbageCollectorSuite))
|
|
}
|