recall repo maintenance history on restart

Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
pull/8580/head
Lyndon-Li 2025-01-06 17:35:57 +08:00
parent ceeab10b6e
commit 4ce7361f5a
6 changed files with 379 additions and 41 deletions

go.mod
View File

@@ -31,6 +31,7 @@ require (
github.com/kubernetes-csi/external-snapshotter/client/v7 v7.0.0
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.20.5
github.com/robfig/cron/v3 v3.0.1
@@ -154,7 +155,6 @@ require (
github.com/natefinch/atomic v1.0.1 // indirect
github.com/nxadm/tail v1.4.8 // indirect
github.com/oklog/run v1.0.0 // indirect
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect
github.com/pierrec/lz4 v2.6.1+incompatible // indirect
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect

View File

@@ -13,7 +13,10 @@ import (
"github.com/spf13/pflag"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
@@ -65,6 +68,8 @@ func (o *Options) Run(f velerocli.Factory) {
logger := logging.DefaultLogger(o.LogLevelFlag.Parse(), o.FormatFlag.Parse())
logger.SetOutput(os.Stdout)
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
time.Sleep(time.Minute)
pruneError := o.runRepoPrune(f, f.Namespace(), logger)
@@ -116,9 +121,9 @@ func (o *Options) initClient(f velerocli.Factory) (client.Client, error) {
return cli, nil
}
func initRepoManager(namespace string, cli client.Client, logger logrus.FieldLogger) (repomanager.Manager, error) {
func initRepoManager(namespace string, cli client.Client, kubeClient kubernetes.Interface, logger logrus.FieldLogger) (repomanager.Manager, error) {
// ensure the repo key secret is set up
if err := repokey.EnsureCommonRepositoryKey(cli, namespace); err != nil {
if err := repokey.EnsureCommonRepositoryKey(kubeClient.CoreV1(), namespace); err != nil {
return nil, errors.Wrap(err, "failed to ensure repository key")
}
@@ -155,7 +160,12 @@ func (o *Options) runRepoPrune(f velerocli.Factory, namespace string, logger log
return err
}
manager, err := initRepoManager(namespace, cli, logger)
kubeClient, err := f.KubeClient()
if err != nil {
return err
}
manager, err := initRepoManager(namespace, cli, kubeClient, logger)
if err != nil {
return err
}

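The hunks above thread a typed `kubernetes.Interface`, obtained from the CLI factory, into `initRepoManager`, which now needs `kubeClient.CoreV1()` for the repository key setup. A hedged sketch of the general wiring is below; `newKubeClient` is a made-up placeholder standing in for what Velero's `velerocli.Factory.KubeClient()` does, not the factory itself.

```go
package main

import (
	"fmt"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// newKubeClient builds a typed clientset from the default kubeconfig loading
// rules, roughly what a CLI factory's KubeClient() would do under the hood.
func newKubeClient() (kubernetes.Interface, error) {
	loadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
	cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, &clientcmd.ConfigOverrides{}).ClientConfig()
	if err != nil {
		return nil, err
	}
	return kubernetes.NewForConfig(cfg)
}

func main() {
	kubeClient, err := newKubeClient()
	if err != nil {
		fmt.Println("no kubeconfig available:", err)
		return
	}
	// A typed client exposes CoreV1(), which is what the repository key
	// setup now consumes instead of a controller-runtime client.
	_ = kubeClient.CoreV1()
	fmt.Println("typed client ready")
}
```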
View File

@@ -491,7 +491,7 @@ func (s *server) checkNodeAgent() {
func (s *server) initRepoManager() error {
// ensure the repo key secret is set up
if err := repokey.EnsureCommonRepositoryKey(s.mgr.GetClient(), s.namespace); err != nil {
if err := repokey.EnsureCommonRepositoryKey(s.kubeClient.CoreV1(), s.namespace); err != nil {
return err
}

View File

@@ -25,6 +25,7 @@ import (
"slices"
"time"
"github.com/petar/GoLLRB/llrb"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -348,43 +349,45 @@ func (r *BackupRepoReconciler) recallMaintenance(ctx context.Context, req *veler
})
}
type maintenanceStatusWrapper struct {
status *velerov1api.BackupRepositoryMaintenanceStatus
}
func (w maintenanceStatusWrapper) Less(other llrb.Item) bool {
return w.status.StartTimestamp.Before(other.(maintenanceStatusWrapper).status.StartTimestamp)
}
func consolidateHistory(coming, cur []velerov1api.BackupRepositoryMaintenanceStatus) []velerov1api.BackupRepositoryMaintenanceStatus {
if len(coming) == 0 {
return nil
}
if isIdenticalHistory(coming, cur) {
if isIdenticalHistory(cur, coming) {
return nil
}
consolidator := llrb.New()
for i := range cur {
consolidator.ReplaceOrInsert(maintenanceStatusWrapper{&cur[i]})
}
for i := range coming {
consolidator.ReplaceOrInsert(maintenanceStatusWrapper{&coming[i]})
}
truncated := []velerov1api.BackupRepositoryMaintenanceStatus{}
i := len(cur) - 1
j := len(coming) - 1
for i >= 0 || j >= 0 {
for consolidator.Len() > 0 {
if len(truncated) == defaultMaintenanceStatusQueueLength {
break
}
if i >= 0 && j >= 0 {
if isEarlierMaintenanceStatus(cur[i], coming[j]) {
truncated = append(truncated, coming[j])
j--
} else {
truncated = append(truncated, cur[i])
i--
}
} else if i >= 0 {
truncated = append(truncated, cur[i])
i--
} else {
truncated = append(truncated, coming[j])
j--
}
item := consolidator.DeleteMax()
truncated = append(truncated, *item.(maintenanceStatusWrapper).status)
}
slices.Reverse(truncated)
if isIdenticalHistory(truncated, cur) {
if isIdenticalHistory(cur, truncated) {
return nil
}
@@ -421,10 +424,6 @@ func isIdenticalHistory(a, b []velerov1api.BackupRepositoryMaintenanceStatus) bo
return true
}
func isEarlierMaintenanceStatus(a, b velerov1api.BackupRepositoryMaintenanceStatus) bool {
return a.StartTimestamp.Before(b.StartTimestamp)
}
var funcStartMaintenanceJob = repository.StartMaintenanceJob
var funcWaitMaintenanceJobComplete = repository.WaitMaintenanceJobComplete
@@ -438,10 +437,6 @@ func (r *BackupRepoReconciler) runMaintenanceIfDue(ctx context.Context, req *vel
log.Info("Running maintenance on backup repository")
// prune failures should be displayed in the `.status.message` field but
// should not cause the repo to move to `NotReady`.
log.Debug("Pruning repo")
job, err := funcStartMaintenanceJob(r.Client, ctx, req, r.repoMaintenanceConfig, r.podResources, r.logLevel, r.logFormat, log)
if err != nil {
log.WithError(err).Warn("Starting repo maintenance failed")

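For context, the controller change above replaces the previous two-pointer merge in `consolidateHistory` with an LLRB tree keyed on `StartTimestamp`: both the current and incoming histories are inserted, entries with identical start times collapse into one, and the newest entries up to the queue limit are popped and re-ordered. The following is a minimal, self-contained sketch of that technique, using a simplified status struct and an explicit `limit` parameter in place of the Velero API types and `defaultMaintenanceStatusQueueLength`; the names here are illustrative, not the controller's actual code.

```go
package main

import (
	"fmt"
	"slices"
	"time"

	"github.com/petar/GoLLRB/llrb"
)

// maintStatus is a simplified stand-in for a maintenance status entry.
type maintStatus struct {
	Start   time.Time
	Message string
}

// item wraps a status so the LLRB tree orders entries by start time.
type item struct{ s *maintStatus }

func (w item) Less(other llrb.Item) bool {
	return w.s.Start.Before(other.(item).s.Start)
}

// consolidate merges cur and coming, dedupes entries that share a start time,
// and keeps only the newest `limit` entries in chronological order.
func consolidate(coming, cur []maintStatus, limit int) []maintStatus {
	tree := llrb.New()
	for i := range cur {
		tree.ReplaceOrInsert(item{&cur[i]})
	}
	for i := range coming {
		// an incoming entry with the same start time replaces the existing one
		tree.ReplaceOrInsert(item{&coming[i]})
	}

	out := []maintStatus{}
	for tree.Len() > 0 && len(out) < limit {
		// pop from the max side so the newest entries are the ones retained
		out = append(out, *tree.DeleteMax().(item).s)
	}
	slices.Reverse(out) // restore oldest-to-newest order
	return out
}

func main() {
	now := time.Now()
	cur := []maintStatus{{now, "a"}, {now.Add(time.Hour), "b"}}
	coming := []maintStatus{{now.Add(time.Hour), "b"}, {now.Add(2 * time.Hour), "c"}}
	fmt.Println(consolidate(coming, cur, 3)) // keeps a, b, c; the duplicate "b" collapses
}
```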
View File

@@ -846,3 +846,340 @@ func TestRecallMaintenance(t *testing.T) {
})
}
}
func TestConsolidateHistory(t *testing.T) {
now := time.Now()
tests := []struct {
name string
cur []velerov1api.BackupRepositoryMaintenanceStatus
coming []velerov1api.BackupRepositoryMaintenanceStatus
expected []velerov1api.BackupRepositoryMaintenanceStatus
}{
{
name: "zero coming",
cur: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
},
expected: nil,
},
{
name: "identical coming",
cur: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
},
coming: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
},
},
expected: nil,
},
{
name: "less than limit",
cur: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
},
coming: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
},
expected: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
},
},
{
name: "more than limit",
cur: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
},
coming: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 4)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-4",
},
},
expected: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 4)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-4",
},
},
},
{
name: "more than limit 2",
cur: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
},
coming: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 4)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-4",
},
},
expected: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 4)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-4",
},
},
},
{
name: "coming is not used",
cur: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 4)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-4",
},
},
coming: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
},
},
expected: nil,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
consolidated := consolidateHistory(test.coming, test.cur)
if test.expected == nil {
assert.Nil(t, consolidated)
} else {
assert.Len(t, consolidated, len(test.expected))
for i := 0; i < len(test.expected); i++ {
assert.Equal(t, *test.expected[i].StartTimestamp, *consolidated[i].StartTimestamp)
assert.Equal(t, *test.expected[i].CompleteTimestamp, *consolidated[i].CompleteTimestamp)
assert.Equal(t, test.expected[i].Result, consolidated[i].Result)
assert.Equal(t, test.expected[i].Message, consolidated[i].Message)
}
assert.Nil(t, consolidateHistory(test.coming, consolidated))
}
})
}
}
func TestGetLastMaintenanceTimeFromHistory(t *testing.T) {
now := time.Now()
tests := []struct {
name string
history []velerov1api.BackupRepositoryMaintenanceStatus
expected time.Time
}{
{
name: "first one is nil",
history: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
},
expected: now.Add(time.Hour * 3),
},
{
name: "another one is nil",
history: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
},
expected: now.Add(time.Hour * 3),
},
{
name: "disordered",
history: []velerov1api.BackupRepositoryMaintenanceStatus{
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour * 2)},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour * 3)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message-3",
},
{
StartTimestamp: &metav1.Time{Time: now},
CompleteTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceSucceeded,
Message: "fake-maintenance-message",
},
{
StartTimestamp: &metav1.Time{Time: now.Add(time.Hour)},
Result: velerov1api.BackupRepositoryMaintenanceFailed,
Message: "fake-maintenance-message-2",
},
},
expected: now.Add(time.Hour * 3),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
time := getLastMaintenanceTimeFromHistory(test.history)
assert.Equal(t, test.expected, time.Time)
})
}
}

View File

@@ -24,11 +24,9 @@ import (
corev1api "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
"github.com/vmware-tanzu/velero/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
)
const (
@@ -38,13 +36,11 @@ const (
encryptionKey = "static-passw0rd"
)
func EnsureCommonRepositoryKey(cli client.Client, namespace string) error {
existing := &corev1api.Secret{}
err := cli.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: credentialsSecretName}, existing)
func EnsureCommonRepositoryKey(secretClient corev1client.SecretsGetter, namespace string) error {
_, err := secretClient.Secrets(namespace).Get(context.TODO(), credentialsSecretName, metav1.GetOptions{})
if err != nil && !apierrors.IsNotFound(err) {
return errors.WithStack(err)
}
if err == nil {
return nil
}
@@ -62,7 +58,7 @@ func EnsureCommonRepositoryKey(cli client.Client, namespace string) error {
},
}
if err := cli.Create(context.TODO(), secret); err != nil {
if _, err = secretClient.Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{}); err != nil {
return errors.Wrapf(err, "error creating %s secret", credentialsSecretName)
}