recall repo maintenance history on restart

Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
pull/8580/head
Lyndon-Li 2025-01-06 16:39:00 +08:00
parent db69829fd7
commit 6b73a256d5
7 changed files with 26 additions and 97 deletions

View File

@ -0,0 +1 @@
Fix issue #7753, recall repo maintenance history on Velero server restart

View File

@ -13,7 +13,6 @@ import (
"github.com/spf13/pflag"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/credentials"
@ -88,41 +87,36 @@ func (o *Options) Run(f velerocli.Factory) {
}
}
func (o *Options) initClient(f velerocli.Factory) (client.Client, kubernetes.Interface, error) {
func (o *Options) initClient(f velerocli.Factory) (client.Client, error) {
scheme := runtime.NewScheme()
err := velerov1api.AddToScheme(scheme)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to add velero scheme")
return nil, errors.Wrap(err, "failed to add velero scheme")
}
err = v1.AddToScheme(scheme)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to add api core scheme")
return nil, errors.Wrap(err, "failed to add api core scheme")
}
config, err := f.ClientConfig()
if err != nil {
return nil, nil, errors.Wrap(err, "failed to get client config")
return nil, errors.Wrap(err, "failed to get client config")
}
cli, err := client.New(config, client.Options{
Scheme: scheme,
})
if err != nil {
return nil, nil, errors.Wrap(err, "failed to create client")
return nil, errors.Wrap(err, "failed to create client")
}
kubeClient, err := f.KubeClient()
if err != nil {
return nil, nil, errors.Wrap(err, "failed to create kube client")
}
return cli, kubeClient, nil
return cli, nil
}
func initRepoManager(namespace string, kubeClient kubernetes.Interface, cli client.Client, logger logrus.FieldLogger) (repomanager.Manager, error) {
func initRepoManager(namespace string, cli client.Client, logger logrus.FieldLogger) (repomanager.Manager, error) {
// ensure the repo key secret is set up
if err := repokey.EnsureCommonRepositoryKey(kubeClient.CoreV1(), namespace); err != nil {
if err := repokey.EnsureCommonRepositoryKey(cli, namespace); err != nil {
return nil, errors.Wrap(err, "failed to ensure repository key")
}
@ -154,12 +148,12 @@ func initRepoManager(namespace string, kubeClient kubernetes.Interface, cli clie
}
func (o *Options) runRepoPrune(f velerocli.Factory, namespace string, logger logrus.FieldLogger) error {
cli, kubeClient, err := o.initClient(f)
cli, err := o.initClient(f)
if err != nil {
return err
}
manager, err := initRepoManager(namespace, kubeClient, cli, logger)
manager, err := initRepoManager(namespace, cli, logger)
if err != nil {
return err
}

View File

@ -480,7 +480,7 @@ func (s *server) checkNodeAgent() {
func (s *server) initRepoManager() error {
// ensure the repo key secret is set up
if err := repokey.EnsureCommonRepositoryKey(s.kubeClient.CoreV1(), s.namespace); err != nil {
if err := repokey.EnsureCommonRepositoryKey(s.mgr.GetClient(), s.namespace); err != nil {
return err
}

View File

@ -338,12 +338,9 @@ func (r *BackupRepoReconciler) recallMaintenance(ctx context.Context, req *veler
log.Warn("Updating backup repository because of unrecorded histories")
if lastMaintenanceTime.After(req.Status.LastMaintenanceTime.Time) {
log.Warnf("Updating backup repository last maintenance time (%v) from history (%v)", req.Status.LastMaintenanceTime.Time, lastMaintenanceTime.Time)
}
return r.patchBackupRepository(ctx, req, func(rr *velerov1api.BackupRepository) {
if lastMaintenanceTime.After(rr.Status.LastMaintenanceTime.Time) {
log.Warnf("Updating backup repository last maintenance time (%v) from history (%v)", rr.Status.LastMaintenanceTime.Time, lastMaintenanceTime.Time)
rr.Status.LastMaintenanceTime = lastMaintenanceTime
}
@ -453,8 +450,8 @@ func (r *BackupRepoReconciler) runMaintenanceIfDue(ctx context.Context, req *vel
})
}
// when WaitMaintenanceJobComplete fails, the maintenance result will be left temporarily
// If the maintenenance still completes later, recallMaintenance recalls the left onces and update LastMaintenanceTime and history
// when WaitMaintenanceJobComplete fails, the maintenance result will be left aside temporarily
// If the maintenenance still completes later, recallMaintenance recalls the left once and update LastMaintenanceTime and history
status, err := funcWaitMaintenanceJobComplete(r.Client, ctx, job, r.namespace, log)
if err != nil {
return errors.Wrapf(err, "error waiting repo maintenance completion status")

View File

@ -24,9 +24,11 @@ import (
corev1api "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/apimachinery/pkg/types"
"github.com/vmware-tanzu/velero/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
)
const (
@ -36,11 +38,13 @@ const (
encryptionKey = "static-passw0rd"
)
func EnsureCommonRepositoryKey(secretClient corev1client.SecretsGetter, namespace string) error {
_, err := secretClient.Secrets(namespace).Get(context.TODO(), credentialsSecretName, metav1.GetOptions{})
func EnsureCommonRepositoryKey(cli client.Client, namespace string) error {
existing := &corev1api.Secret{}
err := cli.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: credentialsSecretName}, existing)
if err != nil && !apierrors.IsNotFound(err) {
return errors.WithStack(err)
}
if err == nil {
return nil
}
@ -58,7 +62,7 @@ func EnsureCommonRepositoryKey(secretClient corev1client.SecretsGetter, namespac
},
}
if _, err = secretClient.Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{}); err != nil {
if err := cli.Create(context.TODO(), secret); err != nil {
return errors.Wrapf(err, "error creating %s secret", credentialsSecretName)
}

View File

@ -148,31 +148,6 @@ func getMaintenanceResultFromJob(cli client.Client, job *batchv1.Job) (string, e
return terminated.Message, nil
}
func getLatestMaintenanceJob(cli client.Client, ns string) (*batchv1.Job, error) {
// Get the maintenance job list by label
jobList := &batchv1.JobList{}
err := cli.List(context.TODO(), jobList, &client.ListOptions{
Namespace: ns,
},
&client.HasLabels{RepositoryNameLabel},
)
if err != nil {
return nil, err
}
if len(jobList.Items) == 0 {
return nil, nil
}
// Get the latest maintenance job
sort.Slice(jobList.Items, func(i, j int) bool {
return jobList.Items[i].CreationTimestamp.Time.After(jobList.Items[j].CreationTimestamp.Time)
})
return &jobList.Items[0], nil
}
// getMaintenanceJobConfig is called to get the Maintenance Job Config for the
// BackupRepository specified by the repo parameter.
//
@ -266,6 +241,7 @@ func getMaintenanceJobConfig(
return result, nil
}
// WaitMaintenanceJobComplete waits the completion of the specified maintenance job and return the BackupRepositoryMaintenanceStatus
func WaitMaintenanceJobComplete(cli client.Client, ctx context.Context, jobName, ns string, logger logrus.FieldLogger) (velerov1api.BackupRepositoryMaintenanceStatus, error) {
log := logger.WithField("job name", jobName)
@ -285,7 +261,7 @@ func WaitMaintenanceJobComplete(cli client.Client, ctx context.Context, jobName,
}
// WaitAllMaintenanceJobComplete checks all the incomplete maintenance jobs of the specified repo and wait for them to complete,
// and then return the maintenance jobs in the range of limit
// and then return the maintenance jobs' status in the range of limit
func WaitAllMaintenanceJobComplete(ctx context.Context, cli client.Client, repo *velerov1api.BackupRepository, limit int, log logrus.FieldLogger) ([]velerov1api.BackupRepositoryMaintenanceStatus, error) {
jobList := &batchv1.JobList{}
err := cli.List(context.TODO(), jobList, &client.ListOptions{
@ -338,6 +314,7 @@ func WaitAllMaintenanceJobComplete(ctx context.Context, cli client.Client, repo
return history, nil
}
// StartMaintenanceJob creates a new maintenance job
func StartMaintenanceJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRepository, repoMaintenanceJobConfig string,
podResources kube.PodResources, logLevel logrus.Level, logFormat *logging.FormatFlag, logger logrus.FieldLogger) (string, error) {
bsl := &velerov1api.BackupStorageLocation{}

View File

@ -269,50 +269,6 @@ func TestGetMaintenanceResultFromJob(t *testing.T) {
assert.Equal(t, "test message", result)
}
func TestGetLatestMaintenanceJob(t *testing.T) {
// Set up test repo
repo := "test-repo"
// Create some maintenance jobs for testing
var objs []client.Object
// Create a newer job
newerJob := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "job1",
Namespace: "default",
Labels: map[string]string{RepositoryNameLabel: repo},
CreationTimestamp: metav1.Time{
Time: metav1.Now().Add(time.Duration(-24) * time.Hour),
},
},
Spec: batchv1.JobSpec{},
}
objs = append(objs, newerJob)
// Create an older job
olderJob := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "job2",
Namespace: "default",
Labels: map[string]string{RepositoryNameLabel: repo},
},
Spec: batchv1.JobSpec{},
}
objs = append(objs, olderJob)
// Create a fake Kubernetes client
scheme := runtime.NewScheme()
_ = batchv1.AddToScheme(scheme)
cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build()
// Call the function
job, err := getLatestMaintenanceJob(cli, "default")
assert.NoError(t, err)
// We expect the returned job to be the newer job
assert.Equal(t, newerJob.Name, job.Name)
}
func TestGetMaintenanceJobConfig(t *testing.T) {
ctx := context.Background()
logger := logrus.New()