Clean up leaked CSI snapshot for incomplete backup
This commit makes sure when a backup is deleted the controller will delete the CSI snapshot even when the bakckup tarball is not uploaded. fixes #8160 Signed-off-by: Daniel Jiang <daniel.jiang@broadcom.com>pull/8637/head
parent
5b1738abf8
commit
1c372893ec
|
@ -13,7 +13,7 @@ jobs:
|
|||
- name: Codespell
|
||||
uses: codespell-project/actions-codespell@master
|
||||
with:
|
||||
# ignore the config/.../crd.go file as it's generated binary data that is edited elswhere.
|
||||
# ignore the config/.../crd.go file as it's generated binary data that is edited elsewhere.
|
||||
skip: .git,*.png,*.jpg,*.woff,*.ttf,*.gif,*.ico,./config/crd/v1beta1/crds/crds.go,./config/crd/v1/crds/crds.go,./config/crd/v2alpha1/crds/crds.go,./go.sum,./LICENSE
|
||||
ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin
|
||||
check_filenames: true
|
||||
|
|
|
@ -333,7 +333,7 @@ linters-settings:
|
|||
force-case-trailing-whitespace: 0
|
||||
# Force cuddling of err checks with err var assignment
|
||||
force-err-cuddling: false
|
||||
# Allow leading comments to be separated with empty liens
|
||||
# Allow leading comments to be separated with empty lines
|
||||
allow-separated-leading-comment: false
|
||||
|
||||
linters:
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Clean up leaked CSI snapshot for incomplete backup
|
|
@ -22,6 +22,8 @@ import (
|
|||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/vmware-tanzu/velero/pkg/util/csi"
|
||||
|
||||
jsonpatch "github.com/evanphx/json-patch/v5"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
@ -35,6 +37,8 @@ import (
|
|||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1"
|
||||
|
||||
"github.com/vmware-tanzu/velero/internal/credentials"
|
||||
"github.com/vmware-tanzu/velero/internal/delete"
|
||||
"github.com/vmware-tanzu/velero/internal/volume"
|
||||
|
@ -251,12 +255,18 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
|||
}
|
||||
// don't defer CleanupClients here, since it was already called above.
|
||||
|
||||
var errs []string
|
||||
|
||||
if len(actions) > 0 {
|
||||
// Download the tarball
|
||||
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name)
|
||||
log.Info("Cleaning up CSI volumesnapshots")
|
||||
if err := r.deleteCSIVolumeSnapshots(ctx, backup, log); err != nil {
|
||||
errs = append(errs, err.Error())
|
||||
}
|
||||
} else {
|
||||
defer closeAndRemoveFile(backupFile, r.logger)
|
||||
deleteCtx := &delete.Context{
|
||||
|
@ -279,8 +289,6 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
|||
}
|
||||
}
|
||||
|
||||
var errs []string
|
||||
|
||||
if backupStore != nil {
|
||||
log.Info("Removing PV snapshots")
|
||||
|
||||
|
@ -495,6 +503,24 @@ func (r *backupDeletionReconciler) deleteExistingDeletionRequests(ctx context.Co
|
|||
return errs
|
||||
}
|
||||
|
||||
// deleteCSIVolumeSnapshots clean up the CSI snapshots created by the backup, this should be called when the backup is failed
|
||||
// when it's running, e.g. due to velero pod restart, and the backup.tar is failed to be downloaded from storage.
|
||||
func (r *backupDeletionReconciler) deleteCSIVolumeSnapshots(ctx context.Context, backup *velerov1api.Backup, log logrus.FieldLogger) error {
|
||||
vsList := snapshotv1api.VolumeSnapshotList{}
|
||||
if err := r.Client.List(ctx, &vsList, &client.ListOptions{
|
||||
LabelSelector: labels.SelectorFromSet(map[string]string{
|
||||
velerov1api.BackupNameLabel: label.GetValidName(backup.Name),
|
||||
}),
|
||||
}); err != nil {
|
||||
return errors.Wrap(err, "error listing volume snapshots")
|
||||
}
|
||||
for _, item := range vsList.Items {
|
||||
vs := item
|
||||
csi.CleanupVolumeSnapshot(&vs, r.Client, log)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *backupDeletionReconciler) deletePodVolumeSnapshots(ctx context.Context, backup *velerov1api.Backup) []error {
|
||||
if r.repoMgr == nil {
|
||||
return nil
|
||||
|
|
|
@ -25,6 +25,8 @@ import (
|
|||
"reflect"
|
||||
"time"
|
||||
|
||||
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1"
|
||||
|
||||
"context"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
@ -609,7 +611,13 @@ func TestBackupDeletionControllerReconcile(t *testing.T) {
|
|||
Provider: "provider-1",
|
||||
},
|
||||
}
|
||||
td := setupBackupDeletionControllerTest(t, defaultTestDbr(), backup, location, snapshotLocation)
|
||||
|
||||
csiSnapshot := builder.ForVolumeSnapshot("user-ns", "vs-1").ObjectMeta(
|
||||
builder.WithLabelsMap(map[string]string{
|
||||
"velero.io/backup-name": "foo",
|
||||
})).SourcePVC("some-pvc").Result()
|
||||
|
||||
td := setupBackupDeletionControllerTest(t, defaultTestDbr(), backup, location, snapshotLocation, csiSnapshot)
|
||||
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")
|
||||
|
||||
snapshots := []*volume.Snapshot{
|
||||
|
@ -654,6 +662,13 @@ func TestBackupDeletionControllerReconcile(t *testing.T) {
|
|||
}, &velerov1api.Backup{})
|
||||
assert.True(t, apierrors.IsNotFound(err), "Expected not found error, but actual value of error: %v", err)
|
||||
|
||||
// leaked CSI snapshot should be deleted
|
||||
err = td.fakeClient.Get(context.TODO(), types.NamespacedName{
|
||||
Namespace: "user-ns",
|
||||
Name: "vs-1",
|
||||
}, &snapshotv1api.VolumeSnapshot{})
|
||||
assert.True(t, apierrors.IsNotFound(err), "Expected not found error for the leaked CSI snapshot, but actual value of error: %v", err)
|
||||
|
||||
// Make sure snapshot was deleted
|
||||
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
|
||||
})
|
||||
|
|
|
@ -490,6 +490,8 @@ func SetVolumeSnapshotContentDeletionPolicy(
|
|||
return crClient.Patch(context.TODO(), vsc, crclient.MergeFrom(originVSC))
|
||||
}
|
||||
|
||||
// CleanupVolumeSnapshot deletes the VolumeSnapshot and the associated VolumeSnapshotContent. It will make sure the
|
||||
// physical snapshot is also deleted.
|
||||
func CleanupVolumeSnapshot(
|
||||
volSnap *snapshotv1api.VolumeSnapshot,
|
||||
crClient crclient.Client,
|
||||
|
@ -528,7 +530,8 @@ func CleanupVolumeSnapshot(
|
|||
}
|
||||
}
|
||||
|
||||
// DeleteVolumeSnapshot handles the VolumeSnapshot instance deletion.
|
||||
// DeleteVolumeSnapshot handles the VolumeSnapshot instance deletion. It will make sure the VolumeSnapshotContent is
|
||||
// recreated so that the physical snapshot is retained.
|
||||
func DeleteVolumeSnapshot(
|
||||
vs snapshotv1api.VolumeSnapshot,
|
||||
vsc snapshotv1api.VolumeSnapshotContent,
|
||||
|
|
Loading…
Reference in New Issue