Clean up leaked CSI snapshot for incomplete backup
This commit makes sure when a backup is deleted the controller will delete the CSI snapshot even when the bakckup tarball is not uploaded. fixes #8160 Signed-off-by: Daniel Jiang <daniel.jiang@broadcom.com>pull/8637/head
parent
5b1738abf8
commit
1c372893ec
|
@ -13,7 +13,7 @@ jobs:
|
||||||
- name: Codespell
|
- name: Codespell
|
||||||
uses: codespell-project/actions-codespell@master
|
uses: codespell-project/actions-codespell@master
|
||||||
with:
|
with:
|
||||||
# ignore the config/.../crd.go file as it's generated binary data that is edited elswhere.
|
# ignore the config/.../crd.go file as it's generated binary data that is edited elsewhere.
|
||||||
skip: .git,*.png,*.jpg,*.woff,*.ttf,*.gif,*.ico,./config/crd/v1beta1/crds/crds.go,./config/crd/v1/crds/crds.go,./config/crd/v2alpha1/crds/crds.go,./go.sum,./LICENSE
|
skip: .git,*.png,*.jpg,*.woff,*.ttf,*.gif,*.ico,./config/crd/v1beta1/crds/crds.go,./config/crd/v1/crds/crds.go,./config/crd/v2alpha1/crds/crds.go,./go.sum,./LICENSE
|
||||||
ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin
|
ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin
|
||||||
check_filenames: true
|
check_filenames: true
|
||||||
|
|
|
@ -333,7 +333,7 @@ linters-settings:
|
||||||
force-case-trailing-whitespace: 0
|
force-case-trailing-whitespace: 0
|
||||||
# Force cuddling of err checks with err var assignment
|
# Force cuddling of err checks with err var assignment
|
||||||
force-err-cuddling: false
|
force-err-cuddling: false
|
||||||
# Allow leading comments to be separated with empty liens
|
# Allow leading comments to be separated with empty lines
|
||||||
allow-separated-leading-comment: false
|
allow-separated-leading-comment: false
|
||||||
|
|
||||||
linters:
|
linters:
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Clean up leaked CSI snapshot for incomplete backup
|
|
@ -22,6 +22,8 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/vmware-tanzu/velero/pkg/util/csi"
|
||||||
|
|
||||||
jsonpatch "github.com/evanphx/json-patch/v5"
|
jsonpatch "github.com/evanphx/json-patch/v5"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
@ -35,6 +37,8 @@ import (
|
||||||
ctrl "sigs.k8s.io/controller-runtime"
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
|
||||||
|
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1"
|
||||||
|
|
||||||
"github.com/vmware-tanzu/velero/internal/credentials"
|
"github.com/vmware-tanzu/velero/internal/credentials"
|
||||||
"github.com/vmware-tanzu/velero/internal/delete"
|
"github.com/vmware-tanzu/velero/internal/delete"
|
||||||
"github.com/vmware-tanzu/velero/internal/volume"
|
"github.com/vmware-tanzu/velero/internal/volume"
|
||||||
|
@ -251,12 +255,18 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
||||||
}
|
}
|
||||||
// don't defer CleanupClients here, since it was already called above.
|
// don't defer CleanupClients here, since it was already called above.
|
||||||
|
|
||||||
|
var errs []string
|
||||||
|
|
||||||
if len(actions) > 0 {
|
if len(actions) > 0 {
|
||||||
// Download the tarball
|
// Download the tarball
|
||||||
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
|
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name)
|
log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name)
|
||||||
|
log.Info("Cleaning up CSI volumesnapshots")
|
||||||
|
if err := r.deleteCSIVolumeSnapshots(ctx, backup, log); err != nil {
|
||||||
|
errs = append(errs, err.Error())
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
defer closeAndRemoveFile(backupFile, r.logger)
|
defer closeAndRemoveFile(backupFile, r.logger)
|
||||||
deleteCtx := &delete.Context{
|
deleteCtx := &delete.Context{
|
||||||
|
@ -279,8 +289,6 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var errs []string
|
|
||||||
|
|
||||||
if backupStore != nil {
|
if backupStore != nil {
|
||||||
log.Info("Removing PV snapshots")
|
log.Info("Removing PV snapshots")
|
||||||
|
|
||||||
|
@ -495,6 +503,24 @@ func (r *backupDeletionReconciler) deleteExistingDeletionRequests(ctx context.Co
|
||||||
return errs
|
return errs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// deleteCSIVolumeSnapshots clean up the CSI snapshots created by the backup, this should be called when the backup is failed
|
||||||
|
// when it's running, e.g. due to velero pod restart, and the backup.tar is failed to be downloaded from storage.
|
||||||
|
func (r *backupDeletionReconciler) deleteCSIVolumeSnapshots(ctx context.Context, backup *velerov1api.Backup, log logrus.FieldLogger) error {
|
||||||
|
vsList := snapshotv1api.VolumeSnapshotList{}
|
||||||
|
if err := r.Client.List(ctx, &vsList, &client.ListOptions{
|
||||||
|
LabelSelector: labels.SelectorFromSet(map[string]string{
|
||||||
|
velerov1api.BackupNameLabel: label.GetValidName(backup.Name),
|
||||||
|
}),
|
||||||
|
}); err != nil {
|
||||||
|
return errors.Wrap(err, "error listing volume snapshots")
|
||||||
|
}
|
||||||
|
for _, item := range vsList.Items {
|
||||||
|
vs := item
|
||||||
|
csi.CleanupVolumeSnapshot(&vs, r.Client, log)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (r *backupDeletionReconciler) deletePodVolumeSnapshots(ctx context.Context, backup *velerov1api.Backup) []error {
|
func (r *backupDeletionReconciler) deletePodVolumeSnapshots(ctx context.Context, backup *velerov1api.Backup) []error {
|
||||||
if r.repoMgr == nil {
|
if r.repoMgr == nil {
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -25,6 +25,8 @@ import (
|
||||||
"reflect"
|
"reflect"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1"
|
||||||
|
|
||||||
"context"
|
"context"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
@ -609,7 +611,13 @@ func TestBackupDeletionControllerReconcile(t *testing.T) {
|
||||||
Provider: "provider-1",
|
Provider: "provider-1",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
td := setupBackupDeletionControllerTest(t, defaultTestDbr(), backup, location, snapshotLocation)
|
|
||||||
|
csiSnapshot := builder.ForVolumeSnapshot("user-ns", "vs-1").ObjectMeta(
|
||||||
|
builder.WithLabelsMap(map[string]string{
|
||||||
|
"velero.io/backup-name": "foo",
|
||||||
|
})).SourcePVC("some-pvc").Result()
|
||||||
|
|
||||||
|
td := setupBackupDeletionControllerTest(t, defaultTestDbr(), backup, location, snapshotLocation, csiSnapshot)
|
||||||
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")
|
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")
|
||||||
|
|
||||||
snapshots := []*volume.Snapshot{
|
snapshots := []*volume.Snapshot{
|
||||||
|
@ -654,6 +662,13 @@ func TestBackupDeletionControllerReconcile(t *testing.T) {
|
||||||
}, &velerov1api.Backup{})
|
}, &velerov1api.Backup{})
|
||||||
assert.True(t, apierrors.IsNotFound(err), "Expected not found error, but actual value of error: %v", err)
|
assert.True(t, apierrors.IsNotFound(err), "Expected not found error, but actual value of error: %v", err)
|
||||||
|
|
||||||
|
// leaked CSI snapshot should be deleted
|
||||||
|
err = td.fakeClient.Get(context.TODO(), types.NamespacedName{
|
||||||
|
Namespace: "user-ns",
|
||||||
|
Name: "vs-1",
|
||||||
|
}, &snapshotv1api.VolumeSnapshot{})
|
||||||
|
assert.True(t, apierrors.IsNotFound(err), "Expected not found error for the leaked CSI snapshot, but actual value of error: %v", err)
|
||||||
|
|
||||||
// Make sure snapshot was deleted
|
// Make sure snapshot was deleted
|
||||||
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
|
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
|
||||||
})
|
})
|
||||||
|
|
|
@ -490,6 +490,8 @@ func SetVolumeSnapshotContentDeletionPolicy(
|
||||||
return crClient.Patch(context.TODO(), vsc, crclient.MergeFrom(originVSC))
|
return crClient.Patch(context.TODO(), vsc, crclient.MergeFrom(originVSC))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CleanupVolumeSnapshot deletes the VolumeSnapshot and the associated VolumeSnapshotContent. It will make sure the
|
||||||
|
// physical snapshot is also deleted.
|
||||||
func CleanupVolumeSnapshot(
|
func CleanupVolumeSnapshot(
|
||||||
volSnap *snapshotv1api.VolumeSnapshot,
|
volSnap *snapshotv1api.VolumeSnapshot,
|
||||||
crClient crclient.Client,
|
crClient crclient.Client,
|
||||||
|
@ -528,7 +530,8 @@ func CleanupVolumeSnapshot(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteVolumeSnapshot handles the VolumeSnapshot instance deletion.
|
// DeleteVolumeSnapshot handles the VolumeSnapshot instance deletion. It will make sure the VolumeSnapshotContent is
|
||||||
|
// recreated so that the physical snapshot is retained.
|
||||||
func DeleteVolumeSnapshot(
|
func DeleteVolumeSnapshot(
|
||||||
vs snapshotv1api.VolumeSnapshot,
|
vs snapshotv1api.VolumeSnapshot,
|
||||||
vsc snapshotv1api.VolumeSnapshotContent,
|
vsc snapshotv1api.VolumeSnapshotContent,
|
||||||
|
|
Loading…
Reference in New Issue