Delete dangling volumesnapshotcontents from velero backups (#2480)

* Delete dangling volumesnapshotcontents from velero backups

Signed-off-by: Ashish Amarnath <ashisham@vmware.com>

* add changelog

Signed-off-by: Ashish Amarnath <ashisham@vmware.com>

* handle not found errors from VSC delete

Signed-off-by: Ashish Amarnath <ashisham@vmware.com>

* clean up unit tests

Signed-off-by: Ashish Amarnath <ashisham@vmware.com>
pull/2500/head
Ashish Amarnath 2020-05-04 12:41:27 -07:00 committed by GitHub
parent 577af5a5b1
commit 6b5a084f32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 253 additions and 33 deletions

View File

@ -0,0 +1 @@
during backup deletion also delete CSI volumesnapshotcontents that were created as a part of the backup but the associated volumesnapshot object does not exist

View File

@ -709,6 +709,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.sharedInformerFactory.Velero().V1().BackupStorageLocations().Lister(),
s.sharedInformerFactory.Velero().V1().VolumeSnapshotLocations().Lister(),
csiVSLister,
csiVSCLister,
s.csiSnapshotClient,
newPluginManager,
s.metrics,

View File

@ -67,6 +67,7 @@ type backupDeletionController struct {
backupLocationLister velerov1listers.BackupStorageLocationLister
snapshotLocationLister velerov1listers.VolumeSnapshotLocationLister
csiSnapshotLister snapshotv1beta1listers.VolumeSnapshotLister
csiSnapshotContentLister snapshotv1beta1listers.VolumeSnapshotContentLister
csiSnapshotClient *snapshotterClientSet.Clientset
processRequestFunc func(*velerov1api.DeleteBackupRequest) error
clock clock.Clock
@ -89,6 +90,7 @@ func NewBackupDeletionController(
backupLocationLister velerov1listers.BackupStorageLocationLister,
snapshotLocationLister velerov1listers.VolumeSnapshotLocationLister,
csiSnapshotLister snapshotv1beta1listers.VolumeSnapshotLister,
csiSnapshotContentLister snapshotv1beta1listers.VolumeSnapshotContentLister,
csiSnapshotClient *snapshotterClientSet.Clientset,
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
metrics *metrics.ServerMetrics,
@ -106,6 +108,7 @@ func NewBackupDeletionController(
backupLocationLister: backupLocationLister,
snapshotLocationLister: snapshotLocationLister,
csiSnapshotLister: csiSnapshotLister,
csiSnapshotContentLister: csiSnapshotContentLister,
csiSnapshotClient: csiSnapshotClient,
metrics: metrics,
// use variables to refer to these functions so they can be
@ -324,7 +327,14 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR
if features.IsEnabled(velerov1api.CSIFeatureFlag) {
log.Info("Removing CSI volumesnapshots")
if csiErrs := deleteCSIVolumeSnapshots(backup, c.csiSnapshotLister, c.csiSnapshotClient.SnapshotV1beta1(), log); len(csiErrs) > 0 {
if csiErrs := deleteCSIVolumeSnapshots(backup.Name, c.csiSnapshotLister, c.csiSnapshotClient.SnapshotV1beta1(), log); len(csiErrs) > 0 {
for _, err := range csiErrs {
errs = append(errs, err.Error())
}
}
log.Info("Removing CSI volumesnapshotcontents")
if csiErrs := deleteCSIVolumeSnapshotContents(backup.Name, c.csiSnapshotContentLister, c.csiSnapshotClient.SnapshotV1beta1(), log); len(csiErrs) > 0 {
for _, err := range csiErrs {
errs = append(errs, err.Error())
}
@ -461,11 +471,21 @@ func (c *backupDeletionController) deleteResticSnapshots(backup *velerov1api.Bac
return errs
}
func deleteCSIVolumeSnapshots(backup *velerov1api.Backup, csiSnapshotLister snapshotv1beta1listers.VolumeSnapshotLister,
func setVolumeSnapshotContentDeletionPolicy(vscName string, csiClient snapshotter.SnapshotV1beta1Interface, log *logrus.Entry) error {
log.Infof("Setting DeletionPolicy of CSI volumesnapshotcontent %s to Delete", vscName)
pb := []byte(`{"spec":{"deletionPolicy":"Delete"}}`)
_, err := csiClient.VolumeSnapshotContents().Patch(vscName, types.MergePatchType, pb)
if err != nil {
return err
}
return nil
}
func deleteCSIVolumeSnapshots(backupName string, csiSnapshotLister snapshotv1beta1listers.VolumeSnapshotLister,
csiClient snapshotter.SnapshotV1beta1Interface, log *logrus.Entry) []error {
errs := []error{}
selector := labels.SelectorFromSet(map[string]string{velerov1api.BackupNameLabel: backup.Name})
selector := labels.SelectorFromSet(map[string]string{velerov1api.BackupNameLabel: backupName})
csiVolSnaps, err := csiSnapshotLister.List(selector)
if err != nil {
return []error{err}
@ -477,12 +497,10 @@ func deleteCSIVolumeSnapshots(backup *velerov1api.Backup, csiSnapshotLister snap
if csiVS.Status != nil && csiVS.Status.BoundVolumeSnapshotContentName != nil {
// we patch the DeletionPolicy of the volumesnapshotcontent to set it to Delete.
// This ensures that the volume snapshot in the storage provider is also deleted.
log.Infof("Setting DeletionPolicy of CSI volumesnapshotcontent %s to Delete", *csiVS.Status.BoundVolumeSnapshotContentName)
pb := []byte(`{"spec":{"deletionPolicy":"Delete"}}`)
_, err := csiClient.VolumeSnapshotContents().Patch(*csiVS.Status.BoundVolumeSnapshotContentName, types.MergePatchType, pb)
err := setVolumeSnapshotContentDeletionPolicy(*csiVS.Status.BoundVolumeSnapshotContentName, csiClient, log)
if err != nil && !apierrors.IsNotFound(err) {
errs = append(errs, errors.Wrapf(err, "failed to update deletion policy on volumesnapshotcontent %s skipping deletion of volumesnapshot %s/%s",
*csiVS.Status.BoundVolumeSnapshotContentName, csiVS.Namespace, csiVS.Name))
log.Errorf("Skipping deletion of volumesnapshot %s/%s", csiVS.Namespace, csiVS.Name)
errs = append(errs, err)
continue
}
}
@ -494,6 +512,41 @@ func deleteCSIVolumeSnapshots(backup *velerov1api.Backup, csiSnapshotLister snap
return errs
}
func deleteCSIVolumeSnapshotContents(backupName string, csiVSCLister snapshotv1beta1listers.VolumeSnapshotContentLister,
csiClient snapshotter.SnapshotV1beta1Interface, log *logrus.Entry) []error {
errs := []error{}
selector := labels.SelectorFromSet(map[string]string{velerov1api.BackupNameLabel: backupName})
csiVolSnapConts, err := csiVSCLister.List(selector)
if err != nil {
return []error{err}
}
// It is possible that by the time deleteCSIVolumeSnapshotContents is called after deleteCSIVolumeSnapshots
// that deletion of VSCs hasn't been completed, by the snapshot-controller (one of the CSI components).
// For that reason the csiVSCLister returned VSCs that are yet to be deleted. To handle this scenario,
// we swallow `IsNotFound` errors from the setVolumeSnapshotContentDeletionPolicy function and the
// csiClient.VolumeSnapshotContents().Delete(...)
log.Infof("Deleting %d CSI volumesnapshotcontents", len(csiVolSnapConts))
for _, snapCont := range csiVolSnapConts {
err := setVolumeSnapshotContentDeletionPolicy(snapCont.Name, csiClient, log)
if err != nil && !apierrors.IsNotFound(err) {
log.Errorf("Failed to set DeletionPolicy on volumesnapshotcontent %s. Skipping deletion", snapCont.Name)
errs = append(errs, err)
continue
}
if apierrors.IsNotFound(err) {
log.Infof("volumesnapshotcontent %s not found", snapCont.Name)
continue
}
log.Infof("Deleting volumesnapshotcontent %s", snapCont.Name)
err = csiClient.VolumeSnapshotContents().Delete(snapCont.Name, &metav1.DeleteOptions{})
if err != nil && !apierrors.IsNotFound(err) {
errs = append(errs, err)
}
}
return errs
}
const deleteBackupRequestMaxAge = 24 * time.Hour
func (c *backupDeletionController) deleteExpiredRequests() {

View File

@ -66,6 +66,7 @@ func TestBackupDeletionControllerProcessQueueItem(t *testing.T) {
sharedInformers.Velero().V1().BackupStorageLocations().Lister(),
sharedInformers.Velero().V1().VolumeSnapshotLocations().Lister(),
nil, // csiSnapshotLister
nil, // csiSnapshotContentLister
nil, // csiSnapshotClient
nil, // new plugin manager func
metrics.NewServerMetrics(),
@ -158,6 +159,7 @@ func setupBackupDeletionControllerTest(objects ...runtime.Object) *backupDeletio
sharedInformers.Velero().V1().BackupStorageLocations().Lister(),
sharedInformers.Velero().V1().VolumeSnapshotLocations().Lister(),
nil, // csiSnapshotLister
nil, // csiSnapshotContentLister
nil, // csiSnapshotClient
func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager },
metrics.NewServerMetrics(),
@ -858,6 +860,7 @@ func TestBackupDeletionControllerDeleteExpiredRequests(t *testing.T) {
sharedInformers.Velero().V1().BackupStorageLocations().Lister(),
sharedInformers.Velero().V1().VolumeSnapshotLocations().Lister(),
nil, // csiSnapshotLister
nil, // csiSnapshotContentLister
nil, // csiSnapshotClient
nil, // new plugin manager func
metrics.NewServerMetrics(),
@ -883,6 +886,85 @@ func TestBackupDeletionControllerDeleteExpiredRequests(t *testing.T) {
}
}
func TestSetVolumeSnapshotContentDeletionPolicy(t *testing.T) {
testCases := []struct {
name string
inputVSCName string
objs []runtime.Object
expectError bool
}{
{
name: "should update DeletionPolicy of a VSC from retain to delete",
inputVSCName: "retainVSC",
objs: []runtime.Object{
&snapshotv1beta1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "retainVSC",
},
Spec: snapshotv1beta1api.VolumeSnapshotContentSpec{
DeletionPolicy: snapshotv1beta1api.VolumeSnapshotContentRetain,
},
},
},
expectError: false,
},
{
name: "should be a no-op updating if DeletionPolicy of a VSC is already Delete",
inputVSCName: "deleteVSC",
objs: []runtime.Object{
&snapshotv1beta1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "deleteVSC",
},
Spec: snapshotv1beta1api.VolumeSnapshotContentSpec{
DeletionPolicy: snapshotv1beta1api.VolumeSnapshotContentDelete,
},
},
},
expectError: false,
},
{
name: "should update DeletionPolicy of a VSC with no DeletionPolicy",
inputVSCName: "nothingVSC",
objs: []runtime.Object{
&snapshotv1beta1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "nothingVSC",
},
Spec: snapshotv1beta1api.VolumeSnapshotContentSpec{},
},
},
expectError: false,
},
{
name: "should return not found error if supplied VSC does not exist",
inputVSCName: "does-not-exist",
objs: []runtime.Object{},
expectError: true,
},
}
log := velerotest.NewLogger().WithFields(
logrus.Fields{
"unit-test": "unit-test",
},
)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fakeClient := snapshotFake.NewSimpleClientset(tc.objs...)
err := setVolumeSnapshotContentDeletionPolicy(tc.inputVSCName, fakeClient.SnapshotV1beta1(), log)
if tc.expectError {
assert.NotNil(t, err)
} else {
assert.Nil(t, err)
actual, err := fakeClient.SnapshotV1beta1().VolumeSnapshotContents().Get(tc.inputVSCName, metav1.GetOptions{})
assert.Nil(t, err)
assert.Equal(t, snapshotv1beta1api.VolumeSnapshotContentDelete, actual.Spec.DeletionPolicy)
}
})
}
}
func TestDeleteCSIVolumeSnapshots(t *testing.T) {
//Backup1
ns1VS1VSCName := "ns1vs1vsc"
@ -1014,47 +1096,130 @@ func TestDeleteCSIVolumeSnapshots(t *testing.T) {
},
}
objs := []runtime.Object{&ns1VS1, &ns1VS1VSC, &ns1VS2, &ns1VS2VSC, &ns2VS1, &ns2VS1VSC, &ns2VS2, &ns2VS2VSC, &ns1NilStatusVS, &ns1NilBoundVSCVS, &ns1NonExistentVSCVS}
fakeClient := snapshotFake.NewSimpleClientset(objs...)
fakeSharedInformer := snapshotv1beta1informers.NewSharedInformerFactoryWithOptions(fakeClient, 0)
for _, o := range objs {
fakeSharedInformer.Snapshot().V1beta1().VolumeSnapshots().Informer().GetStore().Add(o)
}
testCases := []struct {
name string
backup *velerov1.Backup
name string
backupName string
objs []runtime.Object
}{
{
name: "should delete volumesnapshots bound to existing volumesnapshotcontent",
backup: builder.ForBackup("velero", "backup1").Result(),
name: "should delete volumesnapshots bound to existing volumesnapshotcontent",
backupName: "backup1",
objs: []runtime.Object{&ns1VS1VSC, &ns1VS1, &ns1VS2VSC, &ns1VS2, &ns2VS1VSC, &ns2VS1, &ns2VS2VSC, &ns2VS2},
},
{
name: "should delete volumesnapshots with nil status",
backup: builder.ForBackup("velero", "backup2").Result(),
name: "should delete volumesnapshots with nil status",
backupName: "backup2",
objs: []runtime.Object{&ns1NilStatusVS},
},
{
name: "should delete volumesnapshots with nil BoundVolumeSnapshotContentName",
backup: builder.ForBackup("velero", "backup3").Result(),
name: "should delete volumesnapshots with nil BoundVolumeSnapshotContentName",
backupName: "backup3",
objs: []runtime.Object{&ns1NilBoundVSCVS},
},
{
name: "should delete volumesnapshots bound to non-existent volumesnapshotcontents",
backup: builder.ForBackup("velero", "backup4").Result(),
name: "should delete volumesnapshots bound to non-existent volumesnapshotcontents",
backupName: "backup4",
objs: []runtime.Object{&ns1NonExistentVSCVS},
},
{
name: "should be a no-op when there are no volumesnapshots to delete",
backup: builder.ForBackup("velero", "backup-no-vs").Result(),
name: "should be a no-op when there are no volumesnapshots to delete",
backupName: "backup-no-vs",
objs: []runtime.Object{},
},
}
log := velerotest.NewLogger().WithFields(
logrus.Fields{
"unit-test": "unit-test",
},
)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
log := velerotest.NewLogger().WithFields(
logrus.Fields{
"unit-test": "unit-test",
},
)
errs := deleteCSIVolumeSnapshots(tc.backup, fakeSharedInformer.Snapshot().V1beta1().VolumeSnapshots().Lister(), fakeClient.SnapshotV1beta1(), log)
fakeClient := snapshotFake.NewSimpleClientset(tc.objs...)
fakeSharedInformer := snapshotv1beta1informers.NewSharedInformerFactoryWithOptions(fakeClient, 0)
for _, o := range tc.objs {
fakeSharedInformer.Snapshot().V1beta1().VolumeSnapshots().Informer().GetStore().Add(o)
}
errs := deleteCSIVolumeSnapshots(tc.backupName, fakeSharedInformer.Snapshot().V1beta1().VolumeSnapshots().Lister(), fakeClient.SnapshotV1beta1(), log)
assert.Empty(t, errs)
})
}
}
func TestDeleteCSIVolumeSnapshotContents(t *testing.T) {
retainVSC := snapshotv1beta1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "retainVSC",
Labels: map[string]string{
velerov1.BackupNameLabel: "backup1",
},
},
Spec: snapshotv1beta1api.VolumeSnapshotContentSpec{
DeletionPolicy: snapshotv1beta1api.VolumeSnapshotContentRetain,
},
}
deleteVSC := snapshotv1beta1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "deleteVSC",
Labels: map[string]string{
velerov1.BackupNameLabel: "backup2",
},
},
Spec: snapshotv1beta1api.VolumeSnapshotContentSpec{
DeletionPolicy: snapshotv1beta1api.VolumeSnapshotContentDelete,
},
}
nothingVSC := snapshotv1beta1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "nothingVSC",
Labels: map[string]string{
velerov1.BackupNameLabel: "backup3",
},
},
Spec: snapshotv1beta1api.VolumeSnapshotContentSpec{},
}
testCases := []struct {
name string
backupName string
objs []runtime.Object
}{
{
name: "should delete volumesnapshotcontent with DeletionPolicy Retain",
backupName: "backup1",
objs: []runtime.Object{&retainVSC},
},
{
name: "should delete volumesnapshotcontent with DeletionPolicy Delete",
backupName: "backup3",
objs: []runtime.Object{&deleteVSC},
},
{
name: "should delete volumesnapshotcontent with No DeletionPolicy",
backupName: "backup3",
objs: []runtime.Object{&nothingVSC},
},
{
name: "should return no error when backup has no volumesnapshotconents",
backupName: "backup-with-no-vsc",
objs: []runtime.Object{},
},
}
log := velerotest.NewLogger().WithFields(
logrus.Fields{
"unit-test": "unit-test",
},
)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fakeClient := snapshotFake.NewSimpleClientset(tc.objs...)
fakeSharedInformer := snapshotv1beta1informers.NewSharedInformerFactoryWithOptions(fakeClient, 0)
for _, o := range tc.objs {
fakeSharedInformer.Snapshot().V1beta1().VolumeSnapshotContents().Informer().GetStore().Add(o)
}
errs := deleteCSIVolumeSnapshotContents(tc.backupName, fakeSharedInformer.Snapshot().V1beta1().VolumeSnapshotContents().Lister(), fakeClient.SnapshotV1beta1(), log)
assert.Empty(t, errs)
})
}