From 6b7dd12bf76c2fbfe5a3908ad385cb199a8100ad Mon Sep 17 00:00:00 2001 From: Xun Jiang Date: Fri, 14 Feb 2025 14:20:03 +0800 Subject: [PATCH] Modify VS and VSC restore actions. Signed-off-by: Xun Jiang --- design/clean_artifacts_in_csi_flow.md | 181 +++++++++++------- .../csi/volumesnapshotcontent_action.go | 8 + .../csi/volumesnapshotcontent_action_test.go | 52 +++++ internal/volume/volumes_information.go | 1 + pkg/backup/actions/csi/pvc_action_test.go | 4 +- .../actions/csi/volumesnapshot_action.go | 5 +- .../volume_snapshot_content_builder.go | 4 +- pkg/cmd/server/plugin/plugin.go | 8 +- pkg/constant/constant.go | 3 +- .../restore_finalizer_controller.go | 20 +- .../restore_finalizer_controller_test.go | 8 +- pkg/restore/actions/csi/pvc_action.go | 19 +- pkg/restore/actions/csi/pvc_action_test.go | 16 +- .../actions/csi/volumesnapshot_action.go | 105 ++-------- .../actions/csi/volumesnapshot_action_test.go | 92 ++++----- .../csi/volumesnapshotcontent_action.go | 78 +++++++- .../csi/volumesnapshotcontent_action_test.go | 95 ++++++++- pkg/util/csi/volume_snapshot.go | 20 +- pkg/util/csi/volume_snapshot_test.go | 2 +- pkg/util/util.go | 12 ++ site/content/docs/main/csi.md | 4 +- 21 files changed, 448 insertions(+), 289 deletions(-) diff --git a/design/clean_artifacts_in_csi_flow.md b/design/clean_artifacts_in_csi_flow.md index 5fd4c2a8c..2d42bb2a9 100644 --- a/design/clean_artifacts_in_csi_flow.md +++ b/design/clean_artifacts_in_csi_flow.md @@ -95,7 +95,7 @@ func DeleteReadyVolumeSnapshot( ### Restore -#### Restore the VolumeSnapshotContent from the backup instead of creating a new one dynamically +#### Restore the VolumeSnapshotContent The current behavior of VSC restoration is that the VSC from the backup is restore, and the restored VS also triggers creating a new VSC dynamically. Two VSCs created for the same VS in one restore seems not right. @@ -106,6 +106,16 @@ If the `SkipRestore` is set true in the restore action's result, the secret retu As a result, restore the VSC from the backup, and setup the VSC and the VS's relation is a better choice. +Another consideration is the VSC name should not be the same as the backed-up VSC's, because the older version Velero's restore and backup keep the VSC after completion. + +There's high possibility that the restore will fail due to the VSC already exists in the cluster. + +Multiple restores of the same backup will also meet the same problem. + +The proposed solution is using the restore's UID and the VS's name to generate sha256 hash value as the new VSC name. Both the VS and VSC RestoreItemAction can access those UIDs, and it will avoid the conflicts issues. + +The restored VS name also shares the same generated name. + The VS-referenced VSC name and the VSC's snapshot handle name are in their status. Velero restore process purges the restore resources' metadata and status before running the RestoreItemActions. @@ -114,6 +124,78 @@ As a result, we cannot read these information in the VS and VSC RestoreItemActio Fortunately, RestoreItemAction input parameters includes the `ItemFromBackup`. The status is intact in `ItemFromBackup`. +``` go +func (p *volumeSnapshotRestoreItemAction) Execute( + input *velero.RestoreItemActionExecuteInput, +) (*velero.RestoreItemActionExecuteOutput, error) { + p.log.Info("Starting VolumeSnapshotRestoreItemAction") + + if boolptr.IsSetToFalse(input.Restore.Spec.RestorePVs) { + p.log.Infof("Restore %s/%s did not request for PVs to be restored.", + input.Restore.Namespace, input.Restore.Name) + return &velero.RestoreItemActionExecuteOutput{SkipRestore: true}, nil + } + + var vs snapshotv1api.VolumeSnapshot + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.Item.UnstructuredContent(), &vs); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Wrapf(err, "failed to convert input.Item from unstructured") + } + + var vsFromBackup snapshotv1api.VolumeSnapshot + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.ItemFromBackup.UnstructuredContent(), &vsFromBackup); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Wrapf(err, "failed to convert input.Item from unstructured") + } + + // If cross-namespace restore is configured, change the namespace + // for VolumeSnapshot object to be restored + newNamespace, ok := input.Restore.Spec.NamespaceMapping[vs.GetNamespace()] + if !ok { + // Use original namespace + newNamespace = vs.Namespace + } + + if csiutil.IsVolumeSnapshotExists(newNamespace, vs.Name, p.crClient) { + p.log.Debugf("VolumeSnapshot %s already exists in the cluster. Return without change.", vs.Namespace+"/"+vs.Name) + return &velero.RestoreItemActionExecuteOutput{UpdatedItem: input.Item}, nil + } + + newVSCName := generateSha256FromRestoreAndVsUID(string(input.Restore.UID), string(vsFromBackup.UID)) + // Reset Spec to convert the VolumeSnapshot from using + // the dynamic VolumeSnapshotContent to the static one. + resetVolumeSnapshotSpecForRestore(&vs, &newVSCName) + + // Reset VolumeSnapshot annotation. By now, only change + // DeletionPolicy to Retain. + resetVolumeSnapshotAnnotation(&vs) + + vsMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vs) + if err != nil { + p.log.Errorf("Fail to convert VS %s to unstructured", vs.Namespace+"/"+vs.Name) + return nil, errors.WithStack(err) + } + + p.log.Infof(`Returning from VolumeSnapshotRestoreItemAction with + no additionalItems`) + + return &velero.RestoreItemActionExecuteOutput{ + UpdatedItem: &unstructured.Unstructured{Object: vsMap}, + AdditionalItems: []velero.ResourceIdentifier{}, + }, nil +} + +// generateSha256FromRestoreAndVsUID Use the restore UID and the VS UID to generate the new VSC name. +// By this way, VS and VSC RIA action can get the same VSC name. +func generateSha256FromRestoreAndVsUID(restoreUID string, vsUID string) string { + sha256Bytes := sha256.Sum256([]byte(restoreUID + "/" + vsUID)) + return "vsc-" + hex.EncodeToString(sha256Bytes[:]) +} +``` + +#### Restore the VolumeSnapshot ``` go // Execute restores a VolumeSnapshotContent object without modification // returning the snapshot lister secret, if any, as additional items to restore. @@ -128,9 +210,9 @@ func (p *volumeSnapshotContentRestoreItemAction) Execute( p.log.Info("Starting VolumeSnapshotContentRestoreItemAction") - var snapCont snapshotv1api.VolumeSnapshotContent + var vsc snapshotv1api.VolumeSnapshotContent if err := runtime.DefaultUnstructuredConverter.FromUnstructured( - input.Item.UnstructuredContent(), &snapCont); err != nil { + input.Item.UnstructuredContent(), &vsc); err != nil { return &velero.RestoreItemActionExecuteOutput{}, errors.Wrapf(err, "failed to convert input.Item from unstructured") } @@ -144,39 +226,42 @@ func (p *volumeSnapshotContentRestoreItemAction) Execute( // If cross-namespace restore is configured, change the namespace // for VolumeSnapshot object to be restored - newNamespace, ok := input.Restore.Spec.NamespaceMapping[snapCont.Spec.VolumeSnapshotRef.Namespace] + newNamespace, ok := input.Restore.Spec.NamespaceMapping[vsc.Spec.VolumeSnapshotRef.Namespace] if ok { // Update the referenced VS namespace to the mapping one. - snapCont.Spec.VolumeSnapshotRef.Namespace = newNamespace + vsc.Spec.VolumeSnapshotRef.Namespace = newNamespace } + // Reset VSC name to align with VS. + vsc.Name = generateSha256FromRestoreAndVsUID(string(input.Restore.UID), string(vscFromBackup.Spec.VolumeSnapshotRef.UID)) + // Reset the ResourceVersion and UID of referenced VolumeSnapshot. - snapCont.Spec.VolumeSnapshotRef.ResourceVersion = "" - snapCont.Spec.VolumeSnapshotRef.UID = "" + vsc.Spec.VolumeSnapshotRef.ResourceVersion = "" + vsc.Spec.VolumeSnapshotRef.UID = "" // Set the DeletionPolicy to Retain to avoid VS deletion will not trigger snapshot deletion - snapCont.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentRetain + vsc.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentRetain if vscFromBackup.Status != nil && vscFromBackup.Status.SnapshotHandle != nil { - snapCont.Spec.Source.VolumeHandle = nil - snapCont.Spec.Source.SnapshotHandle = vscFromBackup.Status.SnapshotHandle + vsc.Spec.Source.VolumeHandle = nil + vsc.Spec.Source.SnapshotHandle = vscFromBackup.Status.SnapshotHandle } else { - p.log.Errorf("fail to get snapshot handle from VSC %s status", snapCont.Name) - return nil, errors.Errorf("fail to get snapshot handle from VSC %s status", snapCont.Name) + p.log.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) + return nil, errors.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) } additionalItems := []velero.ResourceIdentifier{} - if csi.IsVolumeSnapshotContentHasDeleteSecret(&snapCont) { + if csi.IsVolumeSnapshotContentHasDeleteSecret(&vsc) { additionalItems = append(additionalItems, velero.ResourceIdentifier{ GroupResource: schema.GroupResource{Group: "", Resource: "secrets"}, - Name: snapCont.Annotations[velerov1api.PrefixedSecretNameAnnotation], - Namespace: snapCont.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], + Name: vsc.Annotations[velerov1api.PrefixedSecretNameAnnotation], + Namespace: vsc.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], }, ) } - vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&snapCont) + vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vsc) if err != nil { return nil, errors.WithStack(err) } @@ -190,61 +275,6 @@ func (p *volumeSnapshotContentRestoreItemAction) Execute( } ``` -Because VSC is not restored dynamically, if we run restores two times for the same backup in the same cluster, the second restore will fail due to the VSC is there in the cluster, and it is already bound to an existing VS. - -To avoid this issue, it's better to delete the restored VS and VSC after restore completes. - -The VolumeSnapshot and VolumeSnapshotContent are delete in the `restore_finalizer_controller`. - -``` go -func (ctx *finalizerContext) deleteVolumeSnapshotAndVolumeSnapshotContent() (errs results.Result) { - for _, operation := range ctx.restoreItemOperationList.items { - if operation.Spec.RestoreItemAction == constant.PluginCsiVolumeSnapshotRestoreRIA && - operation.Status.Phase == itemoperation.OperationPhaseCompleted { - if operation.Spec.OperationID == "" || !strings.Contains(operation.Spec.OperationID, "/") { - ctx.logger.Errorf("invalid OperationID: %s", operation.Spec.OperationID) - errs.Add("", errors.Errorf("invalid OperationID: %s", operation.Spec.OperationID)) - continue - } - - operationIDParts := strings.Split(operation.Spec.OperationID, "/") - - vs := new(snapshotv1api.VolumeSnapshot) - vsc := new(snapshotv1api.VolumeSnapshotContent) - - if err := ctx.crClient.Get( - context.TODO(), - client.ObjectKey{Namespace: operationIDParts[0], Name: operationIDParts[1]}, - vs, - ); err != nil { - ctx.logger.Errorf("Fail to get the VolumeSnapshot %s: %s", operation.Spec.OperationID, err.Error()) - errs.Add(operationIDParts[0], errors.Errorf("Fail to get the VolumeSnapshot %s: %s", operation.Spec.OperationID, err.Error())) - continue - } - - if err := ctx.crClient.Delete(context.TODO(), vs); err != nil { - ctx.logger.Errorf("Fail to delete VolumeSnapshot %s: %s", operation.Spec.OperationID, err.Error()) - errs.Add(vs.Namespace, err) - } - - if vs.Status != nil && vs.Status.BoundVolumeSnapshotContentName != nil { - vsc.Name = *vs.Status.BoundVolumeSnapshotContentName - } else { - ctx.logger.Errorf("VolumeSnapshotContent %s is not ready.", vsc.Name) - errs.Add("", errors.Errorf("VolumeSnapshotContent %s is not ready.", vsc.Name)) - continue - } - - if err := ctx.crClient.Delete(context.TODO(), vsc); err != nil { - ctx.logger.Errorf("Fail to delete VolumeSnapshotContent %s: %s", vsc.Name, err.Error()) - errs.Add("", errors.Errorf("Fail to delete the VolumeSnapshotContent %s", err)) - } - } - } - - return errs -} -``` ### Backup Sync csi-volumesnapshotclasses.json, csi-volumesnapshotcontents.json, and csi-volumesnapshots.json are CSI-related metadata files in the BSL for each backup. @@ -266,8 +296,17 @@ For the VSC DeleteItemAction, we need to generate a VSC. Because we only care ab Create a static VSC, then point it to a pseudo VS, and reference to the snapshot handle should be enough. +To avoid the created VSC conflict with older version Velero B/R generated ones, the VSC name is set to `vsc-uuid`. + The following is an example of the implementation. ``` go + uuid, err := uuid.NewRandom() + if err != nil { + p.log.WithError(err).Errorf("Fail to generate the UUID to create VSC %s", snapCont.Name) + return errors.Wrapf(err, "Fail to generate the UUID to create VSC %s", snapCont.Name) + } + snapCont.Name = "vsc-" + uuid.String() + snapCont.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentDelete snapCont.Spec.Source = snapshotv1api.VolumeSnapshotContentSource{ diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index 641513eef..8ac7d4d2a 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -20,6 +20,7 @@ import ( "context" "time" + "github.com/google/uuid" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -80,6 +81,13 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( p.log.Infof("Deleting VolumeSnapshotContent %s", snapCont.Name) + uuid, err := uuid.NewRandom() + if err != nil { + p.log.WithError(err).Errorf("Fail to generate the UUID to create VSC %s", snapCont.Name) + return errors.Wrapf(err, "Fail to generate the UUID to create VSC %s", snapCont.Name) + } + snapCont.Name = "vsc-" + uuid.String() + snapCont.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentDelete snapCont.Spec.Source = snapshotv1api.VolumeSnapshotContentSource{ diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index 805c1b82e..223b84a83 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -25,6 +25,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" crclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -156,3 +157,54 @@ func TestNewVolumeSnapshotContentDeleteItemAction(t *testing.T) { _, err1 := plugin1(logger) require.NoError(t, err1) } + +func TestCheckVSCReadiness(t *testing.T) { + tests := []struct { + name string + vsc *snapshotv1api.VolumeSnapshotContent + createVSC bool + expectErr bool + ready bool + }{ + { + name: "VSC not exist", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vsc-1", + Namespace: "velero", + }, + }, + createVSC: false, + expectErr: true, + ready: false, + }, + { + name: "VSC not ready", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vsc-1", + Namespace: "velero", + }, + }, + createVSC: true, + expectErr: false, + ready: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.TODO() + crClient := velerotest.NewFakeControllerRuntimeClient(t) + if test.createVSC { + require.NoError(t, crClient.Create(ctx, test.vsc)) + } + + ready, err := checkVSCReadiness(ctx, test.vsc, crClient) + require.Equal(t, test.ready, ready) + if test.expectErr { + require.Error(t, err) + } + }) + } +} diff --git a/internal/volume/volumes_information.go b/internal/volume/volumes_information.go index a89806ee0..d0c70a2b8 100644 --- a/internal/volume/volumes_information.go +++ b/internal/volume/volumes_information.go @@ -838,6 +838,7 @@ func (t *RestoreVolumeInfoTracker) Result() []*RestoreVolumeInfo { if csiSnapshot.Spec.Source.VolumeSnapshotContentName != nil { vscName = *csiSnapshot.Spec.Source.VolumeSnapshotContentName } + volumeInfo := &RestoreVolumeInfo{ PVCNamespace: pvcNS, PVCName: pvcName, diff --git a/pkg/backup/actions/csi/pvc_action_test.go b/pkg/backup/actions/csi/pvc_action_test.go index 59da6145f..5af6e2f33 100644 --- a/pkg/backup/actions/csi/pvc_action_test.go +++ b/pkg/backup/actions/csi/pvc_action_test.go @@ -124,8 +124,8 @@ func TestExecute(t *testing.T) { operationID: ".", expectedErr: nil, expectedPVC: builder.ForPersistentVolumeClaim("velero", "testPVC"). - ObjectMeta(builder.WithAnnotations(velerov1api.MustIncludeAdditionalItemAnnotation, "true", velerov1api.DataUploadNameAnnotation, "velero/", velerov1api.VolumeSnapshotLabel, ""), - builder.WithLabels(velerov1api.BackupNameLabel, "test", velerov1api.VolumeSnapshotLabel, "")). + ObjectMeta(builder.WithAnnotations(velerov1api.MustIncludeAdditionalItemAnnotation, "true", velerov1api.DataUploadNameAnnotation, "velero/"), + builder.WithLabels(velerov1api.BackupNameLabel, "test")). VolumeName("testPV").StorageClass("testSC").Phase(corev1.ClaimBound).Result(), }, { diff --git a/pkg/backup/actions/csi/volumesnapshot_action.go b/pkg/backup/actions/csi/volumesnapshot_action.go index 244785c0d..7a3bc79d7 100644 --- a/pkg/backup/actions/csi/volumesnapshot_action.go +++ b/pkg/backup/actions/csi/volumesnapshot_action.go @@ -131,13 +131,13 @@ func (p *volumeSnapshotBackupItemAction) Execute( backup.Status.Phase == velerov1api.BackupPhaseFinalizingPartiallyFailed { p.log. WithField("Backup", fmt.Sprintf("%s/%s", backup.Namespace, backup.Name)). - WithField("BackupPhase", backup.Status.Phase).Debugf("Clean VolumeSnapshots.") + WithField("BackupPhase", backup.Status.Phase).Debugf("Cleaning VolumeSnapshots.") if vsc == nil { vsc = &snapshotv1api.VolumeSnapshotContent{} } - csi.DeleteVolumeSnapshot(*vs, *vsc, p.crClient, p.log) + csi.DeleteReadyVolumeSnapshot(*vs, *vsc, p.crClient, p.log) return item, nil, "", nil, nil } @@ -164,6 +164,7 @@ func (p *volumeSnapshotBackupItemAction) Execute( annotations[velerov1api.VolumeSnapshotHandleAnnotation] = *vsc.Status.SnapshotHandle annotations[velerov1api.DriverNameAnnotation] = vsc.Spec.Driver } + if vsc.Status.RestoreSize != nil { annotations[velerov1api.VolumeSnapshotRestoreSize] = resource.NewQuantity( *vsc.Status.RestoreSize, resource.BinarySI).String() diff --git a/pkg/builder/volume_snapshot_content_builder.go b/pkg/builder/volume_snapshot_content_builder.go index 3f8567e7e..00c7dbdb8 100644 --- a/pkg/builder/volume_snapshot_content_builder.go +++ b/pkg/builder/volume_snapshot_content_builder.go @@ -20,6 +20,7 @@ import ( snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ) // VolumeSnapshotContentBuilder builds VolumeSnapshotContent object. @@ -60,12 +61,13 @@ func (v *VolumeSnapshotContentBuilder) DeletionPolicy(policy snapshotv1api.Delet } // VolumeSnapshotRef sets the built VolumeSnapshotContent's spec.VolumeSnapshotRef value. -func (v *VolumeSnapshotContentBuilder) VolumeSnapshotRef(namespace, name string) *VolumeSnapshotContentBuilder { +func (v *VolumeSnapshotContentBuilder) VolumeSnapshotRef(namespace, name, uid string) *VolumeSnapshotContentBuilder { v.object.Spec.VolumeSnapshotRef = v1.ObjectReference{ APIVersion: "snapshot.storage.k8s.io/v1", Kind: "VolumeSnapshot", Namespace: namespace, Name: name, + UID: types.UID(uid), } return v } diff --git a/pkg/cmd/server/plugin/plugin.go b/pkg/cmd/server/plugin/plugin.go index 1c0f0bc7d..d25fb54f8 100644 --- a/pkg/cmd/server/plugin/plugin.go +++ b/pkg/cmd/server/plugin/plugin.go @@ -163,12 +163,12 @@ func NewCommand(f client.Factory) *cobra.Command { newPvcRestoreItemAction(f), ). RegisterRestoreItemActionV2( - "velero.io/csi-volumesnapshot-restorer", + constant.PluginCsiVolumeSnapshotRestoreRIA, newVolumeSnapshotRestoreItemAction(f), ). RegisterRestoreItemActionV2( "velero.io/csi-volumesnapshotcontent-restorer", - newVolumeSnapshotContentRestoreItemAction, + newVolumeSnapshotContentRestoreItemAction(f), ). RegisterRestoreItemActionV2( "velero.io/csi-volumesnapshotclass-restorer", @@ -442,8 +442,8 @@ func newVolumeSnapshotRestoreItemAction(f client.Factory) plugincommon.HandlerIn return csiria.NewVolumeSnapshotRestoreItemAction(f) } -func newVolumeSnapshotContentRestoreItemAction(logger logrus.FieldLogger) (any, error) { - return csiria.NewVolumeSnapshotContentRestoreItemAction(logger) +func newVolumeSnapshotContentRestoreItemAction(f client.Factory) plugincommon.HandlerInitializer { + return csiria.NewVolumeSnapshotContentRestoreItemAction(f) } func newVolumeSnapshotClassRestoreItemAction(logger logrus.FieldLogger) (any, error) { diff --git a/pkg/constant/constant.go b/pkg/constant/constant.go index dc2c0e558..cfadcc5dc 100644 --- a/pkg/constant/constant.go +++ b/pkg/constant/constant.go @@ -20,5 +20,6 @@ const ( ControllerServerStatusRequest = "server-status-request" ControllerRestoreFinalizer = "restore-finalizer" - PluginCSIPVCRestoreRIA = "velero.io/csi-pvc-restorer" + PluginCSIPVCRestoreRIA = "velero.io/csi-pvc-restorer" + PluginCsiVolumeSnapshotRestoreRIA = "velero.io/csi-volumesnapshot-restorer" ) diff --git a/pkg/controller/restore_finalizer_controller.go b/pkg/controller/restore_finalizer_controller.go index 6ca80d575..d06849207 100644 --- a/pkg/controller/restore_finalizer_controller.go +++ b/pkg/controller/restore_finalizer_controller.go @@ -22,31 +22,27 @@ import ( "sync" "time" - "k8s.io/apimachinery/pkg/runtime/schema" - - "github.com/vmware-tanzu/velero/pkg/constant" - "github.com/vmware-tanzu/velero/pkg/itemoperation" - "github.com/vmware-tanzu/velero/pkg/plugin/velero" - - storagev1api "k8s.io/api/storage/v1" - "github.com/pkg/errors" "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + storagev1api "k8s.io/api/storage/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/clock" - "github.com/vmware-tanzu/velero/internal/hook" "github.com/vmware-tanzu/velero/internal/volume" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/constant" + "github.com/vmware-tanzu/velero/pkg/itemoperation" "github.com/vmware-tanzu/velero/pkg/metrics" "github.com/vmware-tanzu/velero/pkg/persistence" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" + "github.com/vmware-tanzu/velero/pkg/plugin/velero" kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube" "github.com/vmware-tanzu/velero/pkg/util/results" ) diff --git a/pkg/controller/restore_finalizer_controller_test.go b/pkg/controller/restore_finalizer_controller_test.go index 518b82424..23c2b044d 100644 --- a/pkg/controller/restore_finalizer_controller_test.go +++ b/pkg/controller/restore_finalizer_controller_test.go @@ -23,17 +23,13 @@ import ( "testing" "time" - "k8s.io/apimachinery/pkg/runtime/schema" - - "github.com/vmware-tanzu/velero/pkg/itemoperation" - "github.com/vmware-tanzu/velero/pkg/plugin/velero" - "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" corev1api "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" testclocks "k8s.io/utils/clock/testing" ctrl "sigs.k8s.io/controller-runtime" @@ -43,10 +39,12 @@ import ( "github.com/vmware-tanzu/velero/internal/volume" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + "github.com/vmware-tanzu/velero/pkg/itemoperation" "github.com/vmware-tanzu/velero/pkg/metrics" persistencemocks "github.com/vmware-tanzu/velero/pkg/persistence/mocks" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks" + "github.com/vmware-tanzu/velero/pkg/plugin/velero" velerotest "github.com/vmware-tanzu/velero/pkg/test" pkgUtilKubeMocks "github.com/vmware-tanzu/velero/pkg/util/kube/mocks" "github.com/vmware-tanzu/velero/pkg/util/results" diff --git a/pkg/restore/actions/csi/pvc_action.go b/pkg/restore/actions/csi/pvc_action.go index ef0143629..9efd3649c 100644 --- a/pkg/restore/actions/csi/pvc_action.go +++ b/pkg/restore/actions/csi/pvc_action.go @@ -220,8 +220,10 @@ func (p *pvcRestoreItemAction) Execute( logger.Infof("DataDownload %s/%s is created successfully.", dataDownload.Namespace, dataDownload.Name) } else { - volumeSnapshotName, ok := pvcFromBackup.Annotations[velerov1api.VolumeSnapshotLabel] - if !ok { + targetVSName := "" + if vsName, nameOK := pvcFromBackup.Annotations[velerov1api.VolumeSnapshotLabel]; nameOK { + targetVSName = util.GenerateSha256FromRestoreUIDAndVsName(string(input.Restore.UID), vsName) + } else { logger.Info("Skipping PVCRestoreItemAction for PVC,", "PVC does not have a CSI VolumeSnapshot.") // Make no change in the input PVC. @@ -229,8 +231,9 @@ func (p *pvcRestoreItemAction) Execute( UpdatedItem: input.Item, }, nil } + if err := restoreFromVolumeSnapshot( - &pvc, newNamespace, p.crClient, volumeSnapshotName, logger, + &pvc, newNamespace, p.crClient, targetVSName, logger, ); err != nil { logger.Errorf("Failed to restore PVC from VolumeSnapshot.") return nil, errors.WithStack(err) @@ -502,19 +505,17 @@ func restoreFromVolumeSnapshot( }, vs, ); err != nil { - return errors.Wrapf(err, - fmt.Sprintf("Failed to get Volumesnapshot %s/%s to restore PVC %s/%s", - newNamespace, volumeSnapshotName, newNamespace, pvc.Name), - ) + return errors.Wrapf(err, "Failed to get Volumesnapshot %s/%s to restore PVC %s/%s", + newNamespace, volumeSnapshotName, newNamespace, pvc.Name) } if _, exists := vs.Annotations[velerov1api.VolumeSnapshotRestoreSize]; exists { restoreSize, err := resource.ParseQuantity( vs.Annotations[velerov1api.VolumeSnapshotRestoreSize]) if err != nil { - return errors.Wrapf(err, fmt.Sprintf( + return errors.Wrapf(err, "Failed to parse %s from annotation on Volumesnapshot %s/%s into restore size", - vs.Annotations[velerov1api.VolumeSnapshotRestoreSize], vs.Namespace, vs.Name)) + vs.Annotations[velerov1api.VolumeSnapshotRestoreSize], vs.Namespace, vs.Name) } // It is possible that the volume provider allocated a larger // capacity volume than what was requested in the backed up PVC. diff --git a/pkg/restore/actions/csi/pvc_action_test.go b/pkg/restore/actions/csi/pvc_action_test.go index 53e1908f2..a3b78a064 100644 --- a/pkg/restore/actions/csi/pvc_action_test.go +++ b/pkg/restore/actions/csi/pvc_action_test.go @@ -45,6 +45,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/plugin/velero" velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util" "github.com/vmware-tanzu/velero/pkg/util/boolptr" ) @@ -546,6 +547,7 @@ func TestCancel(t *testing.T) { } func TestExecute(t *testing.T) { + vsName := util.GenerateSha256FromRestoreUIDAndVsName("restoreUID", "vsName") tests := []struct { name string backup *velerov1api.Backup @@ -573,20 +575,22 @@ func TestExecute(t *testing.T) { { name: "VolumeSnapshot cannot be found", backup: builder.ForBackup("velero", "testBackup").Result(), - restore: builder.ForRestore("velero", "testRestore").Backup("testBackup").Result(), - pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "testVS")).Result(), - expectedErr: "Failed to get Volumesnapshot velero/testVS to restore PVC velero/testPVC: volumesnapshots.snapshot.storage.k8s.io \"testVS\" not found", + restore: builder.ForRestore("velero", "testRestore").ObjectMeta(builder.WithUID("restoreUID")).Backup("testBackup").Result(), + pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "vsName")).Result(), + expectedErr: fmt.Sprintf("Failed to get Volumesnapshot velero/%s to restore PVC velero/testPVC: volumesnapshots.snapshot.storage.k8s.io \"%s\" not found", vsName, vsName), }, { name: "Restore from VolumeSnapshot", backup: builder.ForBackup("velero", "testBackup").Result(), - restore: builder.ForRestore("velero", "testRestore").Backup("testBackup").Result(), - pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "testVS")). + restore: builder.ForRestore("velero", "testRestore").ObjectMeta(builder.WithUID("restoreUID")).Backup("testBackup").Result(), + pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "vsName")). RequestResource(map[corev1api.ResourceName]resource.Quantity{corev1api.ResourceStorage: resource.MustParse("10Gi")}). DataSource(&corev1api.TypedLocalObjectReference{APIGroup: &snapshotv1api.SchemeGroupVersion.Group, Kind: "VolumeSnapshot", Name: "testVS"}). DataSourceRef(&corev1api.TypedObjectReference{APIGroup: &snapshotv1api.SchemeGroupVersion.Group, Kind: "VolumeSnapshot", Name: "testVS"}). Result(), - vs: builder.ForVolumeSnapshot("velero", "testVS").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotRestoreSize, "10Gi")).Result(), + vs: builder.ForVolumeSnapshot("velero", vsName).ObjectMeta( + builder.WithAnnotations(velerov1api.VolumeSnapshotRestoreSize, "10Gi"), + ).Result(), expectedPVC: builder.ForPersistentVolumeClaim("velero", "testPVC").Result(), }, { diff --git a/pkg/restore/actions/csi/volumesnapshot_action.go b/pkg/restore/actions/csi/volumesnapshot_action.go index 9bac9a336..d9a162d77 100644 --- a/pkg/restore/actions/csi/volumesnapshot_action.go +++ b/pkg/restore/actions/csi/volumesnapshot_action.go @@ -17,24 +17,19 @@ limitations under the License. package csi import ( - "context" - snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" - core_v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" crclient "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/client" - "github.com/vmware-tanzu/velero/pkg/label" plugincommon "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/plugin/velero" + "github.com/vmware-tanzu/velero/pkg/util" "github.com/vmware-tanzu/velero/pkg/util/boolptr" - "github.com/vmware-tanzu/velero/pkg/util/csi" ) // volumeSnapshotRestoreItemAction is a Velero restore item @@ -54,8 +49,7 @@ func (p *volumeSnapshotRestoreItemAction) AppliesTo() ( }, nil } -func resetVolumeSnapshotSpecForRestore( - vs *snapshotv1api.VolumeSnapshot, vscName *string) { +func resetVolumeSnapshotSpecForRestore(vs *snapshotv1api.VolumeSnapshot, vscName *string) { // Spec of the backed-up object used the PVC as the source // of the volumeSnapshot. Restore operation will however, // restore the VolumeSnapshot from the VolumeSnapshotContent @@ -68,10 +62,6 @@ func resetVolumeSnapshotAnnotation(vs *snapshotv1api.VolumeSnapshot) { string(snapshotv1api.VolumeSnapshotContentRetain) } -// Execute uses the data such as CSI driver name, storage -// snapshot handle, snapshot deletion secret (if any) from -// the annotations to recreate a VolumeSnapshotContent object -// and statically bind the VolumeSnapshot object being restored. func (p *volumeSnapshotRestoreItemAction) Execute( input *velero.RestoreItemActionExecuteInput, ) (*velero.RestoreItemActionExecuteOutput, error) { @@ -90,84 +80,29 @@ func (p *volumeSnapshotRestoreItemAction) Execute( errors.Wrapf(err, "failed to convert input.Item from unstructured") } - // If cross-namespace restore is configured, change the namespace - // for VolumeSnapshot object to be restored - newNamespace, ok := input.Restore.Spec.NamespaceMapping[vs.GetNamespace()] - if !ok { - // Use original namespace - newNamespace = vs.Namespace + var vsFromBackup snapshotv1api.VolumeSnapshot + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.ItemFromBackup.UnstructuredContent(), &vsFromBackup); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Wrapf(err, "failed to convert input.Item from unstructured") } - if !csi.IsVolumeSnapshotExists(newNamespace, vs.Name, p.crClient) { - snapHandle, exists := vs.Annotations[velerov1api.VolumeSnapshotHandleAnnotation] - if !exists { - return nil, errors.Errorf( - "VolumeSnapshot %s/%s does not have a %s annotation", - vs.Namespace, - vs.Name, - velerov1api.VolumeSnapshotHandleAnnotation, - ) - } + generatedName := util.GenerateSha256FromRestoreUIDAndVsName(string(input.Restore.UID), vsFromBackup.Name) - csiDriverName, exists := vs.Annotations[velerov1api.DriverNameAnnotation] - if !exists { - return nil, errors.Errorf( - "VolumeSnapshot %s/%s does not have a %s annotation", - vs.Namespace, vs.Name, velerov1api.DriverNameAnnotation) - } + // Reset Spec to convert the VolumeSnapshot from using + // the dynamic VolumeSnapshotContent to the static one. + resetVolumeSnapshotSpecForRestore(&vs, &generatedName) + // Also reset the VS name to avoid potential conflict caused by multiple restores of the same backup. + // Both VS and VSC share the same generated name. + vs.Name = generatedName - p.log.Debugf("Set VolumeSnapshotContent %s/%s DeletionPolicy to Retain to make sure VS deletion in namespace will not delete Snapshot on cloud provider.", - newNamespace, vs.Name) - - vsc := snapshotv1api.VolumeSnapshotContent{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: vs.Name + "-", - Labels: map[string]string{ - velerov1api.RestoreNameLabel: label.GetValidName(input.Restore.Name), - }, - }, - Spec: snapshotv1api.VolumeSnapshotContentSpec{ - DeletionPolicy: snapshotv1api.VolumeSnapshotContentRetain, - Driver: csiDriverName, - VolumeSnapshotRef: core_v1.ObjectReference{ - APIVersion: "snapshot.storage.k8s.io/v1", - Kind: "VolumeSnapshot", - Namespace: newNamespace, - Name: vs.Name, - }, - Source: snapshotv1api.VolumeSnapshotContentSource{ - SnapshotHandle: &snapHandle, - }, - }, - } - - // we create the VolumeSnapshotContent here instead of relying on the - // restore flow because we want to statically bind this VolumeSnapshot - // with a VolumeSnapshotContent that will be used as its source for pre-populating - // the volume that will be created as a result of the restore. To perform - // this static binding, a bi-directional link between the VolumeSnapshotContent - // and VolumeSnapshot objects have to be setup. Further, it is disallowed - // to convert a dynamically created VolumeSnapshotContent for static binding. - // See: https://github.com/kubernetes-csi/external-snapshotter/issues/274 - if err := p.crClient.Create(context.TODO(), &vsc); err != nil { - return nil, errors.Wrapf(err, - "failed to create VolumeSnapshotContents %s", - vsc.GenerateName) - } - p.log.Infof("Created VolumesnapshotContents %s with static binding to volumesnapshot %s/%s", - vsc, newNamespace, vs.Name) - - // Reset Spec to convert the VolumeSnapshot from using - // the dynamic VolumeSnapshotContent to the static one. - resetVolumeSnapshotSpecForRestore(&vs, &vsc.Name) - - // Reset VolumeSnapshot annotation. By now, only change - // DeletionPolicy to Retain. - resetVolumeSnapshotAnnotation(&vs) - } + // Reset VolumeSnapshot annotation. By now, only change + // DeletionPolicy to Retain. + resetVolumeSnapshotAnnotation(&vs) vsMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vs) if err != nil { + p.log.Errorf("Fail to convert VS %s to unstructured", vs.Namespace+"/"+vs.Name) return nil, errors.WithStack(err) } @@ -195,6 +130,7 @@ func (p *volumeSnapshotRestoreItemAction) Cancel( operationID string, restore *velerov1api.Restore, ) error { + // CSI Specification doesn't support canceling a snapshot creation. return nil } @@ -206,7 +142,8 @@ func (p *volumeSnapshotRestoreItemAction) AreAdditionalItemsReady( } func NewVolumeSnapshotRestoreItemAction( - f client.Factory) plugincommon.HandlerInitializer { + f client.Factory, +) plugincommon.HandlerInitializer { return func(logger logrus.FieldLogger) (any, error) { crClient, err := f.KubebuilderClient() if err != nil { diff --git a/pkg/restore/actions/csi/volumesnapshot_action_test.go b/pkg/restore/actions/csi/volumesnapshot_action_test.go index 1cc3f0ba9..308c02895 100644 --- a/pkg/restore/actions/csi/volumesnapshot_action_test.go +++ b/pkg/restore/actions/csi/volumesnapshot_action_test.go @@ -21,8 +21,6 @@ import ( "fmt" "testing" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" @@ -36,6 +34,7 @@ import ( factorymocks "github.com/vmware-tanzu/velero/pkg/client/mocks" "github.com/vmware-tanzu/velero/pkg/plugin/velero" velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util" ) var ( @@ -89,7 +88,7 @@ func TestResetVolumeSnapshotSpecForRestore(t *testing.T) { before := tc.vs.DeepCopy() resetVolumeSnapshotSpecForRestore(&tc.vs, &tc.vscName) - assert.Equalf(t, tc.vs.Name, before.Name, "unexpected change to Object.Name, Want: %s; Got %s", tc.name, before.Name, tc.vs.Name) + assert.Equalf(t, tc.vs.Name, before.Name, "unexpected change to Object.Name, Want: %s; Got %s", before.Name, tc.vs.Name) assert.Equal(t, tc.vs.Namespace, before.Namespace, "unexpected change to Object.Namespace, Want: %s; Got %s", tc.name, before.Namespace, tc.vs.Namespace) assert.NotNil(t, tc.vs.Spec.Source) assert.Nil(t, tc.vs.Spec.Source.PersistentVolumeClaimName) @@ -103,15 +102,15 @@ func TestResetVolumeSnapshotSpecForRestore(t *testing.T) { } func TestVSExecute(t *testing.T) { - snapshotHandle := "vsc" + newVscName := util.GenerateSha256FromRestoreUIDAndVsName("restoreUID", "vsName") tests := []struct { - name string - item runtime.Unstructured - vs *snapshotv1api.VolumeSnapshot - restore *velerov1api.Restore - expectErr bool - createVS bool - expectedVSC *snapshotv1api.VolumeSnapshotContent + name string + item runtime.Unstructured + vs *snapshotv1api.VolumeSnapshot + restore *velerov1api.Restore + expectErr bool + createVS bool + expectedVS *snapshotv1api.VolumeSnapshot }{ { name: "Restore's RestorePVs is false", @@ -119,45 +118,24 @@ func TestVSExecute(t *testing.T) { expectErr: false, }, { - name: "Namespace remapping and VS already exists in cluster. Nothing change", - vs: builder.ForVolumeSnapshot("ns", "name").Result(), - restore: builder.ForRestore("velero", "restore").NamespaceMappings("ns", "newNS").Result(), - createVS: true, - expectErr: false, - }, - { - name: "VS doesn't have VolumeSnapshotHandleAnnotation annotation", - vs: builder.ForVolumeSnapshot("ns", "name").Result(), - restore: builder.ForRestore("velero", "restore").NamespaceMappings("ns", "newNS").Result(), - expectErr: true, - }, - { - name: "VS doesn't have DriverNameAnnotation annotation", - vs: builder.ForVolumeSnapshot("ns", "name").ObjectMeta( - builder.WithAnnotations(velerov1api.VolumeSnapshotHandleAnnotation, ""), - ).Result(), + name: "VS doesn't have VSC in status", + vs: builder.ForVolumeSnapshot("ns", "name").ObjectMeta(builder.WithAnnotations("1", "1")).Status().Result(), restore: builder.ForRestore("velero", "restore").NamespaceMappings("ns", "newNS").Result(), expectErr: true, }, { name: "Normal case, VSC should be created", - vs: builder.ForVolumeSnapshot("ns", "test").ObjectMeta(builder.WithAnnotationsMap( - map[string]string{ - velerov1api.VolumeSnapshotHandleAnnotation: "vsc", - velerov1api.DriverNameAnnotation: "pd.csi.storage.gke.io", - }, - )).Result(), - restore: builder.ForRestore("velero", "restore").Result(), - expectErr: false, - expectedVSC: builder.ForVolumeSnapshotContent("vsc").ObjectMeta( - builder.WithLabels(velerov1api.RestoreNameLabel, "restore"), - ).VolumeSnapshotRef("ns", "test"). - DeletionPolicy(snapshotv1api.VolumeSnapshotContentRetain). - Driver("pd.csi.storage.gke.io"). - Source(snapshotv1api.VolumeSnapshotContentSource{ - SnapshotHandle: &snapshotHandle, - }). - Result(), + vs: builder.ForVolumeSnapshot("ns", "vsName").ObjectMeta( + builder.WithAnnotationsMap( + map[string]string{ + velerov1api.VolumeSnapshotHandleAnnotation: "vsc", + velerov1api.DriverNameAnnotation: "pd.csi.storage.gke.io", + }, + ), + ).SourceVolumeSnapshotContentName(newVscName).Status().BoundVolumeSnapshotContentName("vscName").Result(), + restore: builder.ForRestore("velero", "restore").ObjectMeta(builder.WithUID("restoreUID")).Result(), + expectErr: false, + expectedVS: builder.ForVolumeSnapshot("ns", "test").SourceVolumeSnapshotContentName(newVscName).Result(), }, } @@ -181,10 +159,11 @@ func TestVSExecute(t *testing.T) { } } - _, err := p.Execute( + result, err := p.Execute( &velero.RestoreItemActionExecuteInput{ - Item: test.item, - Restore: test.restore, + Item: test.item, + ItemFromBackup: test.item, + Restore: test.restore, }, ) @@ -192,18 +171,11 @@ func TestVSExecute(t *testing.T) { require.NoError(t, err) } - if test.expectedVSC != nil { - vscList := new(snapshotv1api.VolumeSnapshotContentList) - require.NoError(t, p.crClient.List(context.TODO(), vscList)) - require.True(t, cmp.Equal( - *test.expectedVSC, - vscList.Items[0], - cmpopts.IgnoreFields( - snapshotv1api.VolumeSnapshotContent{}, - "Kind", "APIVersion", "GenerateName", "Name", - "ResourceVersion", - ), - )) + if test.expectedVS != nil { + var vs snapshotv1api.VolumeSnapshot + require.NoError(t, runtime.DefaultUnstructuredConverter.FromUnstructured( + result.UpdatedItem.UnstructuredContent(), &vs)) + require.Equal(t, test.expectedVS.Spec, vs.Spec) } }) } diff --git a/pkg/restore/actions/csi/volumesnapshotcontent_action.go b/pkg/restore/actions/csi/volumesnapshotcontent_action.go index bea63b53b..f4b42b30a 100644 --- a/pkg/restore/actions/csi/volumesnapshotcontent_action.go +++ b/pkg/restore/actions/csi/volumesnapshotcontent_action.go @@ -20,11 +20,16 @@ import ( snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + crclient "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/client" + plugincommon "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/plugin/velero" + "github.com/vmware-tanzu/velero/pkg/util" "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/csi" ) @@ -32,7 +37,8 @@ import ( // volumeSnapshotContentRestoreItemAction is a restore item action // plugin for Velero type volumeSnapshotContentRestoreItemAction struct { - log logrus.FieldLogger + log logrus.FieldLogger + client crclient.Client } // AppliesTo returns information indicating VolumeSnapshotContentRestoreItemAction @@ -51,34 +57,77 @@ func (p *volumeSnapshotContentRestoreItemAction) AppliesTo() ( func (p *volumeSnapshotContentRestoreItemAction) Execute( input *velero.RestoreItemActionExecuteInput, ) (*velero.RestoreItemActionExecuteOutput, error) { - p.log.Info("Starting VolumeSnapshotContentRestoreItemAction") - var snapCont snapshotv1api.VolumeSnapshotContent if boolptr.IsSetToFalse(input.Restore.Spec.RestorePVs) { p.log.Infof("Restore did not request for PVs to be restored %s/%s", input.Restore.Namespace, input.Restore.Name) return &velero.RestoreItemActionExecuteOutput{SkipRestore: true}, nil } + + p.log.Info("Starting VolumeSnapshotContentRestoreItemAction") + + var vsc snapshotv1api.VolumeSnapshotContent if err := runtime.DefaultUnstructuredConverter.FromUnstructured( - input.Item.UnstructuredContent(), &snapCont); err != nil { + input.Item.UnstructuredContent(), &vsc); err != nil { return &velero.RestoreItemActionExecuteOutput{}, errors.Wrapf(err, "failed to convert input.Item from unstructured") } + var vscFromBackup snapshotv1api.VolumeSnapshotContent + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.ItemFromBackup.UnstructuredContent(), &vscFromBackup); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Errorf(err.Error(), "failed to convert input.ItemFromBackup from unstructured") + } + + // If cross-namespace restore is configured, change the namespace + // for VolumeSnapshot object to be restored + newNamespace, ok := input.Restore.Spec.NamespaceMapping[vsc.Spec.VolumeSnapshotRef.Namespace] + if ok { + // Update the referenced VS namespace to the mapping one. + vsc.Spec.VolumeSnapshotRef.Namespace = newNamespace + } + + // Reset VSC name to align with VS. + vsc.Name = util.GenerateSha256FromRestoreUIDAndVsName( + string(input.Restore.UID), vscFromBackup.Spec.VolumeSnapshotRef.Name) + // Also reset the referenced VS name. + vsc.Spec.VolumeSnapshotRef.Name = vsc.Name + + // Reset the ResourceVersion and UID of referenced VolumeSnapshot. + vsc.Spec.VolumeSnapshotRef.ResourceVersion = "" + vsc.Spec.VolumeSnapshotRef.UID = "" + + // Set the DeletionPolicy to Retain to avoid VS deletion will not trigger snapshot deletion + vsc.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentRetain + + if vscFromBackup.Status != nil && vscFromBackup.Status.SnapshotHandle != nil { + vsc.Spec.Source.VolumeHandle = nil + vsc.Spec.Source.SnapshotHandle = vscFromBackup.Status.SnapshotHandle + } else { + p.log.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) + return nil, errors.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) + } + additionalItems := []velero.ResourceIdentifier{} - if csi.IsVolumeSnapshotContentHasDeleteSecret(&snapCont) { + if csi.IsVolumeSnapshotContentHasDeleteSecret(&vsc) { additionalItems = append(additionalItems, velero.ResourceIdentifier{ GroupResource: schema.GroupResource{Group: "", Resource: "secrets"}, - Name: snapCont.Annotations[velerov1api.PrefixedSecretNameAnnotation], - Namespace: snapCont.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], + Name: vsc.Annotations[velerov1api.PrefixedSecretNameAnnotation], + Namespace: vsc.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], }, ) } + vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vsc) + if err != nil { + return nil, errors.WithStack(err) + } + p.log.Infof("Returning from VolumeSnapshotContentRestoreItemAction with %d additionalItems", len(additionalItems)) return &velero.RestoreItemActionExecuteOutput{ - UpdatedItem: input.Item, + UpdatedItem: &unstructured.Unstructured{Object: vscMap}, AdditionalItems: additionalItems, }, nil } @@ -108,6 +157,15 @@ func (p *volumeSnapshotContentRestoreItemAction) AreAdditionalItemsReady( return true, nil } -func NewVolumeSnapshotContentRestoreItemAction(logger logrus.FieldLogger) (any, error) { - return &volumeSnapshotContentRestoreItemAction{logger}, nil +func NewVolumeSnapshotContentRestoreItemAction( + f client.Factory, +) plugincommon.HandlerInitializer { + return func(logger logrus.FieldLogger) (any, error) { + crClient, err := f.KubebuilderClient() + if err != nil { + return nil, err + } + + return &volumeSnapshotContentRestoreItemAction{logger, crClient}, nil + } } diff --git a/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go b/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go index 71ff67354..579096a59 100644 --- a/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go +++ b/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go @@ -17,6 +17,8 @@ limitations under the License. package csi import ( + "context" + "fmt" "testing" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" @@ -27,18 +29,25 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + factorymocks "github.com/vmware-tanzu/velero/pkg/client/mocks" "github.com/vmware-tanzu/velero/pkg/kuberesource" "github.com/vmware-tanzu/velero/pkg/plugin/velero" + velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util" ) func TestVSCExecute(t *testing.T) { + snapshotHandleName := "testHandle" + newVscName := util.GenerateSha256FromRestoreUIDAndVsName("restoreUID", "vsName") tests := []struct { name string item runtime.Unstructured vsc *snapshotv1api.VolumeSnapshotContent restore *velerov1api.Restore expectErr bool + createVSC bool expectedItems []velero.ResourceIdentifier + expectedVSC *snapshotv1api.VolumeSnapshotContent }{ { name: "Restore's RestorePVs is false", @@ -46,14 +55,16 @@ func TestVSCExecute(t *testing.T) { expectErr: false, }, { - name: "Normal case, additional items should return", + name: "Normal case, additional items should return ", vsc: builder.ForVolumeSnapshotContent("test").ObjectMeta(builder.WithAnnotationsMap( map[string]string{ velerov1api.PrefixedSecretNameAnnotation: "name", velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", }, - )).Result(), - restore: builder.ForRestore("velero", "restore").Result(), + )).VolumeSnapshotRef("velero", "vsName", "vsUID"). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), + restore: builder.ForRestore("velero", "restore").ObjectMeta(builder.WithUID("restoreUID")). + NamespaceMappings("velero", "restore").Result(), expectErr: false, expectedItems: []velero.ResourceIdentifier{ { @@ -62,26 +73,63 @@ func TestVSCExecute(t *testing.T) { Name: "name", }, }, + expectedVSC: builder.ForVolumeSnapshotContent(newVscName).ObjectMeta(builder.WithAnnotationsMap( + map[string]string{ + velerov1api.PrefixedSecretNameAnnotation: "name", + velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", + }, + )).VolumeSnapshotRef("restore", newVscName, ""). + Source(snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: &snapshotHandleName}). + DeletionPolicy(snapshotv1api.VolumeSnapshotContentRetain). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), + }, + { + name: "VSC exists in cluster, same as the normal case", + vsc: builder.ForVolumeSnapshotContent("test").ObjectMeta(builder.WithAnnotationsMap( + map[string]string{ + velerov1api.PrefixedSecretNameAnnotation: "name", + velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", + }, + )).VolumeSnapshotRef("velero", "vsName", "vsUID"). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), + restore: builder.ForRestore("velero", "restore").ObjectMeta(builder.WithUID("restoreUID")). + NamespaceMappings("velero", "restore").Result(), + createVSC: true, + expectErr: false, + expectedVSC: builder.ForVolumeSnapshotContent(newVscName).ObjectMeta(builder.WithAnnotationsMap( + map[string]string{ + velerov1api.PrefixedSecretNameAnnotation: "name", + velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", + }, + )).VolumeSnapshotRef("restore", newVscName, ""). + Source(snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: &snapshotHandleName}). + DeletionPolicy(snapshotv1api.VolumeSnapshotContentRetain). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - p, err := NewVolumeSnapshotContentRestoreItemAction(logrus.StandardLogger()) - require.NoError(t, err) - - action := p.(*volumeSnapshotContentRestoreItemAction) + action := volumeSnapshotContentRestoreItemAction{ + log: logrus.StandardLogger(), + client: velerotest.NewFakeControllerRuntimeClient(t), + } if test.vsc != nil { vsMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(test.vsc) require.NoError(t, err) test.item = &unstructured.Unstructured{Object: vsMap} + + if test.createVSC { + require.NoError(t, action.client.Create(context.TODO(), test.vsc)) + } } output, err := action.Execute( &velero.RestoreItemActionExecuteInput{ - Item: test.item, - Restore: test.restore, + Item: test.item, + ItemFromBackup: test.item, + Restore: test.restore, }, ) @@ -89,6 +137,18 @@ func TestVSCExecute(t *testing.T) { require.NoError(t, err) } + if test.expectedVSC != nil { + vsc := new(snapshotv1api.VolumeSnapshotContent) + require.NoError(t, + runtime.DefaultUnstructuredConverter.FromUnstructured( + output.UpdatedItem.UnstructuredContent(), + vsc, + ), + ) + + require.Equal(t, test.expectedVSC, vsc) + } + if len(test.expectedItems) > 0 { require.Equal(t, test.expectedItems, output.AdditionalItems) } @@ -112,3 +172,20 @@ func TestVSCAppliesTo(t *testing.T) { selector, ) } + +func TestNewVolumeSnapshotContentRestoreItemAction(t *testing.T) { + logger := logrus.StandardLogger() + crClient := velerotest.NewFakeControllerRuntimeClient(t) + + f := &factorymocks.Factory{} + f.On("KubebuilderClient").Return(nil, fmt.Errorf("")) + plugin := NewVolumeSnapshotContentRestoreItemAction(f) + _, err := plugin(logger) + require.Error(t, err) + + f1 := &factorymocks.Factory{} + f1.On("KubebuilderClient").Return(crClient, nil) + plugin1 := NewVolumeSnapshotContentRestoreItemAction(f1) + _, err1 := plugin1(logger) + require.NoError(t, err1) +} diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index 649b877fc..4e4103efa 100644 --- a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -70,8 +70,8 @@ func WaitVolumeSnapshotReady( if err != nil { return false, errors.Wrapf( err, - fmt.Sprintf("error to get VolumeSnapshot %s/%s", - volumeSnapshotNS, volumeSnapshot), + "error to get VolumeSnapshot %s/%s", + volumeSnapshotNS, volumeSnapshot, ) } @@ -175,7 +175,7 @@ func EnsureDeleteVS(ctx context.Context, snapshotClient snapshotter.SnapshotV1In return true, nil } - return false, errors.Wrapf(err, fmt.Sprintf("error to get VolumeSnapshot %s", vsName)) + return false, errors.Wrapf(err, "error to get VolumeSnapshot %s", vsName) } updated = vs @@ -234,7 +234,7 @@ func EnsureDeleteVSC(ctx context.Context, snapshotClient snapshotter.SnapshotV1I return true, nil } - return false, errors.Wrapf(err, fmt.Sprintf("error to get VolumeSnapshotContent %s", vscName)) + return false, errors.Wrapf(err, "error to get VolumeSnapshotContent %s", vscName) } updated = vsc @@ -532,8 +532,7 @@ func CleanupVolumeSnapshot( } } -// DeleteVolumeSnapshot handles the VolumeSnapshot and VolumeSnapshotContent instance deletion. -func DeleteVolumeSnapshot( +func DeleteReadyVolumeSnapshot( vs snapshotv1api.VolumeSnapshot, vsc snapshotv1api.VolumeSnapshotContent, client crclient.Client, @@ -623,9 +622,10 @@ func WaitUntilVSCHandleIsReady( vs, ); err != nil { return false, - errors.Wrapf(err, fmt.Sprintf( + errors.Wrapf( + err, "failed to get volumesnapshot %s/%s", - volSnap.Namespace, volSnap.Name), + volSnap.Namespace, volSnap.Name, ) } @@ -645,8 +645,8 @@ func WaitUntilVSCHandleIsReady( return false, errors.Wrapf( err, - fmt.Sprintf("failed to get VolumeSnapshotContent %s for VolumeSnapshot %s/%s", - *vs.Status.BoundVolumeSnapshotContentName, vs.Namespace, vs.Name), + "failed to get VolumeSnapshotContent %s for VolumeSnapshot %s/%s", + *vs.Status.BoundVolumeSnapshotContentName, vs.Namespace, vs.Name, ) } diff --git a/pkg/util/csi/volume_snapshot_test.go b/pkg/util/csi/volume_snapshot_test.go index c39a4f7fc..014cadaba 100644 --- a/pkg/util/csi/volume_snapshot_test.go +++ b/pkg/util/csi/volume_snapshot_test.go @@ -1497,7 +1497,7 @@ func TestDeleteVolumeSnapshots(t *testing.T) { ) logger := logging.DefaultLogger(logrus.DebugLevel, logging.FormatText) - DeleteVolumeSnapshot(tc.vs, tc.vsc, client, logger) + DeleteReadyVolumeSnapshot(tc.vs, tc.vsc, client, logger) vsList := new(snapshotv1api.VolumeSnapshotList) err := client.List( diff --git a/pkg/util/util.go b/pkg/util/util.go index 2aad8ccb1..6c5fdbb2a 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -16,6 +16,11 @@ limitations under the License. package util +import ( + "crypto/sha256" + "encoding/hex" +) + func Contains(slice []string, key string) bool { for _, i := range slice { if i == key { @@ -24,3 +29,10 @@ func Contains(slice []string, key string) bool { } return false } + +// GenerateSha256FromRestoreUIDAndVsName Use the restore UID and the VS Name to generate +// the new VSC and VS name. By this way, VS and VSC RIA action can get the same VSC name. +func GenerateSha256FromRestoreUIDAndVsName(restoreUID string, vsName string) string { + sha256Bytes := sha256.Sum256([]byte(restoreUID + "/" + vsName)) + return hex.EncodeToString(sha256Bytes[:]) +} diff --git a/site/content/docs/main/csi.md b/site/content/docs/main/csi.md index 42d0b653c..89f697f2d 100644 --- a/site/content/docs/main/csi.md +++ b/site/content/docs/main/csi.md @@ -109,8 +109,8 @@ Velero will include the generated VolumeSnapshot and VolumeSnapshotContent objec upload all VolumeSnapshots and VolumeSnapshotContents objects in a JSON file to the object storage system. **Note that only Kubernetes objects are uploaded to the object storage, not the data in snapshots.** -When Velero synchronizes backups into a new cluster, VolumeSnapshotContent objects and the VolumeSnapshotClass that is chosen to take -snapshot will be synced into the cluster as well, so that Velero can manage backup expiration appropriately. +From v1.16, when Velero synchronizes backups into a new cluster, the VolumeSnapshotClass that is chosen to take +snapshot will be synced into the cluster, so that Velero can manage backup expiration appropriately. The `DeletionPolicy` on the VolumeSnapshotContent will be the same as the `DeletionPolicy` on the VolumeSnapshotClass that was used to create the VolumeSnapshot. Setting a `DeletionPolicy` of `Retain` on the VolumeSnapshotClass will preserve the volume snapshot in the storage system for the lifetime of the Velero backup and will prevent the deletion of the volume snapshot, in the storage system, in the event of a disaster where the namespace with the VolumeSnapshot object may be lost.