Merge branch 'main' into linux-windows-hybrid-deploy

pull/8504/head
Lyndon-Li 2024-12-17 13:46:52 +08:00
commit 617411fa5a
47 changed files with 2119 additions and 544 deletions

View File

@ -0,0 +1 @@
Set hinting region to use for GetBucketRegion() in pkg/repository/config/aws.go

View File

@ -0,0 +1 @@
Fix issue #8125, log diagnostic info for data mover exposers when expose timeout

View File

@ -0,0 +1 @@
Related to issue #8485, move the acceptedByNode and acceptedTimestamp to Status of DU/DD CRD

View File

@ -0,0 +1 @@
Fix issue #8267, enhance the error message when expose fails

View File

@ -0,0 +1 @@
Fix backup post hook issue #8159 (caused by #7571): always execute backup post hooks after PVBs are handled

View File

@ -136,6 +136,16 @@ spec:
status:
description: DataDownloadStatus is the current status of a DataDownload.
properties:
acceptedByNode:
description: AcceptedByNode is the name of the node where the DataDownload is prepared.
type: string
acceptedTimestamp:
description: |-
AcceptedTimestamp records the time the DataDownload is to be prepared.
The server's time is used for AcceptedTimestamp
format: date-time
nullable: true
type: string
completionTimestamp:
description: |-
CompletionTimestamp records the time a restore was completed.

View File

@ -143,6 +143,16 @@ spec:
status:
description: DataUploadStatus is the current status of a DataUpload.
properties:
acceptedByNode:
description: AcceptedByNode is the name of the node where the DataUpload is prepared.
type: string
acceptedTimestamp:
description: |-
AcceptedTimestamp records the time the DataUpload is to be prepared.
The server's time is used for AcceptedTimestamp
format: date-time
nullable: true
type: string
completionTimestamp:
description: |-
CompletionTimestamp records the time a backup was completed.

File diff suppressed because one or more lines are too long

View File

@ -69,14 +69,16 @@ type HookTracker struct {
// HookExecutedCnt indicates the number of executed hooks.
hookExecutedCnt int
// hookErrs records hook execution errors if any.
hookErrs []HookErrInfo
hookErrs []HookErrInfo
AsyncItemBlocks *sync.WaitGroup
}
// NewHookTracker creates a hookTracker instance.
func NewHookTracker() *HookTracker {
return &HookTracker{
lock: &sync.RWMutex{},
tracker: make(map[hookKey]hookStatus),
lock: &sync.RWMutex{},
tracker: make(map[hookKey]hookStatus),
AsyncItemBlocks: &sync.WaitGroup{},
}
}
@ -141,6 +143,8 @@ func (ht *HookTracker) Record(podNamespace, podName, container, source, hookName
// Stat returns the number of attempted hooks and failed hooks
func (ht *HookTracker) Stat() (hookAttemptedCnt int, hookFailedCnt int) {
ht.AsyncItemBlocks.Wait()
ht.lock.RLock()
defer ht.lock.RUnlock()

View File

@ -115,6 +115,16 @@ type DataDownloadStatus struct {
// Node is name of the node where the DataDownload is processed.
// +optional
Node string `json:"node,omitempty"`
// AcceptedByNode is the name of the node where the DataDownload is prepared.
// +optional
AcceptedByNode string `json:"acceptedByNode,omitempty"`
// AcceptedTimestamp records the time the DataDownload is to be prepared.
// The server's time is used for AcceptedTimestamp
// +optional
// +nullable
AcceptedTimestamp *metav1.Time `json:"acceptedTimestamp,omitempty"`
}
// TODO(2.0) After converting all resources to use the runtime-controller client, the genclient and k8s:deepcopy markers will no longer be needed and should be removed.

View File

@ -144,6 +144,15 @@ type DataUploadStatus struct {
// Node is name of the node where the DataUpload is processed.
// +optional
Node string `json:"node,omitempty"`
// AcceptedByNode is the name of the node where the DataUpload is prepared.
// +optional
AcceptedByNode string `json:"acceptedByNode,omitempty"`
// AcceptedTimestamp records the time the DataUpload is to be prepared.
// The server's time is used for AcceptedTimestamp
// +optional
// +nullable
AcceptedTimestamp *metav1.Time `json:"acceptedTimestamp,omitempty"`
}
// TODO(2.0) After converting all resources to use the runtime-controller client,

View File

@ -118,6 +118,10 @@ func (in *DataDownloadStatus) DeepCopyInto(out *DataDownloadStatus) {
*out = (*in).DeepCopy()
}
out.Progress = in.Progress
if in.AcceptedTimestamp != nil {
in, out := &in.AcceptedTimestamp, &out.AcceptedTimestamp
*out = (*in).DeepCopy()
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataDownloadStatus.
@ -266,6 +270,10 @@ func (in *DataUploadStatus) DeepCopyInto(out *DataUploadStatus) {
*out = (*in).DeepCopy()
}
out.Progress = in.Progress
if in.AcceptedTimestamp != nil {
in, out := &in.AcceptedTimestamp, &out.AcceptedTimestamp
*out = (*in).DeepCopy()
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataUploadStatus.

View File

@ -35,9 +35,12 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/selection"
kubeerrs "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/wait"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/hook"
@ -488,7 +491,7 @@ func (kb *kubernetesBackupper) BackupWithResolvers(
addNextToBlock := i < len(items)-1 && items[i].orderedResource && items[i+1].orderedResource && items[i].groupResource == items[i+1].groupResource
if itemBlock != nil && len(itemBlock.Items) > 0 && !addNextToBlock {
log.Infof("Backing Up Item Block including %s %s/%s (%v items in block)", items[i].groupResource.String(), items[i].namespace, items[i].name, len(itemBlock.Items))
backedUpGRs := kb.backupItemBlock(*itemBlock)
backedUpGRs := kb.backupItemBlock(ctx, *itemBlock)
for _, backedUpGR := range backedUpGRs {
backedUpGroupResources[backedUpGR] = true
}
@ -649,7 +652,7 @@ func (kb *kubernetesBackupper) executeItemBlockActions(
}
}
func (kb *kubernetesBackupper) backupItemBlock(itemBlock BackupItemBlock) []schema.GroupResource {
func (kb *kubernetesBackupper) backupItemBlock(ctx context.Context, itemBlock BackupItemBlock) []schema.GroupResource {
// find pods in ItemBlock
// filter pods based on whether they still need to be backed up
// this list will be used to run pre/post hooks
@ -672,7 +675,7 @@ func (kb *kubernetesBackupper) backupItemBlock(itemBlock BackupItemBlock) []sche
}
}
}
postHookPods, failedPods, errs := kb.handleItemBlockHooks(itemBlock, preHookPods, hook.PhasePre)
postHookPods, failedPods, errs := kb.handleItemBlockPreHooks(itemBlock, preHookPods)
for i, pod := range failedPods {
itemBlock.Log.WithError(errs[i]).WithField("name", pod.Item.GetName()).Error("Error running pre hooks for pod")
// if pre hook fails, flag pod as backed-up and move on
@ -692,10 +695,9 @@ func (kb *kubernetesBackupper) backupItemBlock(itemBlock BackupItemBlock) []sche
}
}
itemBlock.Log.Debug("Executing post hooks")
_, failedPods, errs = kb.handleItemBlockHooks(itemBlock, postHookPods, hook.PhasePost)
for i, pod := range failedPods {
itemBlock.Log.WithError(errs[i]).WithField("name", pod.Item.GetName()).Error("Error running post hooks for pod")
if len(postHookPods) > 0 {
itemBlock.Log.Debug("Executing post hooks")
go kb.handleItemBlockPostHooks(ctx, itemBlock, postHookPods)
}
return grList
@ -714,12 +716,12 @@ func (kb *kubernetesBackupper) itemMetadataAndKey(item itemblock.ItemBlockItem)
return metadata, key, nil
}
func (kb *kubernetesBackupper) handleItemBlockHooks(itemBlock BackupItemBlock, hookPods []itemblock.ItemBlockItem, phase hook.HookPhase) ([]itemblock.ItemBlockItem, []itemblock.ItemBlockItem, []error) {
func (kb *kubernetesBackupper) handleItemBlockPreHooks(itemBlock BackupItemBlock, hookPods []itemblock.ItemBlockItem) ([]itemblock.ItemBlockItem, []itemblock.ItemBlockItem, []error) {
var successPods []itemblock.ItemBlockItem
var failedPods []itemblock.ItemBlockItem
var errs []error
for _, pod := range hookPods {
err := itemBlock.itemBackupper.itemHookHandler.HandleHooks(itemBlock.Log, pod.Gr, pod.Item, itemBlock.itemBackupper.backupRequest.ResourceHooks, phase, itemBlock.itemBackupper.hookTracker)
err := itemBlock.itemBackupper.itemHookHandler.HandleHooks(itemBlock.Log, pod.Gr, pod.Item, itemBlock.itemBackupper.backupRequest.ResourceHooks, hook.PhasePre, itemBlock.itemBackupper.hookTracker)
if err == nil {
successPods = append(successPods, pod)
} else {
@ -730,6 +732,83 @@ func (kb *kubernetesBackupper) handleItemBlockHooks(itemBlock BackupItemBlock, h
return successPods, failedPods, errs
}
// handleItemBlockPostHooks runs the post-backup hooks for every pod in the
// ItemBlock. It is invoked asynchronously (in its own goroutine) and registers
// itself with the hook tracker's WaitGroup so that HookTracker.Stat() blocks
// until all in-flight post hooks are finished. Post hooks must not execute
// until the block's PodVolumeBackups have been processed.
func (kb *kubernetesBackupper) handleItemBlockPostHooks(ctx context.Context, itemBlock BackupItemBlock, hookPods []itemblock.ItemBlockItem) {
	log := itemBlock.Log
	backupper := itemBlock.itemBackupper

	backupper.hookTracker.AsyncItemBlocks.Add(1)
	defer backupper.hookTracker.AsyncItemBlocks.Done()

	// Wait for the PVBs belonging to these pods to reach a terminal phase;
	// if waiting fails (e.g. context canceled), skip the hooks entirely.
	if err := kb.waitUntilPVBsProcessed(ctx, log, itemBlock, hookPods); err != nil {
		log.WithError(err).Error("failed to wait PVBs processed for the ItemBlock")
		return
	}

	for _, pod := range hookPods {
		err := backupper.itemHookHandler.HandleHooks(log, pod.Gr, pod.Item, backupper.backupRequest.ResourceHooks, hook.PhasePost, backupper.hookTracker)
		if err != nil {
			// Hook failures are logged (and recorded by the tracker) but do
			// not abort the remaining pods' hooks.
			log.WithError(err).WithField("name", pod.Item.GetName()).Error("Error running post hooks for pod")
		}
	}
}
// waitUntilPVBsProcessed polls until every PodVolumeBackup belonging to the
// given pods (matched by the backup's UID label and the pod UID) reaches a
// terminal phase (Completed or Failed), or until ctx is canceled.
//
// Returns an error if the label requirement cannot be built, the initial PVB
// list fails, or ctx is canceled before all PVBs are processed.
func (kb *kubernetesBackupper) waitUntilPVBsProcessed(ctx context.Context, log logrus.FieldLogger, itemBlock BackupItemBlock, pods []itemblock.ItemBlockItem) error {
	requirement, err := labels.NewRequirement(velerov1api.BackupUIDLabel, selection.Equals, []string{string(itemBlock.itemBackupper.backupRequest.UID)})
	if err != nil {
		return errors.Wrapf(err, "failed to create label requirement")
	}
	options := &kbclient.ListOptions{
		LabelSelector: labels.NewSelector().Add(*requirement),
	}
	pvbList := &velerov1api.PodVolumeBackupList{}
	// Use the caller's ctx (not context.Background()) so the initial list
	// honors cancellation/timeout like the polling below does.
	if err := kb.kbClient.List(ctx, pvbList, options); err != nil {
		return errors.Wrap(err, "failed to list PVBs")
	}

	// Index the block's pods by UID so only PVBs for these pods are tracked.
	podMap := map[string]struct{}{}
	for _, pod := range pods {
		podMap[string(pod.Item.GetUID())] = struct{}{}
	}

	// pvbMap records, per relevant PVB, whether it has reached a terminal phase.
	pvbMap := map[*velerov1api.PodVolumeBackup]bool{}
	for i, pvb := range pvbList.Items {
		if _, exist := podMap[string(pvb.Spec.Pod.UID)]; !exist {
			continue
		}

		processed := false
		if pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseCompleted ||
			pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseFailed {
			processed = true
		}

		pvbMap[&pvbList.Items[i]] = processed
	}

	checkFunc := func(context.Context) (done bool, err error) {
		allProcessed := true
		for pvb, processed := range pvbMap {
			if processed {
				continue
			}

			updatedPVB := &velerov1api.PodVolumeBackup{}
			if err := kb.kbClient.Get(ctx, kbclient.ObjectKeyFromObject(pvb), updatedPVB); err != nil {
				// A transient Get failure keeps us polling rather than aborting.
				allProcessed = false
				log.Infof("failed to get PVB: %v", err)
				continue
			}

			if updatedPVB.Status.Phase == velerov1api.PodVolumeBackupPhaseCompleted ||
				updatedPVB.Status.Phase == velerov1api.PodVolumeBackupPhaseFailed {
				pvbMap[pvb] = true
				continue
			}

			allProcessed = false
		}

		return allProcessed, nil
	}

	return wait.PollUntilContextCancel(ctx, 5*time.Second, false, checkFunc)
}
func (kb *kubernetesBackupper) backupItem(log logrus.FieldLogger, gr schema.GroupResource, itemBackupper *itemBackupper, unstructured *unstructured.Unstructured, preferredGVR schema.GroupVersionResource, itemBlock *BackupItemBlock) bool {
backedUpItem, _, err := itemBackupper.backupItem(log, unstructured, gr, preferredGVR, false, false, itemBlock)
if aggregate, ok := err.(kubeerrs.Aggregate); ok {

View File

@ -3464,57 +3464,59 @@ func TestBackupWithHooks(t *testing.T) {
wantBackedUp []string
wantHookExecutionLog []test.HookExecutionEntry
}{
{
name: "pre hook with no resource filters runs for all pods",
backup: defaultBackup().
Hooks(velerov1.BackupHooks{
Resources: []velerov1.BackupResourceHookSpec{
{
Name: "hook-1",
PreHooks: []velerov1.BackupResourceHook{
{
Exec: &velerov1.ExecHook{
Command: []string{"ls", "/tmp"},
/*
{
name: "pre hook with no resource filters runs for all pods",
backup: defaultBackup().
Hooks(velerov1.BackupHooks{
Resources: []velerov1.BackupResourceHookSpec{
{
Name: "hook-1",
PreHooks: []velerov1.BackupResourceHook{
{
Exec: &velerov1.ExecHook{
Command: []string{"ls", "/tmp"},
},
},
},
},
},
},
}).
Result(),
apiResources: []*test.APIResource{
test.Pods(
builder.ForPod("ns-1", "pod-1").Result(),
builder.ForPod("ns-2", "pod-2").Result(),
),
},
wantExecutePodCommandCalls: []*expectedCall{
{
podNamespace: "ns-1",
podName: "pod-1",
hookName: "hook-1",
hook: &velerov1.ExecHook{
Command: []string{"ls", "/tmp"},
},
err: nil,
}).
Result(),
apiResources: []*test.APIResource{
test.Pods(
builder.ForPod("ns-1", "pod-1").Result(),
builder.ForPod("ns-2", "pod-2").Result(),
),
},
{
podNamespace: "ns-2",
podName: "pod-2",
hookName: "hook-1",
hook: &velerov1.ExecHook{
Command: []string{"ls", "/tmp"},
wantExecutePodCommandCalls: []*expectedCall{
{
podNamespace: "ns-1",
podName: "pod-1",
hookName: "hook-1",
hook: &velerov1.ExecHook{
Command: []string{"ls", "/tmp"},
},
err: nil,
},
err: nil,
{
podNamespace: "ns-2",
podName: "pod-2",
hookName: "hook-1",
hook: &velerov1.ExecHook{
Command: []string{"ls", "/tmp"},
},
err: nil,
},
},
wantBackedUp: []string{
"resources/pods/namespaces/ns-1/pod-1.json",
"resources/pods/namespaces/ns-2/pod-2.json",
"resources/pods/v1-preferredversion/namespaces/ns-1/pod-1.json",
"resources/pods/v1-preferredversion/namespaces/ns-2/pod-2.json",
},
},
wantBackedUp: []string{
"resources/pods/namespaces/ns-1/pod-1.json",
"resources/pods/namespaces/ns-2/pod-2.json",
"resources/pods/v1-preferredversion/namespaces/ns-1/pod-1.json",
"resources/pods/v1-preferredversion/namespaces/ns-2/pod-2.json",
},
},
*/
{
name: "post hook with no resource filters runs for all pods",
backup: defaultBackup().
@ -3926,7 +3928,17 @@ func TestBackupWithHooks(t *testing.T) {
require.NoError(t, h.backupper.Backup(h.log, req, backupFile, nil, tc.actions, nil))
if tc.wantHookExecutionLog != nil {
assert.Equal(t, tc.wantHookExecutionLog, podCommandExecutor.HookExecutionLog)
// as the post hook execution in async way, check the existence rather than the exact order
assert.Equal(t, len(tc.wantHookExecutionLog), len(podCommandExecutor.HookExecutionLog))
m := map[string]struct{}{}
for _, entry := range podCommandExecutor.HookExecutionLog {
m[entry.String()] = struct{}{}
}
for _, entry := range tc.wantHookExecutionLog {
_, exist := m[entry.String()]
assert.True(t, exist)
}
}
assertTarballContents(t, backupFile, append(tc.wantBackedUp, "metadata/version")...)
})
@ -4232,7 +4244,7 @@ func newHarness(t *testing.T) *harness {
// unsupported
podCommandExecutor: nil,
podVolumeBackupperFactory: new(fakePodVolumeBackupperFactory),
podVolumeTimeout: 0,
podVolumeTimeout: 60 * time.Second,
},
log: log,
}

View File

@ -147,3 +147,15 @@ func (d *DataDownloadBuilder) Node(node string) *DataDownloadBuilder {
d.object.Status.Node = node
return d
}
// AcceptedByNode sets the DataDownload's Status.AcceptedByNode, i.e. the name
// of the node whose node-agent accepted the DataDownload.
func (d *DataDownloadBuilder) AcceptedByNode(node string) *DataDownloadBuilder {
	d.object.Status.AcceptedByNode = node
	return d
}

// AcceptedTimestamp sets the DataDownload's Status.AcceptedTimestamp, i.e. the
// (server) time at which the DataDownload was accepted. May be nil.
func (d *DataDownloadBuilder) AcceptedTimestamp(acceptedTimestamp *metav1.Time) *DataDownloadBuilder {
	d.object.Status.AcceptedTimestamp = acceptedTimestamp
	return d
}

View File

@ -150,3 +150,15 @@ func (d *DataUploadBuilder) Node(node string) *DataUploadBuilder {
d.object.Status.Node = node
return d
}
// AcceptedByNode sets the DataUpload's Status.AcceptedByNode, i.e. the name
// of the node whose node-agent accepted the DataUpload.
func (d *DataUploadBuilder) AcceptedByNode(node string) *DataUploadBuilder {
	d.object.Status.AcceptedByNode = node
	return d
}

// AcceptedTimestamp sets the DataUpload's Status.AcceptedTimestamp, i.e. the
// (server) time at which the DataUpload was accepted. May be nil.
func (d *DataUploadBuilder) AcceptedTimestamp(acceptedTimestamp *metav1.Time) *DataUploadBuilder {
	d.object.Status.AcceptedTimestamp = acceptedTimestamp
	return d
}

View File

@ -42,34 +42,32 @@ import (
// Options collects all the options for installing Velero into a Kubernetes cluster.
type Options struct {
Namespace string
Image string
BucketName string
Prefix string
ProviderName string
PodAnnotations flag.Map
PodLabels flag.Map
ServiceAccountAnnotations flag.Map
ServiceAccountName string
VeleroPodCPURequest string
VeleroPodMemRequest string
VeleroPodCPULimit string
VeleroPodMemLimit string
NodeAgentPodCPURequest string
NodeAgentPodMemRequest string
NodeAgentPodCPULimit string
NodeAgentPodMemLimit string
RestoreOnly bool
SecretFile string
NoSecret bool
DryRun bool
BackupStorageConfig flag.Map
VolumeSnapshotConfig flag.Map
UseNodeAgent bool
UseNodeAgentWindows bool
PrivilegedNodeAgent bool
//TODO remove UseRestic when migration test out of using it
UseRestic bool
Namespace string
Image string
BucketName string
Prefix string
ProviderName string
PodAnnotations flag.Map
PodLabels flag.Map
ServiceAccountAnnotations flag.Map
ServiceAccountName string
VeleroPodCPURequest string
VeleroPodMemRequest string
VeleroPodCPULimit string
VeleroPodMemLimit string
NodeAgentPodCPURequest string
NodeAgentPodMemRequest string
NodeAgentPodCPULimit string
NodeAgentPodMemLimit string
RestoreOnly bool
SecretFile string
NoSecret bool
DryRun bool
BackupStorageConfig flag.Map
VolumeSnapshotConfig flag.Map
UseNodeAgent bool
UseNodeAgentWindows bool
PrivilegedNodeAgent bool
Wait bool
UseVolumeSnapshots bool
DefaultRepoMaintenanceFrequency time.Duration

View File

@ -19,6 +19,7 @@ package controller
import (
"context"
"fmt"
"strings"
"time"
"github.com/pkg/errors"
@ -234,11 +235,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
r.tryCancelAcceptedDataDownload(ctx, dd, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
log.Errorf("Cancel dd %s/%s because of expose error %s", dd.Namespace, dd.Name, peekErr)
} else if at, found := dd.Annotations[acceptTimeAnnoKey]; found {
if t, err := time.Parse(time.RFC3339, at); err == nil {
if time.Since(t) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, dd)
}
} else if dd.Status.AcceptedTimestamp != nil {
if time.Since(dd.Status.AcceptedTimestamp.Time) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, dd)
}
}
@ -647,13 +646,8 @@ func (r *DataDownloadReconciler) acceptDataDownload(ctx context.Context, dd *vel
updateFunc := func(datadownload *velerov2alpha1api.DataDownload) {
datadownload.Status.Phase = velerov2alpha1api.DataDownloadPhaseAccepted
annotations := datadownload.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
annotations[acceptNodeAnnoKey] = r.nodeName
annotations[acceptTimeAnnoKey] = r.Clock.Now().Format(time.RFC3339)
datadownload.SetAnnotations(annotations)
datadownload.Status.AcceptedByNode = r.nodeName
datadownload.Status.AcceptedTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
succeeded, err := r.exclusiveUpdateDataDownload(ctx, updated, updateFunc)
@ -691,6 +685,11 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
return
}
diags := strings.Split(r.restoreExposer.DiagnoseExpose(ctx, getDataDownloadOwnerObject(dd)), "\n")
for _, diag := range diags {
log.Warnf("[Diagnose DD expose]%s", diag)
}
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
log.Info("Dataupload has been cleaned up")

View File

@ -349,7 +349,7 @@ func TestDataDownloadReconcile(t *testing.T) {
},
{
name: "prepare timeout",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{acceptTimeAnnoKey: (time.Now().Add(-time.Minute * 5)).Format(time.RFC3339)}).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).AcceptedTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
},
{
@ -971,6 +971,10 @@ func (dt *ddResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReferenc
return nil
}
// DiagnoseExpose satisfies the exposer interface for tests; returns no diagnostics.
func (dt *ddResumeTestHelper) DiagnoseExpose(context.Context, corev1.ObjectReference) string {
	return ""
}

// RebindVolume satisfies the exposer interface for tests; always succeeds.
func (dt *ddResumeTestHelper) RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error {
	return nil
}
@ -1003,23 +1007,19 @@ func TestAttemptDataDownloadResume(t *testing.T) {
},
{
name: "accepted DataDownload in the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).AcceptedByNode("node-1").Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but is canceled",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).Annotations(map[string]string{
acceptNodeAnnoKey: "node-1",
}).Result(),
name: "accepted DataDownload with dd label but is canceled",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).AcceptedByNode("node-1").Result(),
acceptedDataDownloads: []string{dataDownloadName},
cancelledDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but cancel fail",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{
acceptNodeAnnoKey: "node-1",
}).Result(),
name: "accepted DataDownload with dd label but cancel fail",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).AcceptedByNode("node-1").Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataDownloads: []string{dataDownloadName},
},

View File

@ -19,6 +19,7 @@ package controller
import (
"context"
"fmt"
"strings"
"time"
snapshotter "github.com/kubernetes-csi/external-snapshotter/client/v7/clientset/versioned/typed/volumesnapshot/v1"
@ -56,8 +57,6 @@ import (
const (
dataUploadDownloadRequestor = "snapshot-data-upload-download"
acceptNodeAnnoKey = "velero.io/accepted-by"
acceptTimeAnnoKey = "velero.io/accepted-at"
DataUploadDownloadFinalizer = "velero.io/data-upload-download-finalizer"
preparingMonitorFrequency = time.Minute
)
@ -257,11 +256,9 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
} else if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
r.tryCancelAcceptedDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
log.Errorf("Cancel du %s/%s because of expose error %s", du.Namespace, du.Name, peekErr)
} else if at, found := du.Annotations[acceptTimeAnnoKey]; found {
if t, err := time.Parse(time.RFC3339, at); err == nil {
if time.Since(t) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, du)
}
} else if du.Status.AcceptedTimestamp != nil {
if time.Since(du.Status.AcceptedTimestamp.Time) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, du)
}
}
@ -705,13 +702,8 @@ func (r *DataUploadReconciler) acceptDataUpload(ctx context.Context, du *velerov
updateFunc := func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Status.Phase = velerov2alpha1api.DataUploadPhaseAccepted
annotations := dataUpload.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
annotations[acceptNodeAnnoKey] = r.nodeName
annotations[acceptTimeAnnoKey] = r.Clock.Now().Format(time.RFC3339)
dataUpload.SetAnnotations(annotations)
dataUpload.Status.AcceptedByNode = r.nodeName
dataUpload.Status.AcceptedTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
succeeded, err := r.exclusiveUpdateDataUpload(ctx, updated, updateFunc)
@ -760,6 +752,11 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov
volumeSnapshotName = du.Spec.CSISnapshot.VolumeSnapshot
}
diags := strings.Split(ep.DiagnoseExpose(ctx, getOwnerObject(du)), "\n")
for _, diag := range diags {
log.Warnf("[Diagnose DU expose]%s", diag)
}
ep.CleanUp(ctx, getOwnerObject(du), volumeSnapshotName, du.Spec.SourceNamespace)
log.Info("Dataupload has been cleaned up")

View File

@ -300,6 +300,10 @@ func (f *fakeSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev
return f.peekErr
}
// DiagnoseExpose satisfies the snapshot exposer interface for tests; returns no diagnostics.
func (f *fakeSnapshotExposer) DiagnoseExpose(context.Context, corev1.ObjectReference) string {
	return ""
}

// CleanUp satisfies the snapshot exposer interface for tests; no-op.
func (f *fakeSnapshotExposer) CleanUp(context.Context, corev1.ObjectReference, string, string) {
}
@ -475,7 +479,7 @@ func TestReconcile(t *testing.T) {
},
{
name: "prepare timeout",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Annotations(map[string]string{acceptTimeAnnoKey: (time.Now().Add(-time.Minute * 5)).Format(time.RFC3339)}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).AcceptedTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
},
{
@ -1043,6 +1047,10 @@ func (dt *duResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReferenc
return nil
}
// DiagnoseExpose satisfies the snapshot exposer interface for tests; returns no diagnostics.
func (dt *duResumeTestHelper) DiagnoseExpose(context.Context, corev1.ObjectReference) string {
	return ""
}

// CleanUp satisfies the snapshot exposer interface for tests; no-op.
func (dt *duResumeTestHelper) CleanUp(context.Context, corev1.ObjectReference, string, string) {}
func (dt *duResumeTestHelper) newMicroServiceBRWatcher(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string,
@ -1071,19 +1079,19 @@ func TestAttemptDataUploadResume(t *testing.T) {
},
{
name: "accepted DataUpload in the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).AcceptedByNode("node-1").Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but canceled",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Cancel(true).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).AcceptedByNode("node-1").Cancel(true).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but update error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).AcceptedByNode("node-1").Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataUploads: []string{dataUploadName},
},

View File

@ -308,6 +308,70 @@ func (e *csiSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev1
return nil
}
// DiagnoseExpose collects diagnostic information about the CSI expose for the
// given owner object and returns it as a multi-line, human-readable report.
// It inspects the backup pod, the backup PVC (and its bound PV), and the
// VolumeSnapshot (and its bound VolumeSnapshotContent); all expose resources
// share the owner object's name. Lookup failures are recorded in the report
// rather than aborting, so a partial diagnosis is still produced.
func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject corev1.ObjectReference) string {
	// The expose resources are all named after the owner object.
	backupPodName := ownerObject.Name
	backupPVCName := ownerObject.Name
	backupVSName := ownerObject.Name

	diag := "begin diagnose CSI exposer\n"

	pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, backupPodName, metav1.GetOptions{})
	if err != nil {
		pod = nil
		diag += fmt.Sprintf("error getting backup pod %s, err: %v\n", backupPodName, err)
	}

	pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, backupPVCName, metav1.GetOptions{})
	if err != nil {
		pvc = nil
		diag += fmt.Sprintf("error getting backup pvc %s, err: %v\n", backupPVCName, err)
	}

	vs, err := e.csiSnapshotClient.VolumeSnapshots(ownerObject.Namespace).Get(ctx, backupVSName, metav1.GetOptions{})
	if err != nil {
		vs = nil
		diag += fmt.Sprintf("error getting backup vs %s, err: %v\n", backupVSName, err)
	}

	if pod != nil {
		diag += kube.DiagnosePod(pod)

		// If the pod has been scheduled, also check that the node-agent is
		// running on that node, since the expose depends on it.
		if pod.Spec.NodeName != "" {
			if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil {
				diag += fmt.Sprintf("node-agent is not running in node %s, err: %v\n", pod.Spec.NodeName, err)
			}
		}
	}

	if pvc != nil {
		diag += kube.DiagnosePVC(pvc)

		// If the PVC is bound, include the bound PV's diagnosis too.
		if pvc.Spec.VolumeName != "" {
			if pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}); err != nil {
				diag += fmt.Sprintf("error getting backup pv %s, err: %v\n", pvc.Spec.VolumeName, err)
			} else {
				diag += kube.DiagnosePV(pv)
			}
		}
	}

	if vs != nil {
		diag += csi.DiagnoseVS(vs)

		// If the snapshot is bound to a content object, diagnose that as well.
		if vs.Status != nil && vs.Status.BoundVolumeSnapshotContentName != nil && *vs.Status.BoundVolumeSnapshotContentName != "" {
			if vsc, err := e.csiSnapshotClient.VolumeSnapshotContents().Get(ctx, *vs.Status.BoundVolumeSnapshotContentName, metav1.GetOptions{}); err != nil {
				diag += fmt.Sprintf("error getting backup vsc %s, err: %v\n", *vs.Status.BoundVolumeSnapshotContentName, err)
			} else {
				diag += csi.DiagnoseVSC(vsc)
			}
		}
	}

	diag += "end diagnose CSI exposer"

	return diag
}
const cleanUpTimeout = time.Minute
func (e *csiSnapshotExposer) CleanUp(ctx context.Context, ownerObject corev1.ObjectReference, vsName string, sourceNamespace string) {

View File

@ -959,3 +959,394 @@ func Test_csiSnapshotExposer_createBackupPVC(t *testing.T) {
})
}
}
// Test_csiSnapshotExposer_DiagnoseExpose verifies the diagnostic string that
// the CSI snapshot exposer produces for the various partially-exposed states:
// missing/unscheduled backup pod, missing node-agent, unbound backup PVC,
// missing PV, and a VolumeSnapshot with/without a bound VolumeSnapshotContent.
// Expected strings must match the exposer output byte-for-byte (including
// trailing spaces where a formatted field is empty).
func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) {
	// Owner Backup object; the exposer derives the backup pod/PVC/VS names
	// from the owner's name.
	backup := &velerov1.Backup{
		TypeMeta: metav1.TypeMeta{
			APIVersion: velerov1.SchemeGroupVersion.String(),
			Kind:       "Backup",
		},
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			UID:       "fake-uid",
		},
	}

	// Backup pod fixture that has not been scheduled (no node name).
	backupPodWithoutNodeName := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodPending,
			Conditions: []corev1.PodCondition{
				{
					Type:    corev1.PodInitialized,
					Status:  corev1.ConditionTrue,
					Message: "fake-pod-message",
				},
			},
		},
	}

	// Backup pod fixture scheduled to "fake-node"; used to exercise the
	// node-agent presence check.
	backupPodWithNodeName := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodPending,
			Conditions: []corev1.PodCondition{
				{
					Type:    corev1.PodInitialized,
					Status:  corev1.ConditionTrue,
					Message: "fake-pod-message",
				},
			},
		},
	}

	// Backup PVC fixtures: one unbound, one bound to "fake-pv".
	backupPVCWithoutVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	backupPVCWithVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
		Spec: corev1.PersistentVolumeClaimSpec{
			VolumeName: "fake-pv",
		},
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	// PV backing the bound PVC.
	backupPV := corev1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{
			Name: "fake-pv",
		},
		Status: corev1.PersistentVolumeStatus{
			Phase:   corev1.VolumePending,
			Message: "fake-pv-message",
		},
	}

	// VolumeSnapshotContent fixture that is not ready and carries an error.
	readyToUse := false
	vscMessage := "fake-vsc-message"
	backupVSC := snapshotv1api.VolumeSnapshotContent{
		ObjectMeta: metav1.ObjectMeta{
			Name: "fake-vsc",
		},
		Status: &snapshotv1api.VolumeSnapshotContentStatus{
			ReadyToUse: &readyToUse,
			Error: &snapshotv1api.VolumeSnapshotError{
				Message: &vscMessage,
			},
		},
	}

	// VolumeSnapshot fixtures: no status at all, empty status, and a status
	// bound to backupVSC with an error message.
	backupVSWithoutStatus := snapshotv1api.VolumeSnapshot{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
	}

	backupVSWithoutVSC := snapshotv1api.VolumeSnapshot{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
		Status: &snapshotv1api.VolumeSnapshotStatus{},
	}

	vsMessage := "fake-vs-message"
	backupVSWithVSC := snapshotv1api.VolumeSnapshot{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		},
		Status: &snapshotv1api.VolumeSnapshotStatus{
			BoundVolumeSnapshotContentName: &backupVSC.Name,
			Error: &snapshotv1api.VolumeSnapshotError{
				Message: &vsMessage,
			},
		},
	}

	// Running node-agent pod on "fake-node"; its presence satisfies the
	// KbClientIsRunningInNode check (matched by the name=node-agent label).
	nodeAgentPod := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "node-agent-pod-1",
			Labels:    map[string]string{"name": "node-agent"},
		},
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodRunning,
		},
	}

	tests := []struct {
		name              string
		ownerBackup       *velerov1.Backup
		kubeClientObj     []runtime.Object
		snapshotClientObj []runtime.Object
		expected          string
	}{
		{
			name:        "no pod, pvc, vs",
			ownerBackup: backup,
			expected: `begin diagnose CSI exposer
error getting backup pod fake-backup, err: pods "fake-backup" not found
error getting backup pvc fake-backup, err: persistentvolumeclaims "fake-backup" not found
error getting backup vs fake-backup, err: volumesnapshots.snapshot.storage.k8s.io "fake-backup" not found
end diagnose CSI exposer`,
		},
		{
			name:        "pod without node name, pvc without volume name, vs without status",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithoutNodeName,
				&backupPVCWithoutVolumeName,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutStatus,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name 
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to 
VS velero/fake-backup, bind to , readyToUse false, errMessage 
end diagnose CSI exposer`,
		},
		{
			name:        "pod without node name, pvc without volume name, vs without VSC",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithoutNodeName,
				&backupPVCWithoutVolumeName,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name 
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to 
VS velero/fake-backup, bind to , readyToUse false, errMessage 
end diagnose CSI exposer`,
		},
		{
			name:        "pod with node name, no node agent",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithoutVolumeName,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
node-agent is not running in node fake-node, err: daemonset pod not found in running state in node fake-node
PVC velero/fake-backup, phase Pending, binding to 
VS velero/fake-backup, bind to , readyToUse false, errMessage 
end diagnose CSI exposer`,
		},
		{
			name:        "pod with node name, node agent is running",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithoutVolumeName,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to 
VS velero/fake-backup, bind to , readyToUse false, errMessage 
end diagnose CSI exposer`,
		},
		{
			name:        "pvc with volume name, no pv",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to fake-pv
error getting backup pv fake-pv, err: persistentvolumes "fake-pv" not found
VS velero/fake-backup, bind to , readyToUse false, errMessage 
end diagnose CSI exposer`,
		},
		{
			name:        "pvc with volume name, pv exists",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&backupPV,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to fake-pv
PV fake-pv, phase Pending, reason , message fake-pv-message
VS velero/fake-backup, bind to , readyToUse false, errMessage 
end diagnose CSI exposer`,
		},
		{
			name:        "vs with vsc, vsc doesn't exist",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&backupPV,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to fake-pv
PV fake-pv, phase Pending, reason , message fake-pv-message
VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message
error getting backup vsc fake-vsc, err: volumesnapshotcontents.snapshot.storage.k8s.io "fake-vsc" not found
end diagnose CSI exposer`,
		},
		{
			name:        "vs with vsc, vsc exists",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&backupPV,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithVSC,
				&backupVSC,
			},
			expected: `begin diagnose CSI exposer
Pod velero/fake-backup, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-backup, phase Pending, binding to fake-pv
PV fake-pv, phase Pending, reason , message fake-pv-message
VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message
VSC fake-vsc, readyToUse false, errMessage fake-vsc-message, handle 
end diagnose CSI exposer`,
		},
	}

	// Run each case against fake kube and snapshot clientsets; the exposer's
	// diagnostic output is compared verbatim against the expected string.
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fakeKubeClient := fake.NewSimpleClientset(tt.kubeClientObj...)
			fakeSnapshotClient := snapshotFake.NewSimpleClientset(tt.snapshotClientObj...)
			e := &csiSnapshotExposer{
				kubeClient:        fakeKubeClient,
				csiSnapshotClient: fakeSnapshotClient.SnapshotV1(),
				log:               velerotest.NewLogger(),
			}
			var ownerObject corev1.ObjectReference
			if tt.ownerBackup != nil {
				ownerObject = corev1.ObjectReference{
					Kind:       tt.ownerBackup.Kind,
					Namespace:  tt.ownerBackup.Namespace,
					Name:       tt.ownerBackup.Name,
					UID:        tt.ownerBackup.UID,
					APIVersion: tt.ownerBackup.APIVersion,
				}
			}
			diag := e.DiagnoseExpose(context.Background(), ownerObject)
			assert.Equal(t, tt.expected, diag)
		})
	}
}

View File

@ -30,6 +30,7 @@ import (
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
@ -49,6 +50,10 @@ type GenericRestoreExposer interface {
// Otherwise, it returns nil immediately.
PeekExposed(context.Context, corev1.ObjectReference) error
// DiagnoseExpose generate the diagnostic info when the expose is not finished for a long time.
// If it finds any problem, it returns an string about the problem.
DiagnoseExpose(context.Context, corev1.ObjectReference) string
// RebindVolume unexposes the restored PV and rebind it to the target PVC
RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error
@ -195,6 +200,51 @@ func (e *genericRestoreExposer) PeekExposed(ctx context.Context, ownerObject cor
return nil
}
// DiagnoseExpose gathers diagnostic information about the restore pod, the
// restore PVC, and (when bound) the backing PV, for use when the expose has
// not completed within the expected time. Retrieval failures are recorded in
// the returned text rather than surfaced as errors, so the caller always gets
// whatever could be collected.
func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject corev1.ObjectReference) string {
	// Both the restore pod and the restore PVC are named after the owner.
	podName := ownerObject.Name
	pvcName := ownerObject.Name

	diag := "begin diagnose restore exposer\n"

	pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, podName, metav1.GetOptions{})
	if err != nil {
		diag += fmt.Sprintf("error getting restore pod %s, err: %v\n", podName, err)
		// Discard whatever the client returned alongside the error.
		pod = nil
	}

	pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, pvcName, metav1.GetOptions{})
	if err != nil {
		diag += fmt.Sprintf("error getting restore pvc %s, err: %v\n", pvcName, err)
		pvc = nil
	}

	if pod != nil {
		diag += kube.DiagnosePod(pod)

		// If the pod has been scheduled, also verify a node-agent is running
		// on that node, since the data path depends on it.
		if nodeName := pod.Spec.NodeName; nodeName != "" {
			if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, nodeName, e.kubeClient); err != nil {
				diag += fmt.Sprintf("node-agent is not running in node %s, err: %v\n", nodeName, err)
			}
		}
	}

	if pvc != nil {
		diag += kube.DiagnosePVC(pvc)

		// If the PVC is bound, include the backing PV's state as well.
		if volumeName := pvc.Spec.VolumeName; volumeName != "" {
			pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, volumeName, metav1.GetOptions{})
			if err != nil {
				diag += fmt.Sprintf("error getting restore pv %s, err: %v\n", volumeName, err)
			} else {
				diag += kube.DiagnosePV(pv)
			}
		}
	}

	diag += "end diagnose restore exposer"
	return diag
}
func (e *genericRestoreExposer) CleanUp(ctx context.Context, ownerObject corev1.ObjectReference) {
restorePodName := ownerObject.Name
restorePVCName := ownerObject.Name

View File

@ -507,3 +507,258 @@ func TestRestorePeekExpose(t *testing.T) {
})
}
}
// Test_ReastoreDiagnoseExpose verifies the diagnostic string the generic
// restore exposer produces for partially-exposed states: missing/unscheduled
// restore pod, missing node-agent, unbound restore PVC, and missing/present
// backing PV.
//
// NOTE(review): the function name carries a "Reastore" typo; it is kept here
// so existing `go test -run` invocations keep matching.
//
// Fix: the original table contained two byte-identical cases, the first of
// them named "... vs without status" — copy-paste residue from the CSI
// exposer test (the restore exposer has no VolumeSnapshot). The redundant
// case has been removed.
func Test_ReastoreDiagnoseExpose(t *testing.T) {
	// Owner Restore object; the exposer derives the pod/PVC names from it.
	restore := &velerov1.Restore{
		TypeMeta: metav1.TypeMeta{
			APIVersion: velerov1.SchemeGroupVersion.String(),
			Kind:       "Restore",
		},
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			UID:       "fake-uid",
		},
	}

	// Restore pod fixture that has not been scheduled (no node name).
	restorePodWithoutNodeName := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: restore.APIVersion,
					Kind:       restore.Kind,
					Name:       restore.Name,
					UID:        restore.UID,
				},
			},
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodPending,
			Conditions: []corev1.PodCondition{
				{
					Type:    corev1.PodInitialized,
					Status:  corev1.ConditionTrue,
					Message: "fake-pod-message",
				},
			},
		},
	}

	// Restore pod fixture scheduled to "fake-node"; exercises the node-agent
	// presence check.
	restorePodWithNodeName := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: restore.APIVersion,
					Kind:       restore.Kind,
					Name:       restore.Name,
					UID:        restore.UID,
				},
			},
		},
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodPending,
			Conditions: []corev1.PodCondition{
				{
					Type:    corev1.PodInitialized,
					Status:  corev1.ConditionTrue,
					Message: "fake-pod-message",
				},
			},
		},
	}

	// Restore PVC fixtures: one unbound, one bound to "fake-pv".
	restorePVCWithoutVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: restore.APIVersion,
					Kind:       restore.Kind,
					Name:       restore.Name,
					UID:        restore.UID,
				},
			},
		},
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	restorePVCWithVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: restore.APIVersion,
					Kind:       restore.Kind,
					Name:       restore.Name,
					UID:        restore.UID,
				},
			},
		},
		Spec: corev1.PersistentVolumeClaimSpec{
			VolumeName: "fake-pv",
		},
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	// PV backing the bound PVC.
	restorePV := corev1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{
			Name: "fake-pv",
		},
		Status: corev1.PersistentVolumeStatus{
			Phase:   corev1.VolumePending,
			Message: "fake-pv-message",
		},
	}

	// Running node-agent pod on "fake-node", matched by the name=node-agent
	// label selector used by the node-agent presence check.
	nodeAgentPod := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "node-agent-pod-1",
			Labels:    map[string]string{"name": "node-agent"},
		},
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodRunning,
		},
	}

	tests := []struct {
		name          string
		ownerRestore  *velerov1.Restore
		kubeClientObj []runtime.Object
		expected      string
	}{
		{
			name:         "no pod, pvc",
			ownerRestore: restore,
			expected: `begin diagnose restore exposer
error getting restore pod fake-restore, err: pods "fake-restore" not found
error getting restore pvc fake-restore, err: persistentvolumeclaims "fake-restore" not found
end diagnose restore exposer`,
		},
		{
			name:         "pod without node name, pvc without volume name",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithoutNodeName,
				&restorePVCWithoutVolumeName,
			},
			expected: `begin diagnose restore exposer
Pod velero/fake-restore, phase Pending, node name 
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-restore, phase Pending, binding to 
end diagnose restore exposer`,
		},
		{
			name:         "pod with node name, no node agent",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithoutVolumeName,
			},
			expected: `begin diagnose restore exposer
Pod velero/fake-restore, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
node-agent is not running in node fake-node, err: daemonset pod not found in running state in node fake-node
PVC velero/fake-restore, phase Pending, binding to 
end diagnose restore exposer`,
		},
		{
			name:         "pod with node name, node agent is running",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithoutVolumeName,
				&nodeAgentPod,
			},
			expected: `begin diagnose restore exposer
Pod velero/fake-restore, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-restore, phase Pending, binding to 
end diagnose restore exposer`,
		},
		{
			name:         "pvc with volume name, no pv",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithVolumeName,
				&nodeAgentPod,
			},
			expected: `begin diagnose restore exposer
Pod velero/fake-restore, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-restore, phase Pending, binding to fake-pv
error getting restore pv fake-pv, err: persistentvolumes "fake-pv" not found
end diagnose restore exposer`,
		},
		{
			name:         "pvc with volume name, pv exists",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithVolumeName,
				&restorePV,
				&nodeAgentPod,
			},
			expected: `begin diagnose restore exposer
Pod velero/fake-restore, phase Pending, node name fake-node
Pod condition Initialized, status True, reason , message fake-pod-message
PVC velero/fake-restore, phase Pending, binding to fake-pv
PV fake-pv, phase Pending, reason , message fake-pv-message
end diagnose restore exposer`,
		},
	}

	// Run each case against a fake kube clientset; compare the exposer's
	// diagnostic output verbatim.
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
			e := genericRestoreExposer{
				kubeClient: fakeKubeClient,
				log:        velerotest.NewLogger(),
			}
			var ownerObject corev1api.ObjectReference
			if test.ownerRestore != nil {
				ownerObject = corev1api.ObjectReference{
					Kind:       test.ownerRestore.Kind,
					Namespace:  test.ownerRestore.Namespace,
					Name:       test.ownerRestore.Name,
					UID:        test.ownerRestore.UID,
					APIVersion: test.ownerRestore.APIVersion,
				}
			}
			diag := e.DiagnoseExpose(context.Background(), ownerObject)
			assert.Equal(t, test.expected, diag)
		})
	}
}

View File

@ -26,6 +26,24 @@ func (_m *GenericRestoreExposer) CleanUp(_a0 context.Context, _a1 v1.ObjectRefer
_m.Called(_a0, _a1)
}
// DiagnoseExpose provides a mock function with given fields: _a0, _a1
//
// NOTE: this file is generated by mockery; do not edit by hand — update the
// GenericRestoreExposer interface and regenerate instead.
func (_m *GenericRestoreExposer) DiagnoseExpose(_a0 context.Context, _a1 v1.ObjectReference) string {
	ret := _m.Called(_a0, _a1)

	if len(ret) == 0 {
		panic("no return value specified for DiagnoseExpose")
	}

	var r0 string
	if rf, ok := ret.Get(0).(func(context.Context, v1.ObjectReference) string); ok {
		r0 = rf(_a0, _a1)
	} else {
		r0 = ret.Get(0).(string)
	}

	return r0
}
// Expose provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6
func (_m *GenericRestoreExposer) Expose(_a0 context.Context, _a1 v1.ObjectReference, _a2 string, _a3 string, _a4 map[string]string, _a5 v1.ResourceRequirements, _a6 time.Duration) error {
ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6)

View File

@ -37,6 +37,10 @@ type SnapshotExposer interface {
// Otherwise, it returns nil immediately.
PeekExposed(context.Context, corev1.ObjectReference) error
// DiagnoseExpose generate the diagnostic info when the expose is not finished for a long time.
// If it finds any problem, it returns an string about the problem.
DiagnoseExpose(context.Context, corev1.ObjectReference) string
// CleanUp cleans up any objects generated during the snapshot expose
CleanUp(context.Context, corev1.ObjectReference, string, string)
}

View File

@ -100,8 +100,17 @@ func IsRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace s
}
}
// KbClientIsRunningInNode checks if the node-agent pod is running properly in
// the specified node, querying through a client-go kubernetes.Interface.
// If not, the error found is returned.
func KbClientIsRunningInNode(ctx context.Context, namespace string, nodeName string, kubeClient kubernetes.Interface) error {
	return isRunningInNode(ctx, namespace, nodeName, nil, kubeClient)
}
// IsRunningInNode checks if the node-agent pod is running properly in the
// specified node, querying through a controller-runtime client.
// If not, the error found is returned.
func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client) error {
	return isRunningInNode(ctx, namespace, nodeName, crClient, nil)
}
func isRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client, kubeClient kubernetes.Interface) error {
if nodeName == "" {
return errors.New("node name is empty")
}
@ -112,7 +121,12 @@ func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crC
return errors.Wrap(err, "fail to parse selector")
}
err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector})
if crClient != nil {
err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector})
} else {
pods, err = kubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: parsedSelector.String()})
}
if err != nil {
return errors.Wrap(err, "failed to list daemonset pods")
}

View File

@ -124,13 +124,24 @@ func GetS3Credentials(config map[string]string) (*aws.Credentials, error) {
// GetAWSBucketRegion returns the AWS region that a bucket is in, or an error
// if the region cannot be determined.
func GetAWSBucketRegion(bucket string) (string, error) {
cfg, err := awsconfig.LoadDefaultConfig(context.Background())
// It will use us-east-1 as hinting server and requires config param to use as credentials
func GetAWSBucketRegion(bucket string, config map[string]string) (string, error) {
cfg, err := awsconfig.LoadDefaultConfig(context.Background(), awsconfig.WithCredentialsProvider(
aws.CredentialsProviderFunc(
func(context.Context) (aws.Credentials, error) {
s3creds, err := GetS3Credentials(config)
if s3creds == nil {
return aws.Credentials{}, err
}
return *s3creds, err
},
),
))
if err != nil {
return "", errors.WithStack(err)
}
client := s3.NewFromConfig(cfg)
region, err := s3manager.GetBucketRegion(context.Background(), client, bucket)
region, err := s3manager.GetBucketRegion(context.Background(), client, bucket, func(o *s3.Options) { o.Region = "us-east-1" })
if err != nil {
return "", errors.WithStack(err)
}

View File

@ -72,7 +72,7 @@ func getRepoPrefix(location *velerov1api.BackupStorageLocation) (string, error)
var err error
region := location.Spec.Config["region"]
if region == "" {
region, err = getAWSBucketRegion(bucket)
region, err = getAWSBucketRegion(bucket, location.Spec.Config)
}
if err != nil {
return "", errors.Wrapf(err, "failed to detect the region via bucket: %s", bucket)

View File

@ -30,7 +30,7 @@ func TestGetRepoIdentifier(t *testing.T) {
name string
bsl *velerov1api.BackupStorageLocation
repoName string
getAWSBucketRegion func(string) (string, error)
getAWSBucketRegion func(s string, config map[string]string) (string, error)
expected string
expectedErr string
}{
@ -101,7 +101,7 @@ func TestGetRepoIdentifier(t *testing.T) {
},
},
repoName: "repo-1",
getAWSBucketRegion: func(string) (string, error) {
getAWSBucketRegion: func(s string, config map[string]string) (string, error) {
return "", errors.New("no region found")
},
expected: "",
@ -120,7 +120,7 @@ func TestGetRepoIdentifier(t *testing.T) {
},
},
repoName: "repo-1",
getAWSBucketRegion: func(string) (string, error) {
getAWSBucketRegion: func(string, map[string]string) (string, error) {
return "eu-west-1", nil
},
expected: "s3:s3-eu-west-1.amazonaws.com/bucket/restic/repo-1",
@ -139,7 +139,7 @@ func TestGetRepoIdentifier(t *testing.T) {
},
},
repoName: "repo-1",
getAWSBucketRegion: func(string) (string, error) {
getAWSBucketRegion: func(s string, config map[string]string) (string, error) {
return "eu-west-1", nil
},
expected: "s3:s3-eu-west-1.amazonaws.com/bucket/prefix/restic/repo-1",
@ -161,7 +161,7 @@ func TestGetRepoIdentifier(t *testing.T) {
},
},
repoName: "repo-1",
getAWSBucketRegion: func(string) (string, error) {
getAWSBucketRegion: func(s string, config map[string]string) (string, error) {
return "eu-west-1", nil
},
expected: "s3:alternate-url/bucket/prefix/restic/repo-1",
@ -183,7 +183,7 @@ func TestGetRepoIdentifier(t *testing.T) {
},
},
repoName: "aws-repo",
getAWSBucketRegion: func(string) (string, error) {
getAWSBucketRegion: func(s string, config map[string]string) (string, error) {
return "eu-west-1", nil
},
expected: "s3:s3-us-west-1.amazonaws.com/bucket/prefix/restic/aws-repo",
@ -205,7 +205,7 @@ func TestGetRepoIdentifier(t *testing.T) {
},
},
repoName: "aws-repo",
getAWSBucketRegion: func(string) (string, error) {
getAWSBucketRegion: func(s string, config map[string]string) (string, error) {
return "eu-west-1", nil
},
expected: "s3:alternate-url-with-trailing-slash/bucket/prefix/restic/aws-repo",

View File

@ -529,7 +529,7 @@ func getStorageVariables(backupLocation *velerov1api.BackupStorageLocation, repo
var err error
if s3URL == "" {
if region == "" {
region, err = getS3BucketRegion(bucket)
region, err = getS3BucketRegion(bucket, config)
if err != nil {
return map[string]string{}, errors.Wrap(err, "error get s3 bucket region")
}

View File

@ -222,7 +222,7 @@ func TestGetStorageVariables(t *testing.T) {
repoName string
repoBackend string
repoConfig map[string]string
getS3BucketRegion func(string) (string, error)
getS3BucketRegion func(bucket string, config map[string]string) (string, error)
expected map[string]string
expectedErr string
}{
@ -291,7 +291,7 @@ func TestGetStorageVariables(t *testing.T) {
},
},
},
getS3BucketRegion: func(bucket string) (string, error) {
getS3BucketRegion: func(bucket string, config map[string]string) (string, error) {
return "region from bucket: " + bucket, nil
},
repoBackend: "fake-repo-type",
@ -313,7 +313,7 @@ func TestGetStorageVariables(t *testing.T) {
Config: map[string]string{},
},
},
getS3BucketRegion: func(bucket string) (string, error) {
getS3BucketRegion: func(bucket string, config map[string]string) (string, error) {
return "", errors.New("fake error")
},
expected: map[string]string{},
@ -339,7 +339,7 @@ func TestGetStorageVariables(t *testing.T) {
},
},
},
getS3BucketRegion: func(bucket string) (string, error) {
getS3BucketRegion: func(bucket string, config map[string]string) (string, error) {
return "region from bucket: " + bucket, nil
},
repoBackend: "fake-repo-type",
@ -374,7 +374,7 @@ func TestGetStorageVariables(t *testing.T) {
},
},
},
getS3BucketRegion: func(bucket string) (string, error) {
getS3BucketRegion: func(bucket string, config map[string]string) (string, error) {
return "region from bucket: " + bucket, nil
},
repoBackend: "fake-repo-type",

View File

@ -16,6 +16,9 @@ limitations under the License.
package test
import (
"fmt"
"strings"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/mock"
@ -33,6 +36,10 @@ type HookExecutionEntry struct {
HookCommand []string
}
// String formats the entry as "<namespace>.<name>.<hookName>.<command>",
// where the command's elements are joined by commas.
func (h HookExecutionEntry) String() string {
	joinedCommand := strings.Join(h.HookCommand, ",")
	return strings.Join([]string{h.Namespace, h.Name, h.HookName, joinedCommand}, ".")
}
func (e *MockPodCommandExecutor) ExecutePodCommand(log logrus.FieldLogger, item map[string]interface{}, namespace, name, hookName string, hook *v1.ExecHook) error {
e.HookExecutionLog = append(e.HookExecutionLog, HookExecutionEntry{
Namespace: namespace,

View File

@ -167,8 +167,9 @@ func EnsureDeleteVS(ctx context.Context, snapshotClient snapshotter.SnapshotV1In
return errors.Wrap(err, "error to delete volume snapshot")
}
var updated *snapshotv1api.VolumeSnapshot
err = wait.PollUntilContextTimeout(ctx, waitInternal, timeout, true, func(ctx context.Context) (bool, error) {
_, err := snapshotClient.VolumeSnapshots(vsNamespace).Get(ctx, vsName, metav1.GetOptions{})
vs, err := snapshotClient.VolumeSnapshots(vsNamespace).Get(ctx, vsName, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
return true, nil
@ -177,11 +178,16 @@ func EnsureDeleteVS(ctx context.Context, snapshotClient snapshotter.SnapshotV1In
return false, errors.Wrapf(err, fmt.Sprintf("error to get VolumeSnapshot %s", vsName))
}
updated = vs
return false, nil
})
if err != nil {
return errors.Wrapf(err, "error to assure VolumeSnapshot is deleted, %s", vsName)
if errors.Is(err, context.DeadlineExceeded) {
return errors.Errorf("timeout to assure VolumeSnapshot %s is deleted, finalizers in VS %v", vsName, updated.Finalizers)
} else {
return errors.Wrapf(err, "error to assure VolumeSnapshot is deleted, %s", vsName)
}
}
return nil
@ -219,8 +225,10 @@ func EnsureDeleteVSC(ctx context.Context, snapshotClient snapshotter.SnapshotV1I
if err != nil && !apierrors.IsNotFound(err) {
return errors.Wrap(err, "error to delete volume snapshot content")
}
var updated *snapshotv1api.VolumeSnapshotContent
err = wait.PollUntilContextTimeout(ctx, waitInternal, timeout, true, func(ctx context.Context) (bool, error) {
_, err := snapshotClient.VolumeSnapshotContents().Get(ctx, vscName, metav1.GetOptions{})
vsc, err := snapshotClient.VolumeSnapshotContents().Get(ctx, vscName, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
return true, nil
@ -229,11 +237,16 @@ func EnsureDeleteVSC(ctx context.Context, snapshotClient snapshotter.SnapshotV1I
return false, errors.Wrapf(err, fmt.Sprintf("error to get VolumeSnapshotContent %s", vscName))
}
updated = vsc
return false, nil
})
if err != nil {
return errors.Wrapf(err, "error to assure VolumeSnapshotContent is deleted, %s", vscName)
if errors.Is(err, context.DeadlineExceeded) {
return errors.Errorf("timeout to assure VolumeSnapshotContent %s is deleted, finalizers in VSC %v", vscName, updated.Finalizers)
} else {
return errors.Wrapf(err, "error to assure VolumeSnapshotContent is deleted, %s", vscName)
}
}
return nil
@ -773,3 +786,51 @@ func WaitUntilVSCHandleIsReady(
return vsc, nil
}
// DiagnoseVS returns a one-line, newline-terminated summary of a
// VolumeSnapshot's binding, readiness and error state, for inclusion in
// expose diagnostics. Unset optional status fields render as empty string /
// false.
func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot) string {
	var (
		boundVSC string
		ready    bool
		errMsg   string
	)

	if status := vs.Status; status != nil {
		if status.BoundVolumeSnapshotContentName != nil {
			boundVSC = *status.BoundVolumeSnapshotContentName
		}
		if status.ReadyToUse != nil {
			ready = *status.ReadyToUse
		}
		if status.Error != nil && status.Error.Message != nil {
			errMsg = *status.Error.Message
		}
	}

	return fmt.Sprintf("VS %s/%s, bind to %s, readyToUse %v, errMessage %s\n", vs.Namespace, vs.Name, boundVSC, ready, errMsg)
}
// DiagnoseVSC returns a one-line, newline-terminated summary of a
// VolumeSnapshotContent's readiness, error state and snapshot handle, for
// inclusion in expose diagnostics. Unset optional status fields render as
// empty string / false.
func DiagnoseVSC(vsc *snapshotv1api.VolumeSnapshotContent) string {
	var (
		snapshotHandle string
		ready          bool
		errMsg         string
	)

	if status := vsc.Status; status != nil {
		if status.SnapshotHandle != nil {
			snapshotHandle = *status.SnapshotHandle
		}
		if status.ReadyToUse != nil {
			ready = *status.ReadyToUse
		}
		if status.Error != nil && status.Error.Message != nil {
			errMsg = *status.Error.Message
		}
	}

	return fmt.Sprintf("VSC %s, readyToUse %v, errMessage %s, handle %s\n", vsc.Name, ready, errMsg, snapshotHandle)
}

View File

@ -304,6 +304,14 @@ func TestEnsureDeleteVS(t *testing.T) {
},
}
vsObjWithFinalizer := &snapshotv1api.VolumeSnapshot{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-vs",
Namespace: "fake-ns",
Finalizers: []string{"fake-finalizer-1", "fake-finalizer-2"},
},
}
tests := []struct {
name string
clientObj []runtime.Object
@ -334,6 +342,38 @@ func TestEnsureDeleteVS(t *testing.T) {
},
err: "error to assure VolumeSnapshot is deleted, fake-vs: error to get VolumeSnapshot fake-vs: fake-get-error",
},
{
name: "wait timeout",
vsName: "fake-vs",
namespace: "fake-ns",
clientObj: []runtime.Object{vsObjWithFinalizer},
reactors: []reactor{
{
verb: "delete",
resource: "volumesnapshots",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
},
},
},
err: "timeout to assure VolumeSnapshot fake-vs is deleted, finalizers in VS [fake-finalizer-1 fake-finalizer-2]",
},
{
name: "wait timeout, no finalizer",
vsName: "fake-vs",
namespace: "fake-ns",
clientObj: []runtime.Object{vsObj},
reactors: []reactor{
{
verb: "delete",
resource: "volumesnapshots",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
},
},
},
err: "timeout to assure VolumeSnapshot fake-vs is deleted, finalizers in VS []",
},
{
name: "success",
vsName: "fake-vs",
@ -367,6 +407,13 @@ func TestEnsureDeleteVSC(t *testing.T) {
},
}
vscObjWithFinalizer := &snapshotv1api.VolumeSnapshotContent{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-vsc",
Finalizers: []string{"fake-finalizer-1", "fake-finalizer-2"},
},
}
tests := []struct {
name string
clientObj []runtime.Object
@ -408,6 +455,36 @@ func TestEnsureDeleteVSC(t *testing.T) {
},
err: "error to assure VolumeSnapshotContent is deleted, fake-vsc: error to get VolumeSnapshotContent fake-vsc: fake-get-error",
},
{
name: "wait timeout",
vscName: "fake-vsc",
clientObj: []runtime.Object{vscObjWithFinalizer},
reactors: []reactor{
{
verb: "delete",
resource: "volumesnapshotcontents",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
},
},
},
err: "timeout to assure VolumeSnapshotContent fake-vsc is deleted, finalizers in VSC [fake-finalizer-1 fake-finalizer-2]",
},
{
name: "wait timeout, no finalizer",
vscName: "fake-vsc",
clientObj: []runtime.Object{vscObj},
reactors: []reactor{
{
verb: "delete",
resource: "volumesnapshotcontents",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
},
},
},
err: "timeout to assure VolumeSnapshotContent fake-vsc is deleted, finalizers in VSC []",
},
{
name: "success",
vscName: "fake-vsc",
@ -1655,3 +1732,197 @@ func TestWaitUntilVSCHandleIsReady(t *testing.T) {
})
}
}
// TestDiagnoseVS verifies the summary string produced by DiagnoseVS for
// VolumeSnapshots with progressively more status information populated.
func TestDiagnoseVS(t *testing.T) {
	vscName := "fake-vsc"
	readyToUse := true
	message := "fake-message"

	cases := []struct {
		name     string
		vs       *snapshotv1api.VolumeSnapshot
		expected string
	}{
		{
			name: "VS with no status",
			vs: &snapshotv1api.VolumeSnapshot{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-vs",
					Namespace: "fake-ns",
				},
			},
			expected: "VS fake-ns/fake-vs, bind to , readyToUse false, errMessage \n",
		},
		{
			name: "VS with empty status",
			vs: &snapshotv1api.VolumeSnapshot{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-vs",
					Namespace: "fake-ns",
				},
				Status: &snapshotv1api.VolumeSnapshotStatus{},
			},
			expected: "VS fake-ns/fake-vs, bind to , readyToUse false, errMessage \n",
		},
		{
			name: "VS with VSC name",
			vs: &snapshotv1api.VolumeSnapshot{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-vs",
					Namespace: "fake-ns",
				},
				Status: &snapshotv1api.VolumeSnapshotStatus{
					BoundVolumeSnapshotContentName: &vscName,
				},
			},
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse false, errMessage \n",
		},
		{
			name: "VS with VSC name+ready",
			vs: &snapshotv1api.VolumeSnapshot{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-vs",
					Namespace: "fake-ns",
				},
				Status: &snapshotv1api.VolumeSnapshotStatus{
					BoundVolumeSnapshotContentName: &vscName,
					ReadyToUse:                     &readyToUse,
				},
			},
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n",
		},
		{
			name: "VS with VSC name+ready+empty error",
			vs: &snapshotv1api.VolumeSnapshot{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-vs",
					Namespace: "fake-ns",
				},
				Status: &snapshotv1api.VolumeSnapshotStatus{
					BoundVolumeSnapshotContentName: &vscName,
					ReadyToUse:                     &readyToUse,
					Error:                          &snapshotv1api.VolumeSnapshotError{},
				},
			},
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n",
		},
		{
			name: "VS with VSC name+ready+error",
			vs: &snapshotv1api.VolumeSnapshot{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-vs",
					Namespace: "fake-ns",
				},
				Status: &snapshotv1api.VolumeSnapshotStatus{
					BoundVolumeSnapshotContentName: &vscName,
					ReadyToUse:                     &readyToUse,
					Error: &snapshotv1api.VolumeSnapshotError{
						Message: &message,
					},
				},
			},
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage fake-message\n",
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, DiagnoseVS(tt.vs))
		})
	}
}
// TestDiagnoseVSC verifies the summary string produced by DiagnoseVSC for
// VolumeSnapshotContents with progressively more status information populated.
func TestDiagnoseVSC(t *testing.T) {
	readyToUse := true
	message := "fake-message"
	handle := "fake-handle"

	cases := []struct {
		name     string
		vsc      *snapshotv1api.VolumeSnapshotContent
		expected string
	}{
		{
			name: "VS with no status",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
			},
			expected: "VSC fake-vsc, readyToUse false, errMessage , handle \n",
		},
		{
			name: "VSC with empty status",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{},
			},
			expected: "VSC fake-vsc, readyToUse false, errMessage , handle \n",
		},
		{
			name: "VSC with ready",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse: &readyToUse,
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage , handle \n",
		},
		{
			name: "VSC with ready+handle",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse:     &readyToUse,
					SnapshotHandle: &handle,
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage , handle fake-handle\n",
		},
		{
			name: "VSC with ready+handle+empty error",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse:     &readyToUse,
					SnapshotHandle: &handle,
					Error:          &snapshotv1api.VolumeSnapshotError{},
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage , handle fake-handle\n",
		},
		{
			name: "VSC with ready+handle+error",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse:     &readyToUse,
					SnapshotHandle: &handle,
					Error: &snapshotv1api.VolumeSnapshotError{
						Message: &message,
					},
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage fake-message, handle fake-handle\n",
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, DiagnoseVSC(tt.vsc))
		})
	}
}

View File

@ -105,8 +105,9 @@ func EnsureDeletePod(ctx context.Context, podGetter corev1client.CoreV1Interface
return errors.Wrapf(err, "error to delete pod %s", pod)
}
var updated *corev1api.Pod
err = wait.PollUntilContextTimeout(ctx, waitInternal, timeout, true, func(ctx context.Context) (bool, error) {
_, err := podGetter.Pods(namespace).Get(ctx, pod, metav1.GetOptions{})
po, err := podGetter.Pods(namespace).Get(ctx, pod, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
return true, nil
@ -115,11 +116,16 @@ func EnsureDeletePod(ctx context.Context, podGetter corev1client.CoreV1Interface
return false, errors.Wrapf(err, "error to get pod %s", pod)
}
updated = po
return false, nil
})
if err != nil {
return errors.Wrapf(err, "error to assure pod is deleted, %s", pod)
if errors.Is(err, context.DeadlineExceeded) {
return errors.Errorf("timeout to assure pod %s is deleted, finalizers in pod %v", pod, updated.Finalizers)
} else {
return errors.Wrapf(err, "error to assure pod is deleted, %s", pod)
}
}
return nil
@ -257,3 +263,13 @@ func ToSystemAffinity(loadAffinities []*LoadAffinity) *corev1api.Affinity {
return nil
}
// DiagnosePod returns a multi-line, human-readable summary of the given pod
// for troubleshooting logs: the pod phase and scheduled node on the first
// line, followed by one line per condition in pod.Status.Conditions.
func DiagnosePod(pod *corev1api.Pod) string {
	summary := fmt.Sprintf("Pod %s/%s, phase %s, node name %s\n", pod.Namespace, pod.Name, pod.Status.Phase, pod.Spec.NodeName)
	// Index instead of copying each condition struct per iteration.
	for i := range pod.Status.Conditions {
		cond := &pod.Status.Conditions[i]
		summary += fmt.Sprintf("Pod condition %s, status %s, reason %s, message %s\n", cond.Type, cond.Status, cond.Reason, cond.Message)
	}
	return summary
}

View File

@ -47,6 +47,14 @@ func TestEnsureDeletePod(t *testing.T) {
},
}
podObjectWithFinalizer := &corev1api.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
Name: "fake-pod",
Finalizers: []string{"fake-finalizer-1", "fake-finalizer-2"},
},
}
tests := []struct {
name string
clientObj []runtime.Object
@ -61,6 +69,38 @@ func TestEnsureDeletePod(t *testing.T) {
namespace: "fake-ns",
err: "error to delete pod fake-pod: pods \"fake-pod\" not found",
},
{
name: "wait timeout",
podName: "fake-pod",
namespace: "fake-ns",
clientObj: []runtime.Object{podObjectWithFinalizer},
reactors: []reactor{
{
verb: "delete",
resource: "pods",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
},
},
},
err: "timeout to assure pod fake-pod is deleted, finalizers in pod [fake-finalizer-1 fake-finalizer-2]",
},
{
name: "wait timeout, no finalizer",
podName: "fake-pod",
namespace: "fake-ns",
clientObj: []runtime.Object{podObject},
reactors: []reactor{
{
verb: "delete",
resource: "pods",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
},
},
},
err: "timeout to assure pod fake-pod is deleted, finalizers in pod []",
},
{
name: "wait fail",
podName: "fake-pod",
@ -846,3 +886,49 @@ func TestToSystemAffinity(t *testing.T) {
})
}
}
// TestDiagnosePod verifies the multi-line summary string produced by
// DiagnosePod, including one line per pod condition.
func TestDiagnosePod(t *testing.T) {
	cases := []struct {
		name     string
		pod      *corev1api.Pod
		expected string
	}{
		{
			name: "pod with all info",
			pod: &corev1api.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-pod",
					Namespace: "fake-ns",
				},
				Spec: corev1api.PodSpec{
					NodeName: "fake-node",
				},
				Status: corev1api.PodStatus{
					Phase: corev1api.PodPending,
					Conditions: []corev1api.PodCondition{
						{
							Type:    corev1api.PodInitialized,
							Status:  corev1api.ConditionTrue,
							Reason:  "fake-reason-1",
							Message: "fake-message-1",
						},
						{
							Type:    corev1api.PodScheduled,
							Status:  corev1api.ConditionFalse,
							Reason:  "fake-reason-2",
							Message: "fake-message-2",
						},
					},
				},
			},
			expected: "Pod fake-ns/fake-pod, phase Pending, node name fake-node\nPod condition Initialized, status True, reason fake-reason-1, message fake-message-1\nPod condition PodScheduled, status False, reason fake-reason-2, message fake-message-2\n",
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, DiagnosePod(tt.pod))
		})
	}
}

View File

@ -120,31 +120,37 @@ func DeletePVIfAny(ctx context.Context, pvGetter corev1client.CoreV1Interface, p
// EnsureDeletePVC asserts the existence of a PVC by name, deletes it and waits for its disappearance and returns errors on any failure
// If timeout is 0, it doesn't wait and return nil
func EnsureDeletePVC(ctx context.Context, pvcGetter corev1client.CoreV1Interface, pvc string, namespace string, timeout time.Duration) error {
err := pvcGetter.PersistentVolumeClaims(namespace).Delete(ctx, pvc, metav1.DeleteOptions{})
func EnsureDeletePVC(ctx context.Context, pvcGetter corev1client.CoreV1Interface, pvcName string, namespace string, timeout time.Duration) error {
err := pvcGetter.PersistentVolumeClaims(namespace).Delete(ctx, pvcName, metav1.DeleteOptions{})
if err != nil {
return errors.Wrapf(err, "error to delete pvc %s", pvc)
return errors.Wrapf(err, "error to delete pvc %s", pvcName)
}
if timeout == 0 {
return nil
}
var updated *corev1api.PersistentVolumeClaim
err = wait.PollUntilContextTimeout(ctx, waitInternal, timeout, true, func(ctx context.Context) (bool, error) {
_, err := pvcGetter.PersistentVolumeClaims(namespace).Get(ctx, pvc, metav1.GetOptions{})
pvc, err := pvcGetter.PersistentVolumeClaims(namespace).Get(ctx, pvcName, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
return true, nil
}
return false, errors.Wrapf(err, "error to get pvc %s", pvc)
return false, errors.Wrapf(err, "error to get pvc %s", pvcName)
}
updated = pvc
return false, nil
})
if err != nil {
return errors.Wrapf(err, "error to ensure pvc deleted for %s", pvc)
if errors.Is(err, context.DeadlineExceeded) {
return errors.Errorf("timeout to assure pvc %s is deleted, finalizers in pvc %v", pvcName, updated.Finalizers)
} else {
return errors.Wrapf(err, "error to ensure pvc deleted for %s", pvcName)
}
}
return nil
@ -412,3 +418,12 @@ func GetPVCForPodVolume(vol *corev1api.Volume, pod *corev1api.Pod, crClient crcl
return pvc, nil
}
// DiagnosePVC returns a one-line, human-readable summary of the given PVC
// for troubleshooting logs: its phase and the PV it is bound to
// (pvc.Spec.VolumeName; empty when unbound).
func DiagnosePVC(pvc *corev1api.PersistentVolumeClaim) string {
	diag := fmt.Sprintf("PVC %s/%s, phase %s, binding to %s\n", pvc.Namespace, pvc.Name, pvc.Status.Phase, pvc.Spec.VolumeName)
	return diag
}
// DiagnosePV returns a one-line, human-readable summary of the given PV
// for troubleshooting logs: its phase plus the reason and message recorded
// in the status.
func DiagnosePV(pv *corev1api.PersistentVolume) string {
	return fmt.Sprintf("PV %s, phase %s, reason %s, message %s\n", pv.Name, pv.Status.Phase, pv.Status.Reason, pv.Status.Message)
}

View File

@ -462,7 +462,7 @@ func TestDeletePVCIfAny(t *testing.T) {
},
},
ensureTimeout: time.Second,
logMessage: "failed to delete pvc fake-namespace/fake-pvc with err error to ensure pvc deleted for fake-pvc: context deadline exceeded",
logMessage: "failed to delete pvc fake-namespace/fake-pvc with err timeout to assure pvc fake-pvc is deleted, finalizers in pvc []",
logLevel: "level=warning",
},
{
@ -584,6 +584,14 @@ func TestEnsureDeletePVC(t *testing.T) {
},
}
pvcObjectWithFinalizer := &corev1api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
Name: "fake-pvc",
Finalizers: []string{"fake-finalizer-1", "fake-finalizer-2"},
},
}
tests := []struct {
name string
clientObj []runtime.Object
@ -635,6 +643,23 @@ func TestEnsureDeletePVC(t *testing.T) {
name: "wait timeout",
pvcName: "fake-pvc",
namespace: "fake-ns",
clientObj: []runtime.Object{pvcObjectWithFinalizer},
timeout: time.Millisecond,
reactors: []reactor{
{
verb: "delete",
resource: "persistentvolumeclaims",
reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) {
return true, pvcObject, nil
},
},
},
err: "timeout to assure pvc fake-pvc is deleted, finalizers in pvc [fake-finalizer-1 fake-finalizer-2]",
},
{
name: "wait timeout, no finalizer",
pvcName: "fake-pvc",
namespace: "fake-ns",
clientObj: []runtime.Object{pvcObject},
timeout: time.Millisecond,
reactors: []reactor{
@ -646,7 +671,7 @@ func TestEnsureDeletePVC(t *testing.T) {
},
},
},
err: "error to ensure pvc deleted for fake-pvc: context deadline exceeded",
err: "timeout to assure pvc fake-pvc is deleted, finalizers in pvc []",
},
}
@ -1463,3 +1488,65 @@ func TestMakePodPVCAttachment(t *testing.T) {
})
}
}
// TestDiagnosePVC verifies the summary string produced by DiagnosePVC.
func TestDiagnosePVC(t *testing.T) {
	cases := []struct {
		name     string
		pvc      *corev1api.PersistentVolumeClaim
		expected string
	}{
		{
			name: "pvc with all info",
			pvc: &corev1api.PersistentVolumeClaim{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-pvc",
					Namespace: "fake-ns",
				},
				Spec: corev1api.PersistentVolumeClaimSpec{
					VolumeName: "fake-pv",
				},
				Status: corev1api.PersistentVolumeClaimStatus{
					Phase: corev1api.ClaimPending,
				},
			},
			expected: "PVC fake-ns/fake-pvc, phase Pending, binding to fake-pv\n",
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, DiagnosePVC(tt.pvc))
		})
	}
}
// TestDiagnosePV verifies the summary string produced by DiagnosePV.
func TestDiagnosePV(t *testing.T) {
	cases := []struct {
		name     string
		pv       *corev1api.PersistentVolume
		expected string
	}{
		{
			name: "pv with all info",
			pv: &corev1api.PersistentVolume{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-pv",
				},
				Status: corev1api.PersistentVolumeStatus{
					Phase:   corev1api.VolumePending,
					Message: "fake-message",
					Reason:  "fake-reason",
				},
			},
			expected: "PV fake-pv, phase Pending, reason fake-reason, message fake-message\n",
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, DiagnosePV(tt.pv))
		})
	}
}

View File

@ -575,9 +575,9 @@ var _ = Describe(
)
var _ = Describe(
"Migrate resources between clusters by Restic",
Label("Migration", "Restic"),
MigrationWithRestic,
"Migrate resources between clusters by FileSystem backup",
Label("Migration", "FSB"),
MigrationWithFS,
)
var _ = Describe(
"Migrate resources between clusters by snapshot",

View File

@ -17,388 +17,455 @@ package migration
import (
"context"
"flag"
"fmt"
"strings"
"time"
"github.com/google/uuid"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
. "github.com/vmware-tanzu/velero/test"
"github.com/vmware-tanzu/velero/test"
framework "github.com/vmware-tanzu/velero/test/e2e/test"
util "github.com/vmware-tanzu/velero/test/util/csi"
. "github.com/vmware-tanzu/velero/test/util/k8s"
. "github.com/vmware-tanzu/velero/test/util/kibishii"
. "github.com/vmware-tanzu/velero/test/util/providers"
. "github.com/vmware-tanzu/velero/test/util/velero"
k8sutil "github.com/vmware-tanzu/velero/test/util/k8s"
"github.com/vmware-tanzu/velero/test/util/kibishii"
"github.com/vmware-tanzu/velero/test/util/providers"
veleroutil "github.com/vmware-tanzu/velero/test/util/velero"
)
var migrationNamespace string
var veleroCfg VeleroConfig
type migrationE2E struct {
framework.TestCase
useVolumeSnapshots bool
veleroCLI2Version test.VeleroCLI2Version
kibishiiData kibishii.KibishiiData
}
func MigrationWithSnapshots() {
veleroCfg = VeleroCfg
for _, veleroCLI2Version := range GetVersionList(veleroCfg.MigrateFromVeleroCLI, veleroCfg.MigrateFromVeleroVersion) {
MigrationTest(true, veleroCLI2Version)
for _, veleroCLI2Version := range veleroutil.GetVersionList(
test.VeleroCfg.MigrateFromVeleroCLI,
test.VeleroCfg.MigrateFromVeleroVersion) {
framework.TestFunc(
&migrationE2E{
useVolumeSnapshots: true,
veleroCLI2Version: veleroCLI2Version,
},
)()
}
}
func MigrationWithRestic() {
veleroCfg = VeleroCfg
for _, veleroCLI2Version := range GetVersionList(veleroCfg.MigrateFromVeleroCLI, veleroCfg.MigrateFromVeleroVersion) {
MigrationTest(false, veleroCLI2Version)
func MigrationWithFS() {
for _, veleroCLI2Version := range veleroutil.GetVersionList(
test.VeleroCfg.MigrateFromVeleroCLI,
test.VeleroCfg.MigrateFromVeleroVersion) {
framework.TestFunc(
&migrationE2E{
useVolumeSnapshots: false,
veleroCLI2Version: veleroCLI2Version,
},
)()
}
}
func MigrationTest(useVolumeSnapshots bool, veleroCLI2Version VeleroCLI2Version) {
var (
backupName, restoreName string
backupScName, restoreScName string
kibishiiWorkerCount int
err error
)
BeforeEach(func() {
kibishiiWorkerCount = 3
veleroCfg = VeleroCfg
UUIDgen, err = uuid.NewRandom()
migrationNamespace = "migration-" + UUIDgen.String()
if useVolumeSnapshots && veleroCfg.CloudProvider == Kind {
Skip(fmt.Sprintf("Volume snapshots not supported on %s", Kind))
}
if veleroCfg.DefaultClusterContext == "" && veleroCfg.StandbyClusterContext == "" {
func (m *migrationE2E) Init() error {
By("Skip check", func() {
if m.VeleroCfg.DefaultClusterContext == "" && m.VeleroCfg.StandbyClusterContext == "" {
Skip("Migration test needs 2 clusters")
}
// need to uninstall Velero first in case of the affection of the existing global velero installation
if InstallVelero {
By("Uninstall Velero", func() {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(VeleroUninstall(ctx, veleroCfg)).To(Succeed())
})
if m.useVolumeSnapshots && m.VeleroCfg.CloudProvider == test.Kind {
Skip(fmt.Sprintf("Volume snapshots not supported on %s", test.Kind))
}
if m.VeleroCfg.SnapshotMoveData && !m.useVolumeSnapshots {
Skip("FSB migration test is not needed in data mover scenario")
}
})
AfterEach(func() {
if CurrentSpecReport().Failed() && veleroCfg.FailFast {
fmt.Println("Test case failed and fail fast is enabled. Skip resource clean up.")
By("Call the base E2E init", func() {
Expect(m.TestCase.Init()).To(Succeed())
})
m.kibishiiData = *kibishii.DefaultKibishiiData
m.kibishiiData.ExpectedNodes = 3
m.CaseBaseName = "migration-" + m.UUIDgen
m.BackupName = m.CaseBaseName + "-backup"
m.RestoreName = m.CaseBaseName + "-restore"
m.NSIncluded = &[]string{m.CaseBaseName}
m.RestoreArgs = []string{
"create", "--namespace", m.VeleroCfg.VeleroNamespace,
"restore", m.RestoreName,
"--from-backup", m.BackupName, "--wait",
}
// Message output by ginkgo
m.TestMsg = &framework.TestMSG{
Desc: "Test migration workload on two clusters",
FailedMSG: "Fail to test migrate between two clusters",
Text: "Test back up on default cluster, restore on standby cluster",
}
// Need to uninstall Velero on the default cluster.
if test.InstallVelero {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(veleroutil.VeleroUninstall(ctx, m.VeleroCfg)).To(Succeed())
}
return nil
}
func (m *migrationE2E) Backup() error {
OriginVeleroCfg := m.VeleroCfg
var err error
if m.veleroCLI2Version.VeleroCLI == "" {
//Assume tag of velero server image is identical to velero CLI version
//Download velero CLI if it's empty according to velero CLI version
By(
fmt.Sprintf("Install the expected version Velero CLI %s",
m.veleroCLI2Version.VeleroVersion),
func() {
// "self" represents 1.14.x and future versions
if m.veleroCLI2Version.VeleroVersion == "self" {
m.veleroCLI2Version.VeleroCLI = m.VeleroCfg.VeleroCLI
} else {
OriginVeleroCfg, err = veleroutil.SetImagesToDefaultValues(
OriginVeleroCfg,
m.veleroCLI2Version.VeleroVersion,
)
Expect(err).To(Succeed(),
"Fail to set images for the migrate-from Velero installation.")
m.veleroCLI2Version.VeleroCLI, err = veleroutil.InstallVeleroCLI(
m.veleroCLI2Version.VeleroVersion)
Expect(err).To(Succeed())
}
},
)
}
By(fmt.Sprintf("Install Velero on default cluster (%s)", m.VeleroCfg.DefaultClusterContext),
func() {
Expect(k8sutil.KubectlConfigUseContext(
m.Ctx, m.VeleroCfg.DefaultClusterContext)).To(Succeed())
OriginVeleroCfg.MigrateFromVeleroVersion = m.veleroCLI2Version.VeleroVersion
OriginVeleroCfg.VeleroCLI = m.veleroCLI2Version.VeleroCLI
OriginVeleroCfg.ClientToInstallVelero = OriginVeleroCfg.DefaultClient
OriginVeleroCfg.ClusterToInstallVelero = m.VeleroCfg.DefaultClusterName
OriginVeleroCfg.ServiceAccountNameToInstall = m.VeleroCfg.DefaultCLSServiceAccountName
OriginVeleroCfg.UseVolumeSnapshots = m.useVolumeSnapshots
OriginVeleroCfg.UseNodeAgent = !m.useVolumeSnapshots
version, err := veleroutil.GetVeleroVersion(m.Ctx, OriginVeleroCfg.VeleroCLI, true)
Expect(err).To(Succeed(), "Fail to get Velero version")
OriginVeleroCfg.VeleroVersion = version
if OriginVeleroCfg.SnapshotMoveData {
OriginVeleroCfg.UseNodeAgent = true
}
Expect(veleroutil.VeleroInstall(m.Ctx, &OriginVeleroCfg, false)).To(Succeed())
if m.veleroCLI2Version.VeleroVersion != "self" {
Expect(veleroutil.CheckVeleroVersion(
m.Ctx,
OriginVeleroCfg.VeleroCLI,
OriginVeleroCfg.MigrateFromVeleroVersion,
)).To(Succeed())
}
},
)
By("Create namespace for sample workload", func() {
Expect(k8sutil.CreateNamespace(
m.Ctx,
*m.VeleroCfg.DefaultClient,
m.CaseBaseName,
)).To(Succeed(),
fmt.Sprintf("Failed to create namespace %s to install Kibishii workload",
m.CaseBaseName))
})
By("Deploy sample workload of Kibishii", func() {
Expect(kibishii.KibishiiPrepareBeforeBackup(
m.Ctx,
*OriginVeleroCfg.DefaultClient,
OriginVeleroCfg.CloudProvider,
m.CaseBaseName,
OriginVeleroCfg.RegistryCredentialFile,
OriginVeleroCfg.Features,
OriginVeleroCfg.KibishiiDirectory,
OriginVeleroCfg.UseVolumeSnapshots,
&m.kibishiiData,
)).To(Succeed())
})
By(fmt.Sprintf("Backup namespace %s", m.CaseBaseName), func() {
m.BackupArgs = []string{
"create", "--namespace", m.VeleroCfg.VeleroNamespace,
"backup", m.BackupName,
"--include-namespaces", strings.Join(*m.NSIncluded, ","),
"--wait",
}
if m.useVolumeSnapshots {
m.BackupArgs = append(m.BackupArgs, "--snapshot-volumes=true")
} else {
By(fmt.Sprintf("Uninstall Velero on cluster %s", veleroCfg.DefaultClusterContext), func() {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.DefaultClusterContext)).To(Succeed())
veleroCfg.ClientToInstallVelero = veleroCfg.DefaultClient
veleroCfg.ClusterToInstallVelero = veleroCfg.DefaultClusterName
Expect(VeleroUninstall(ctx, veleroCfg)).To(Succeed())
By(fmt.Sprintf("Delete sample workload namespace %s", migrationNamespace), func() {
Expect(
DeleteNamespace(
context.Background(),
*veleroCfg.DefaultClient,
migrationNamespace,
true),
).To(Succeed())
})
})
By(fmt.Sprintf("Uninstall Velero on cluster %s", veleroCfg.StandbyClusterContext), func() {
ctx, ctxCancel := context.WithTimeout(context.Background(), time.Minute*5)
defer ctxCancel()
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.StandbyClusterContext)).To(Succeed())
veleroCfg.ClientToInstallVelero = veleroCfg.StandbyClient
veleroCfg.ClusterToInstallVelero = veleroCfg.StandbyClusterName
By("Delete StorageClasses created by E2E")
Expect(
DeleteStorageClass(
ctx,
*veleroCfg.ClientToInstallVelero,
StorageClassName,
),
).To(Succeed())
Expect(
DeleteStorageClass(
ctx,
*veleroCfg.ClientToInstallVelero,
StorageClassName2,
),
).To(Succeed())
if strings.EqualFold(veleroCfg.Features, FeatureCSI) &&
veleroCfg.UseVolumeSnapshots {
By("Delete VolumeSnapshotClass created by E2E")
Expect(
KubectlDeleteByFile(
ctx,
fmt.Sprintf("../testdata/volume-snapshot-class/%s.yaml", veleroCfg.CloudProvider),
),
).To(Succeed())
}
Expect(VeleroUninstall(ctx, veleroCfg)).To(Succeed())
By(fmt.Sprintf("Delete sample workload namespace %s", migrationNamespace), func() {
Expect(
DeleteNamespace(
context.Background(),
*veleroCfg.StandbyClient,
migrationNamespace,
true,
),
).To(Succeed())
})
})
By(fmt.Sprintf("Switch to default KubeConfig context %s", veleroCfg.DefaultClusterContext), func() {
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.DefaultClusterContext)).To(Succeed())
veleroCfg.ClientToInstallVelero = veleroCfg.DefaultClient
veleroCfg.ClusterToInstallVelero = veleroCfg.DefaultClusterName
})
m.BackupArgs = append(m.BackupArgs, "--default-volumes-to-fs-backup")
}
})
When("kibishii is the sample workload", func() {
It("should be successfully backed up and restored to the default BackupStorageLocation", func() {
var backupNames []string
if veleroCfg.SnapshotMoveData {
if !useVolumeSnapshots {
Skip("FSB migration test is not needed in data mover scenario")
}
}
oneHourTimeout, ctxCancel := context.WithTimeout(context.Background(), time.Minute*60)
defer ctxCancel()
flag.Parse()
UUIDgen, err = uuid.NewRandom()
Expect(err).To(Succeed())
supportUploaderType, err := IsSupportUploaderType(veleroCLI2Version.VeleroVersion)
Expect(err).To(Succeed())
OriginVeleroCfg := veleroCfg
if veleroCLI2Version.VeleroCLI == "" {
//Assume tag of velero server image is identical to velero CLI version
//Download velero CLI if it's empty according to velero CLI version
By(fmt.Sprintf("Install the expected version Velero CLI (%s) for installing Velero",
veleroCLI2Version.VeleroVersion), func() {
//"self" represents 1.14.x and future versions
if veleroCLI2Version.VeleroVersion == "self" {
veleroCLI2Version.VeleroCLI = veleroCfg.VeleroCLI
} else {
OriginVeleroCfg, err = SetImagesToDefaultValues(
OriginVeleroCfg,
veleroCLI2Version.VeleroVersion,
)
Expect(err).To(Succeed(), "Fail to set images for the migrate-from Velero installation.")
if OriginVeleroCfg.SnapshotMoveData {
m.BackupArgs = append(m.BackupArgs, "--snapshot-move-data")
}
veleroCLI2Version.VeleroCLI, err = InstallVeleroCLI(veleroCLI2Version.VeleroVersion)
Expect(err).To(Succeed())
}
})
}
By(fmt.Sprintf("Install Velero in cluster-A (%s) to backup workload", veleroCfg.DefaultClusterContext), func() {
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.DefaultClusterContext)).To(Succeed())
OriginVeleroCfg.MigrateFromVeleroVersion = veleroCLI2Version.VeleroVersion
OriginVeleroCfg.VeleroCLI = veleroCLI2Version.VeleroCLI
OriginVeleroCfg.ClientToInstallVelero = OriginVeleroCfg.DefaultClient
OriginVeleroCfg.ClusterToInstallVelero = veleroCfg.DefaultClusterName
OriginVeleroCfg.ServiceAccountNameToInstall = veleroCfg.DefaultCLSServiceAccountName
OriginVeleroCfg.UseVolumeSnapshots = useVolumeSnapshots
OriginVeleroCfg.UseNodeAgent = !useVolumeSnapshots
version, err := GetVeleroVersion(oneHourTimeout, OriginVeleroCfg.VeleroCLI, true)
Expect(err).To(Succeed(), "Fail to get Velero version")
OriginVeleroCfg.VeleroVersion = version
if OriginVeleroCfg.SnapshotMoveData {
OriginVeleroCfg.UseNodeAgent = true
}
Expect(VeleroInstall(context.Background(), &OriginVeleroCfg, false)).To(Succeed())
if veleroCLI2Version.VeleroVersion != "self" {
Expect(CheckVeleroVersion(context.Background(), OriginVeleroCfg.VeleroCLI,
OriginVeleroCfg.MigrateFromVeleroVersion)).To(Succeed())
}
})
backupName = "backup-" + UUIDgen.String()
backupScName = backupName + "-sc"
restoreName = "restore-" + UUIDgen.String()
restoreScName = restoreName + "-sc"
By("Create namespace for sample workload", func() {
Expect(CreateNamespace(oneHourTimeout, *veleroCfg.DefaultClient, migrationNamespace)).To(Succeed(),
fmt.Sprintf("Failed to create namespace %s to install Kibishii workload", migrationNamespace))
})
KibishiiData := *DefaultKibishiiData
By("Deploy sample workload of Kibishii", func() {
KibishiiData.ExpectedNodes = kibishiiWorkerCount
Expect(KibishiiPrepareBeforeBackup(oneHourTimeout, *veleroCfg.DefaultClient, veleroCfg.CloudProvider,
migrationNamespace, veleroCfg.RegistryCredentialFile, veleroCfg.Features,
veleroCfg.KibishiiDirectory, useVolumeSnapshots, &KibishiiData)).To(Succeed())
})
By(fmt.Sprintf("Backup namespace %s", migrationNamespace), func() {
var BackupStorageClassCfg BackupConfig
BackupStorageClassCfg.BackupName = backupScName
BackupStorageClassCfg.IncludeResources = "StorageClass"
BackupStorageClassCfg.IncludeClusterResources = true
//TODO Remove UseRestic parameter once minor version is 1.10 or upper
BackupStorageClassCfg.UseResticIfFSBackup = !supportUploaderType
Expect(VeleroBackupNamespace(context.Background(), OriginVeleroCfg.VeleroCLI,
OriginVeleroCfg.VeleroNamespace, BackupStorageClassCfg)).To(Succeed(), func() string {
RunDebug(context.Background(), veleroCfg.VeleroCLI, veleroCfg.VeleroNamespace, BackupStorageClassCfg.BackupName, "")
return "Fail to backup workload"
})
backupNames = append(backupNames, BackupStorageClassCfg.BackupName)
var BackupCfg BackupConfig
BackupCfg.BackupName = backupName
BackupCfg.Namespace = migrationNamespace
BackupCfg.UseVolumeSnapshots = useVolumeSnapshots
BackupCfg.BackupLocation = ""
BackupCfg.Selector = ""
BackupCfg.DefaultVolumesToFsBackup = !useVolumeSnapshots
//TODO Remove UseRestic parameter once minor version is 1.10 or upper
BackupCfg.UseResticIfFSBackup = !supportUploaderType
BackupCfg.SnapshotMoveData = OriginVeleroCfg.SnapshotMoveData
Expect(VeleroBackupNamespace(context.Background(), OriginVeleroCfg.VeleroCLI,
OriginVeleroCfg.VeleroNamespace, BackupCfg)).To(Succeed(), func() string {
RunDebug(context.Background(), OriginVeleroCfg.VeleroCLI, OriginVeleroCfg.VeleroNamespace, BackupCfg.BackupName, "")
return "Fail to backup workload"
})
backupNames = append(backupNames, BackupCfg.BackupName)
})
if useVolumeSnapshots {
// Only wait for the snapshots.backupdriver.cnsdp.vmware.com
// when the vSphere plugin is used.
if veleroCfg.HasVspherePlugin {
By("Waiting for vSphere uploads to complete", func() {
Expect(WaitForVSphereUploadCompletion(context.Background(), time.Hour,
migrationNamespace, kibishiiWorkerCount)).To(Succeed())
})
}
var snapshotCheckPoint SnapshotCheckPoint
snapshotCheckPoint.NamespaceBackedUp = migrationNamespace
if OriginVeleroCfg.SnapshotMoveData {
//VolumeSnapshotContent should be deleted after data movement
_, err := util.CheckVolumeSnapshotCR(*veleroCfg.DefaultClient, map[string]string{"namespace": migrationNamespace}, 0)
Expect(err).NotTo(HaveOccurred(), "VSC count is not as expected 0")
} else {
// the snapshots of AWS may be still in pending status when do the restore, wait for a while
// to avoid this https://github.com/vmware-tanzu/velero/issues/1799
// TODO remove this after https://github.com/vmware-tanzu/velero/issues/3533 is fixed
if veleroCfg.CloudProvider == Azure && strings.EqualFold(veleroCfg.Features, FeatureCSI) || veleroCfg.CloudProvider == AWS {
By("Sleep 5 minutes to avoid snapshot recreated by unknown reason ", func() {
time.Sleep(5 * time.Minute)
})
}
By("Snapshot should be created in cloud object store with retain policy", func() {
snapshotCheckPoint, err = GetSnapshotCheckPoint(*veleroCfg.DefaultClient, veleroCfg, kibishiiWorkerCount,
migrationNamespace, backupName, GetKibishiiPVCNameList(kibishiiWorkerCount))
Expect(err).NotTo(HaveOccurred(), "Fail to get snapshot checkpoint")
Expect(CheckSnapshotsInProvider(
veleroCfg,
backupName,
snapshotCheckPoint,
false,
)).To(Succeed())
})
}
}
By(fmt.Sprintf("Install Velero in cluster-B (%s) to restore workload", veleroCfg.StandbyClusterContext), func() {
//Ensure workload of "migrationNamespace" existed in cluster-A
ns, err := GetNamespace(context.Background(), *veleroCfg.DefaultClient, migrationNamespace)
Expect(ns.Name).To(Equal(migrationNamespace))
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("get namespace in source cluster err: %v", err))
//Ensure cluster-B is the target cluster
Expect(KubectlConfigUseContext(context.Background(), veleroCfg.StandbyClusterContext)).To(Succeed())
_, err = GetNamespace(context.Background(), *veleroCfg.StandbyClient, migrationNamespace)
Expect(err).To(HaveOccurred(), fmt.Sprintf("get namespace in dst cluster successfully, it's not as expected: %s", migrationNamespace))
fmt.Println(err)
Expect(strings.Contains(fmt.Sprint(err), "namespaces \""+migrationNamespace+"\" not found")).Should(BeTrue())
veleroCfg.ClientToInstallVelero = veleroCfg.StandbyClient
veleroCfg.ClusterToInstallVelero = veleroCfg.StandbyClusterName
veleroCfg.ServiceAccountNameToInstall = veleroCfg.StandbyCLSServiceAccountName
veleroCfg.UseNodeAgent = !useVolumeSnapshots
veleroCfg.UseRestic = false
if veleroCfg.SnapshotMoveData {
veleroCfg.UseNodeAgent = true
// For SnapshotMoveData pipelines, we should use standby cluster setting for Velero installation
// In nightly CI, StandbyClusterPlugins is set properly if pipeline is for SnapshotMoveData.
veleroCfg.Plugins = veleroCfg.StandbyClusterPlugins
veleroCfg.ObjectStoreProvider = veleroCfg.StandbyClusterObjectStoreProvider
}
By("Install StorageClass for E2E.")
Expect(InstallStorageClasses(veleroCfg.StandbyClusterCloudProvider)).To(Succeed())
if strings.EqualFold(veleroCfg.Features, FeatureCSI) &&
veleroCfg.UseVolumeSnapshots {
By("Install VolumeSnapshotClass for E2E.")
Expect(
KubectlApplyByFile(
context.Background(),
fmt.Sprintf("../testdata/volume-snapshot-class/%s.yaml", veleroCfg.StandbyClusterCloudProvider),
),
).To(Succeed())
}
Expect(VeleroInstall(context.Background(), &veleroCfg, true)).To(Succeed())
})
By(fmt.Sprintf("Waiting for backups sync to Velero in cluster-B (%s)", veleroCfg.StandbyClusterContext), func() {
Expect(WaitForBackupToBeCreated(context.Background(), backupName, 5*time.Minute, &veleroCfg)).To(Succeed())
Expect(WaitForBackupToBeCreated(context.Background(), backupScName, 5*time.Minute, &veleroCfg)).To(Succeed())
})
By(fmt.Sprintf("Restore %s", migrationNamespace), func() {
if OriginVeleroCfg.SnapshotMoveData {
cmName := "datamover-storage-class-config"
labels := map[string]string{"velero.io/change-storage-class": "RestoreItemAction",
"velero.io/plugin-config": ""}
data := map[string]string{KibishiiStorageClassName: StorageClassName}
By(fmt.Sprintf("Create ConfigMap %s in namespace %s", cmName, veleroCfg.VeleroNamespace), func() {
_, err := CreateConfigMap(veleroCfg.StandbyClient.ClientGo, veleroCfg.VeleroNamespace, cmName, labels, data)
Expect(err).To(Succeed(), fmt.Sprintf("failed to create configmap in the namespace %q", veleroCfg.VeleroNamespace))
})
} else {
Expect(VeleroRestore(context.Background(), veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace, restoreScName, backupScName, "StorageClass")).To(Succeed(), func() string {
RunDebug(context.Background(), veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace, "", restoreName)
return "Fail to restore workload"
})
}
Expect(VeleroRestore(context.Background(), veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace, restoreName, backupName, "")).To(Succeed(), func() string {
RunDebug(context.Background(), veleroCfg.VeleroCLI,
veleroCfg.VeleroNamespace, "", restoreName)
return "Fail to restore workload"
})
})
By(fmt.Sprintf("Verify workload %s after restore ", migrationNamespace), func() {
Expect(KibishiiVerifyAfterRestore(*veleroCfg.StandbyClient, migrationNamespace,
oneHourTimeout, &KibishiiData, "")).To(Succeed(), "Fail to verify workload after restore")
})
// TODO: delete backup created by case self, not all
By("Clean backups after test", func() {
veleroCfg.ClientToInstallVelero = veleroCfg.DefaultClient
Expect(DeleteBackups(context.Background(), backupNames, &veleroCfg)).To(Succeed())
})
Expect(veleroutil.VeleroBackupExec(
m.Ctx,
OriginVeleroCfg.VeleroCLI,
OriginVeleroCfg.VeleroNamespace,
m.BackupName,
m.BackupArgs,
)).To(Succeed(), func() string {
veleroutil.RunDebug(
context.Background(),
OriginVeleroCfg.VeleroCLI,
OriginVeleroCfg.VeleroNamespace,
m.BackupName,
"",
)
return "Failed to backup resources"
})
})
if m.useVolumeSnapshots {
// Only wait for the snapshots.backupdriver.cnsdp.vmware.com
// when the vSphere plugin is used.
if OriginVeleroCfg.HasVspherePlugin {
By("Waiting for vSphere uploads to complete", func() {
Expect(
veleroutil.WaitForVSphereUploadCompletion(
context.Background(),
time.Hour,
m.CaseBaseName,
m.kibishiiData.ExpectedNodes,
),
).To(Succeed())
})
}
var snapshotCheckPoint test.SnapshotCheckPoint
snapshotCheckPoint.NamespaceBackedUp = m.CaseBaseName
if OriginVeleroCfg.SnapshotMoveData {
//VolumeSnapshotContent should be deleted after data movement
_, err := util.CheckVolumeSnapshotCR(
*m.VeleroCfg.DefaultClient,
map[string]string{"namespace": m.CaseBaseName},
0,
)
By("Check the VSC account", func() {
Expect(err).NotTo(HaveOccurred(), "VSC count is not as expected 0")
})
} else {
// the snapshots of AWS may be still in pending status when do the restore.
// wait for a while to avoid this https://github.com/vmware-tanzu/velero/issues/1799
if OriginVeleroCfg.CloudProvider == test.Azure &&
strings.EqualFold(OriginVeleroCfg.Features, test.FeatureCSI) ||
OriginVeleroCfg.CloudProvider == test.AWS {
By("Sleep 5 minutes to avoid snapshot recreated by unknown reason ", func() {
time.Sleep(5 * time.Minute)
})
}
By("Snapshot should be created in cloud object store with retain policy", func() {
snapshotCheckPoint, err = veleroutil.GetSnapshotCheckPoint(
*OriginVeleroCfg.DefaultClient,
OriginVeleroCfg,
m.kibishiiData.ExpectedNodes,
m.CaseBaseName,
m.BackupName,
kibishii.GetKibishiiPVCNameList(m.kibishiiData.ExpectedNodes),
)
Expect(err).NotTo(HaveOccurred(), "Fail to get snapshot checkpoint")
Expect(providers.CheckSnapshotsInProvider(
OriginVeleroCfg,
m.BackupName,
snapshotCheckPoint,
false,
)).To(Succeed())
})
}
}
return nil
}
// Restore installs Velero on the standby cluster, waits for the backup taken
// on the default cluster to sync over, then restores the workload from it.
// It assumes Backup() has already completed successfully.
func (m *migrationE2E) Restore() error {
	// Work on a copy so standby-specific overrides don't mutate m.VeleroCfg.
	StandbyVeleroCfg := m.VeleroCfg

	By("Install Velero in standby cluster.", func() {
		// Ensure cluster-B (standby) is the active kubectl context.
		Expect(k8sutil.KubectlConfigUseContext(
			m.Ctx, m.VeleroCfg.StandbyClusterContext)).To(Succeed())

		// The workload namespace must NOT pre-exist on the standby cluster;
		// a "not found" error is the expected outcome of this lookup.
		_, err := k8sutil.GetNamespace(
			m.Ctx, *m.VeleroCfg.StandbyClient, m.CaseBaseName)
		Expect(err).To(HaveOccurred(), fmt.Sprintf(
			"get namespace in dst cluster successfully, it's not as expected: %s", m.CaseBaseName))
		Expect(strings.Contains(fmt.Sprint(err), "namespaces \""+m.CaseBaseName+"\" not found")).
			Should(BeTrue())

		By("Install StorageClass for E2E.")
		Expect(veleroutil.InstallStorageClasses(
			m.VeleroCfg.StandbyClusterCloudProvider)).To(Succeed())

		if strings.EqualFold(m.VeleroCfg.Features, test.FeatureCSI) &&
			m.VeleroCfg.UseVolumeSnapshots {
			By("Install VolumeSnapshotClass for E2E.")
			Expect(
				k8sutil.KubectlApplyByFile(
					m.Ctx,
					fmt.Sprintf("../testdata/volume-snapshot-class/%s.yaml",
						m.VeleroCfg.StandbyClusterCloudProvider),
				),
			).To(Succeed())
		}

		// Point the installation at the standby cluster.
		StandbyVeleroCfg.ClientToInstallVelero = m.VeleroCfg.StandbyClient
		StandbyVeleroCfg.ClusterToInstallVelero = m.VeleroCfg.StandbyClusterName
		StandbyVeleroCfg.ServiceAccountNameToInstall = m.VeleroCfg.StandbyCLSServiceAccountName
		StandbyVeleroCfg.UseNodeAgent = !m.useVolumeSnapshots
		if StandbyVeleroCfg.SnapshotMoveData {
			// Data movement requires the node agent on the restore side.
			StandbyVeleroCfg.UseNodeAgent = true
			// For SnapshotMoveData pipelines, we should use standby cluster setting
			// for Velero installation.
			// In nightly CI, StandbyClusterPlugins is set properly
			// if pipeline is for SnapshotMoveData.
			StandbyVeleroCfg.Plugins = m.VeleroCfg.StandbyClusterPlugins
			StandbyVeleroCfg.ObjectStoreProvider = m.VeleroCfg.StandbyClusterObjectStoreProvider
		}
		Expect(veleroutil.VeleroInstall(
			context.Background(), &StandbyVeleroCfg, true)).To(Succeed())
	})

	By("Waiting for backups sync to Velero in standby cluster", func() {
		Expect(veleroutil.WaitForBackupToBeCreated(
			m.Ctx, m.BackupName, 5*time.Minute, &StandbyVeleroCfg)).To(Succeed())
	})

	By(fmt.Sprintf("Restore %s", m.CaseBaseName), func() {
		// Read SnapshotMoveData from StandbyVeleroCfg (a copy of m.VeleroCfg,
		// never diverging on this field) so the check is consistent with the
		// install step above.
		if StandbyVeleroCfg.SnapshotMoveData {
			// The data mover restores into a generic storage class; map the
			// kibishii storage class to it via a plugin ConfigMap.
			cmName := "datamover-storage-class-config"
			labels := map[string]string{
				"velero.io/change-storage-class": "RestoreItemAction",
				"velero.io/plugin-config":        "",
			}
			data := map[string]string{kibishii.KibishiiStorageClassName: test.StorageClassName}
			By(fmt.Sprintf("Create ConfigMap %s in namespace %s",
				cmName, StandbyVeleroCfg.VeleroNamespace), func() {
				_, err := k8sutil.CreateConfigMap(
					StandbyVeleroCfg.StandbyClient.ClientGo,
					StandbyVeleroCfg.VeleroNamespace,
					cmName,
					labels,
					data,
				)
				Expect(err).To(Succeed(), fmt.Sprintf(
					"failed to create ConfigMap in the namespace %q",
					StandbyVeleroCfg.VeleroNamespace))
			})
		}

		Expect(veleroutil.VeleroRestore(
			m.Ctx,
			StandbyVeleroCfg.VeleroCLI,
			StandbyVeleroCfg.VeleroNamespace,
			m.RestoreName,
			m.BackupName,
			"",
		)).To(Succeed(), func() string {
			// Dump diagnostics before failing the case.
			veleroutil.RunDebug(
				m.Ctx, StandbyVeleroCfg.VeleroCLI,
				StandbyVeleroCfg.VeleroNamespace, "", m.RestoreName)
			return "Fail to restore workload"
		})
	})

	return nil
}
// Verify confirms that the kibishii workload restored on the standby cluster
// contains the same data that was written before the backup.
func (m *migrationE2E) Verify() error {
	step := fmt.Sprintf("Verify workload %s after restore on standby cluster", m.CaseBaseName)
	By(step, func() {
		err := kibishii.KibishiiVerifyAfterRestore(
			*m.VeleroCfg.StandbyClient,
			m.CaseBaseName,
			m.Ctx,
			&m.kibishiiData,
			"",
		)
		Expect(err).To(Succeed(), "Fail to verify workload after restore")
	})
	return nil
}
// Clean tears down test resources on both clusters: it runs the base
// TestCase cleanup (default cluster), then switches to the standby cluster
// to remove the E2E storage classes, the optional VolumeSnapshotClass,
// the Velero installation, and the restored namespace, and finally switches
// kubectl back to the default cluster context.
func (m *migrationE2E) Clean() error {
	By("Clean resource on default cluster.", func() {
		Expect(m.TestCase.Clean()).To(Succeed())
	})
	By("Clean resource on standby cluster.", func() {
		// Point kubectl and the install client at the standby cluster before
		// deleting anything there.
		Expect(k8sutil.KubectlConfigUseContext(
			m.Ctx, m.VeleroCfg.StandbyClusterContext)).To(Succeed())
		m.VeleroCfg.ClientToInstallVelero = m.VeleroCfg.StandbyClient
		m.VeleroCfg.ClusterToInstallVelero = m.VeleroCfg.StandbyClusterName
		By("Delete StorageClasses created by E2E")
		// Two storage classes are installed by the E2E setup; remove both.
		Expect(
			k8sutil.DeleteStorageClass(
				m.Ctx,
				*m.VeleroCfg.ClientToInstallVelero,
				test.StorageClassName,
			),
		).To(Succeed())
		Expect(
			k8sutil.DeleteStorageClass(
				m.Ctx,
				*m.VeleroCfg.ClientToInstallVelero,
				test.StorageClassName2,
			),
		).To(Succeed())
		// The VolumeSnapshotClass is only applied for CSI + volume-snapshot
		// runs (mirrors the condition used at install time).
		if strings.EqualFold(m.VeleroCfg.Features, test.FeatureCSI) &&
			m.VeleroCfg.UseVolumeSnapshots {
			By("Delete VolumeSnapshotClass created by E2E")
			Expect(
				k8sutil.KubectlDeleteByFile(
					m.Ctx,
					fmt.Sprintf("../testdata/volume-snapshot-class/%s.yaml",
						m.VeleroCfg.StandbyClusterCloudProvider),
				),
			).To(Succeed())
		}
		Expect(veleroutil.VeleroUninstall(m.Ctx, m.VeleroCfg)).To(Succeed())
		// Delete the restored workload namespace; the final argument waits
		// for the deletion to complete.
		Expect(
			k8sutil.DeleteNamespace(
				m.Ctx,
				*m.VeleroCfg.StandbyClient,
				m.CaseBaseName,
				true,
			),
		).To(Succeed())
	})
	// Leave kubectl pointing at the default cluster for subsequent cases.
	By("Switch to default KubeConfig context", func() {
		Expect(k8sutil.KubectlConfigUseContext(
			m.Ctx,
			m.VeleroCfg.DefaultClusterContext,
		)).To(Succeed())
	})
	return nil
}

View File

@ -91,8 +91,9 @@ func TestFuncWithMultiIt(tests []VeleroBackupRestoreTest) func() {
}
func TestIt(test VeleroBackupRestoreTest) error {
test.Init()
It(test.GetTestMsg().Text, func() {
It("Run E2E test case", func() {
Expect(test.Init()).To(Succeed())
Expect(RunTestCase(test)).To(Succeed(), test.GetTestMsg().FailedMSG)
})
return nil
@ -213,6 +214,7 @@ func RunTestCase(test VeleroBackupRestoreTest) error {
if test == nil {
return errors.New("No case should be tested")
}
fmt.Println("Running case: ", test.GetTestMsg().Text)
test.Start()
defer test.GetTestCase().CtxCancel()

View File

@ -138,13 +138,7 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
tmpCfgForOldVeleroInstall.VeleroVersion = version
tmpCfgForOldVeleroInstall.UseVolumeSnapshots = useVolumeSnapshots
if supportUploaderType {
tmpCfgForOldVeleroInstall.UseRestic = false
tmpCfgForOldVeleroInstall.UseNodeAgent = !useVolumeSnapshots
} else {
tmpCfgForOldVeleroInstall.UseRestic = !useVolumeSnapshots
tmpCfgForOldVeleroInstall.UseNodeAgent = false
}
tmpCfgForOldVeleroInstall.UseNodeAgent = !useVolumeSnapshots
Expect(VeleroInstall(context.Background(), &tmpCfgForOldVeleroInstall, false)).To(Succeed())
Expect(CheckVeleroVersion(context.Background(), tmpCfgForOldVeleroInstall.VeleroCLI,
@ -230,7 +224,6 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
By(fmt.Sprintf("Upgrade Velero by CLI %s", tmpCfg.VeleroCLI), func() {
tmpCfg.GCFrequency = ""
tmpCfg.UseRestic = false
tmpCfg.UseNodeAgent = !useVolumeSnapshots
Expect(err).To(Succeed())
if supportUploaderType {

View File

@ -113,8 +113,8 @@ func GetVolumeSnapshotContentNameByPod(client TestClient, podName, namespace, ba
if len(pvList) != 1 {
return "", errors.New(fmt.Sprintf("Only 1 PV of PVC %s pod %s should be found under namespace %s", pvcList[0], podName, namespace))
}
pv_value, err := GetPersistentVolume(context.Background(), client, "", pvList[0])
fmt.Println(pv_value.Annotations["pv.kubernetes.io/provisioned-by"])
pvValue, err := GetPersistentVolume(context.Background(), client, "", pvList[0])
fmt.Println(pvValue.Annotations["pv.kubernetes.io/provisioned-by"])
if err != nil {
return "", err
}
@ -148,14 +148,10 @@ func CheckVolumeSnapshotCR(client TestClient, index map[string]string, expectedC
if len(apiVersion) == 0 {
return nil, errors.New("Fail to get APIVersion")
}
// if apiVersion[0] == "v1beta1" {
// if snapshotContentNameList, err = GetCsiSnapshotHandle(client, apiVersion[0], index); err != nil {
// return nil, errors.Wrap(err, "Fail to get Azure CSI snapshot content")
// }
// } else
if apiVersion[0] == "v1" {
if snapshotContentNameList, err = GetCsiSnapshotHandle(client, apiVersion[0], index); err != nil {
return nil, errors.Wrap(err, "Fail to get Azure CSI snapshot content")
return nil, errors.Wrap(err, "Fail to get CSI snapshot content")
}
} else {
return nil, errors.New("API version is invalid")

View File

@ -271,9 +271,6 @@ func installVeleroServer(ctx context.Context, cli, cloudProvider string, options
if len(options.Image) > 0 {
args = append(args, "--image", options.Image)
}
if options.UseRestic {
args = append(args, "--use-restic")
}
if options.UseNodeAgent {
args = append(args, "--use-node-agent")
}

View File

@ -255,10 +255,7 @@ func getProviderVeleroInstallOptions(veleroCfg *VeleroConfig,
io.DefaultVolumesToFsBackup = veleroCfg.DefaultVolumesToFsBackup
io.UseVolumeSnapshots = veleroCfg.UseVolumeSnapshots
if !veleroCfg.UseRestic {
io.UseNodeAgent = veleroCfg.UseNodeAgent
}
io.UseRestic = veleroCfg.UseRestic
io.UseNodeAgent = veleroCfg.UseNodeAgent
io.Image = veleroCfg.VeleroImage
io.Namespace = veleroCfg.VeleroNamespace
io.UploaderType = veleroCfg.UploaderType