Merge pull request #8952 from Lyndon-Li/dm-controller-refactor-for-cancel

DM controller refactor for cancel
pull/8968/head
lyndon-li 2025-05-28 15:09:33 +08:00 committed by GitHub
commit 7db3eeac58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 1094 additions and 890 deletions

View File

@ -0,0 +1 @@
Fix issue #8534, refactor dm controllers to tolerate cancel request in more cases, e.g., node restart, node drain

View File

@ -165,3 +165,15 @@ func (d *DataDownloadBuilder) AcceptedTimestamp(acceptedTimestamp *metav1.Time)
d.object.Status.AcceptedTimestamp = acceptedTimestamp
return d
}
// Finalizers sets the DataDownload's Finalizers.
func (d *DataDownloadBuilder) Finalizers(finalizers []string) *DataDownloadBuilder {
d.object.Finalizers = finalizers
return d
}
// Message sets the DataDownload's Message.
func (d *DataDownloadBuilder) Message(msg string) *DataDownloadBuilder {
d.object.Status.Message = msg
return d
}

View File

@ -168,3 +168,15 @@ func (d *DataUploadBuilder) AcceptedTimestamp(acceptedTimestamp *metav1.Time) *D
d.object.Status.AcceptedTimestamp = acceptedTimestamp
return d
}
// Finalizers sets the DataUpload's Finalizers.
func (d *DataUploadBuilder) Finalizers(finalizers []string) *DataUploadBuilder {
d.object.Finalizers = finalizers
return d
}
// Message sets the DataUpload's Message.
func (d *DataUploadBuilder) Message(msg string) *DataUploadBuilder {
d.object.Status.Message = msg
return d
}

View File

@ -68,6 +68,7 @@ type DataDownloadReconciler struct {
podResources corev1api.ResourceRequirements
preparingTimeout time.Duration
metrics *metrics.ServerMetrics
cancelledDataDownload map[string]time.Time
}
func NewDataDownloadReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager,
@ -86,6 +87,7 @@ func NewDataDownloadReconciler(client client.Client, mgr manager.Manager, kubeCl
podResources: podResources,
preparingTimeout: preparingTimeout,
metrics: metrics,
cancelledDataDownload: make(map[string]time.Time),
}
}
@ -118,26 +120,27 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, nil
}
if r.restoreExposer == nil {
return r.errorOut(ctx, dd, errors.New("uninitialized generic exposer"), "uninitialized exposer", log)
// Logic for clear resources when datadownload been deleted
if !isDataDownloadInFinalState(dd) {
if !controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dd *velerov2alpha1api.DataDownload) bool {
if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
return false
}
// Add finalizer
// Logic for clear resources when datadownload been deleted
if dd.DeletionTimestamp.IsZero() { // add finalizer for all cr at beginning
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dd *velerov2alpha1api.DataDownload) {
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
})
if err != nil {
log.Errorf("failed to add finalizer with error %s for %s/%s", err.Error(), dd.Namespace, dd.Name)
return true
}); err != nil {
log.WithError(err).Errorf("failed to add finalizer for dd %s/%s", dd.Namespace, dd.Name)
return ctrl.Result{}, err
} else if !succeeded {
log.Warnf("failed to add finalizer for %s/%s and will requeue later", dd.Namespace, dd.Name)
return ctrl.Result{Requeue: true}, nil
}
return ctrl.Result{}, nil
}
} else if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
if !dd.DeletionTimestamp.IsZero() {
if !dd.Spec.Cancel {
// when delete cr we need to clear up internal resources created by Velero, here we use the cancel mechanism
// to help clear up resources instead of clear them directly in case of some conflict with Expose action
log.Warnf("Cancel dd under phase %s because it is being deleted", dd.Status.Phase)
@ -152,9 +155,56 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return true
}); err != nil {
log.Errorf("failed to set cancel flag with error %s for %s/%s", err.Error(), dd.Namespace, dd.Name)
log.WithError(err).Errorf("failed to set cancel flag for dd %s/%s", dd.Namespace, dd.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
}
} else {
delete(r.cancelledDataDownload, dd.Name)
// put the finalizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state.
// remove finalizer no matter whether the cr is being deleted or not for it is no longer needed when internal resources are all cleaned up
// also in final status cr won't block the direct delete of the velero namespace
if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dd *velerov2alpha1api.DataDownload) bool {
if !controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
return false
}
controllerutil.RemoveFinalizer(dd, DataUploadDownloadFinalizer)
return true
}); err != nil {
log.WithError(err).Error("error to remove finalizer")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
}
if dd.Spec.Cancel {
if spotted, found := r.cancelledDataDownload[dd.Name]; !found {
r.cancelledDataDownload[dd.Name] = r.Clock.Now()
} else {
delay := cancelDelayOthers
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
delay = cancelDelayInProgress
}
if time.Since(spotted) > delay {
log.Infof("Data download %s is canceled in Phase %s but not handled in rasonable time", dd.GetName(), dd.Status.Phase)
if r.tryCancelDataDownload(ctx, dd, "") {
delete(r.cancelledDataDownload, dd.Name)
}
return ctrl.Result{}, nil
}
}
}
if dd.Status.Phase == "" || dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseNew {
@ -167,7 +217,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
accepted, err := r.acceptDataDownload(ctx, dd)
if err != nil {
return r.errorOut(ctx, dd, err, "error to accept the data download", log)
return ctrl.Result{}, errors.Wrapf(err, "error accepting the data download %s", dd.Name)
}
if !accepted {
@ -193,44 +243,14 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
// And then only the controller who is in the same node could do the rest work.
err = r.restoreExposer.Expose(ctx, getDataDownloadOwnerObject(dd), exposeParam)
if err != nil {
if err := r.client.Get(ctx, req.NamespacedName, dd); err != nil {
if !apierrors.IsNotFound(err) {
return ctrl.Result{}, errors.Wrap(err, "getting DataUpload")
}
}
if isDataDownloadInFinalState(dd) {
log.Warnf("expose snapshot with err %v but it may caused by clean up resources in cancel action", err)
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
return ctrl.Result{}, nil
} else {
return r.errorOut(ctx, dd, err, "error to expose snapshot", log)
}
}
log.Info("Restore is exposed")
// we need to get CR again for it may canceled by datadownload controller on other
// nodes when doing expose action, if detectd cancel action we need to clear up the internal
// resources created by velero during backup.
if err := r.client.Get(ctx, req.NamespacedName, dd); err != nil {
if apierrors.IsNotFound(err) {
log.Debug("Unable to find datadownload")
return ctrl.Result{}, nil
}
return ctrl.Result{}, errors.Wrap(err, "getting datadownload")
}
// we need to clean up resources as resources created in Expose it may later than cancel action or prepare time
// and need to clean up resources again
if isDataDownloadInFinalState(dd) {
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
}
return ctrl.Result{}, nil
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted {
if dd.Spec.Cancel {
log.Debugf("Data download is been canceled %s in Phase %s", dd.GetName(), dd.Status.Phase)
r.tryCancelAcceptedDataDownload(ctx, dd, "")
} else if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
r.tryCancelAcceptedDataDownload(ctx, dd, fmt.Sprintf("found a datadownload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
r.tryCancelDataDownload(ctx, dd, fmt.Sprintf("found a datadownload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
log.Errorf("Cancel dd %s/%s because of expose error %s", dd.Namespace, dd.Name, peekErr)
} else if dd.Status.AcceptedTimestamp != nil {
if time.Since(dd.Status.AcceptedTimestamp.Time) >= r.preparingTimeout {
@ -240,7 +260,11 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, nil
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhasePrepared {
log.Info("Data download is prepared")
log.Infof("Data download is prepared and should be processed by %s (%s)", dd.Status.Node, r.nodeName)
if dd.Status.Node != r.nodeName {
return ctrl.Result{}, nil
}
if dd.Spec.Cancel {
log.Debugf("Data download is been canceled %s in Phase %s", dd.GetName(), dd.Status.Phase)
@ -259,13 +283,12 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
if err != nil {
return r.errorOut(ctx, dd, err, "restore exposer is not ready", log)
} else if result == nil {
log.Debug("Get empty restore exposer")
return ctrl.Result{}, nil
return r.errorOut(ctx, dd, errors.New("no expose result is available for the current node"), "exposed snapshot is not ready", log)
}
log.Info("Restore PVC is ready and creating data path routine")
// Need to first create file system BR and get data path instance then update data upload status
// Need to first create file system BR and get data path instance then update data download status
callbacks := datapath.Callbacks{
OnCompleted: r.OnDataDownloadCompleted,
OnFailed: r.OnDataDownloadFailed,
@ -292,16 +315,30 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
// Update status to InProgress
original := dd.DeepCopy()
terminated := false
if err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log, func(dd *velerov2alpha1api.DataDownload) bool {
if isDataDownloadInFinalState(dd) {
terminated = true
return false
}
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseInProgress
dd.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Warnf("Failed to update datadownload %s to InProgress, will close data path and retry", dd.Name)
return true
}); err != nil {
log.WithError(err).Warnf("Failed to update datadownload %s to InProgress, will data path close and retry", dd.Name)
r.closeDataPath(ctx, dd.Name)
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
}
if terminated {
log.Warnf("datadownload %s is terminated during transition from prepared", dd.Name)
r.closeDataPath(ctx, dd.Name)
return ctrl.Result{}, nil
}
log.Info("Data download is marked as in progress")
if err := r.startCancelableDataPath(asyncBR, dd, result, log); err != nil {
@ -313,46 +350,42 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, nil
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
log.Info("Data download is in progress")
if dd.Spec.Cancel {
log.Info("Data download is being canceled")
if dd.Status.Node != r.nodeName {
return ctrl.Result{}, nil
}
log.Info("In progress data download is being canceled")
asyncBR := r.dataPathMgr.GetAsyncBR(dd.Name)
if asyncBR == nil {
if r.nodeName == dd.Status.Node {
r.OnDataDownloadCancelled(ctx, dd.GetNamespace(), dd.GetName())
} else {
log.Info("Data path is not started in this node and will not canceled by current node")
}
return ctrl.Result{}, nil
}
// Update status to Canceling.
original := dd.DeepCopy()
if err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log, func(dd *velerov2alpha1api.DataDownload) bool {
if isDataDownloadInFinalState(dd) {
log.Warnf("datadownload %s is terminated, abort setting it to canceling", dd.Name)
return false
}
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseCanceling
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error updating data download status")
return true
}); err != nil {
log.WithError(err).Error("error updating data download into canceling status")
return ctrl.Result{}, err
}
asyncBR.Cancel()
return ctrl.Result{}, nil
}
return ctrl.Result{}, nil
} else {
// put the finalizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state
// remove finalizer no matter whether the cr is being deleted or not for it is no longer needed when internal resources are all cleaned up
// also in final status cr won't block the direct delete of the velero namespace
if isDataDownloadInFinalState(dd) && controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
original := dd.DeepCopy()
controllerutil.RemoveFinalizer(dd, DataUploadDownloadFinalizer)
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error to remove finalizer")
}
}
return ctrl.Result{}, nil
}
}
func (r *DataDownloadReconciler) initCancelableDataPath(ctx context.Context, asyncBR datapath.AsyncBR, res *exposer.ExposeResult, log logrus.FieldLogger) error {
log.Info("Init cancelable dataDownload")
@ -401,10 +434,16 @@ func (r *DataDownloadReconciler) OnDataDownloadCompleted(ctx context.Context, na
log.Info("Cleaning up exposed environment")
r.restoreExposer.CleanUp(ctx, objRef)
original := dd.DeepCopy()
if err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log, func(dd *velerov2alpha1api.DataDownload) bool {
if isDataDownloadInFinalState(dd) {
return false
}
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseCompleted
dd.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
if err := r.client.Patch(ctx, &dd, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Error("error updating data download status")
} else {
log.Infof("Data download is marked as %s", dd.Status.Phase)
@ -437,29 +476,34 @@ func (r *DataDownloadReconciler) OnDataDownloadCancelled(ctx context.Context, na
var dd velerov2alpha1api.DataDownload
if getErr := r.client.Get(ctx, types.NamespacedName{Name: ddName, Namespace: namespace}, &dd); getErr != nil {
log.WithError(getErr).Warn("Failed to get datadownload on cancel")
} else {
return
}
// cleans up any objects generated during the snapshot expose
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(&dd))
original := dd.DeepCopy()
if err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log, func(dd *velerov2alpha1api.DataDownload) bool {
if isDataDownloadInFinalState(dd) {
return false
}
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseCanceled
if dd.Status.StartTimestamp.IsZero() {
dd.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
dd.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
if err := r.client.Patch(ctx, &dd, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Error("error updating data download status")
} else {
r.metrics.RegisterDataDownloadCancel(r.nodeName)
}
delete(r.cancelledDataDownload, dd.Name)
}
}
func (r *DataDownloadReconciler) tryCancelAcceptedDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload, message string) {
func (r *DataDownloadReconciler) tryCancelDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload, message string) bool {
log := r.logger.WithField("datadownload", dd.Name)
log.Warn("Accepted data download is canceled")
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dataDownload *velerov2alpha1api.DataDownload) {
succeeded, err := funcExclusiveUpdateDataDownload(ctx, r.client, dd, func(dataDownload *velerov2alpha1api.DataDownload) {
dataDownload.Status.Phase = velerov2alpha1api.DataDownloadPhaseCanceled
if dataDownload.Status.StartTimestamp.IsZero() {
dataDownload.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
@ -473,31 +517,29 @@ func (r *DataDownloadReconciler) tryCancelAcceptedDataDownload(ctx context.Conte
if err != nil {
log.WithError(err).Error("error updating datadownload status")
return
return false
} else if !succeeded {
log.Warn("conflict in updating datadownload status and will try it again later")
return
return false
}
// success update
r.metrics.RegisterDataDownloadCancel(r.nodeName)
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
log.Warn("data download is canceled")
return true
}
func (r *DataDownloadReconciler) OnDataDownloadProgress(ctx context.Context, namespace string, ddName string, progress *uploader.Progress) {
log := r.logger.WithField("datadownload", ddName)
var dd velerov2alpha1api.DataDownload
if err := r.client.Get(ctx, types.NamespacedName{Name: ddName, Namespace: namespace}, &dd); err != nil {
log.WithError(err).Warn("Failed to get data download on progress")
return
}
original := dd.DeepCopy()
if err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: namespace, Name: ddName}, log, func(dd *velerov2alpha1api.DataDownload) bool {
dd.Status.Progress = shared.DataMoveOperationProgress{TotalBytes: progress.TotalBytes, BytesDone: progress.BytesDone}
if err := r.client.Patch(ctx, &dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("Failed to update restore snapshot progress")
return true
}); err != nil {
log.WithError(err).Error("Failed to update progress")
}
}
@ -509,7 +551,19 @@ func (r *DataDownloadReconciler) OnDataDownloadProgress(ctx context.Context, nam
func (r *DataDownloadReconciler) SetupWithManager(mgr ctrl.Manager) error {
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
dd := object.(*velerov2alpha1api.DataDownload)
return (dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted)
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted {
return true
}
if dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
return true
}
if isDataDownloadInFinalState(dd) && !dd.DeletionTimestamp.IsZero() {
return true
}
return false
})
s := kube.NewPeriodicalEnqueueSource(r.logger.WithField("controller", constant.ControllerDataDownload), r.client, &velerov2alpha1api.DataDownloadList{}, preparingMonitorFrequency, kube.PeriodicalEnqueueSourceOption{
Predicates: []predicate.Predicate{gp},
@ -568,10 +622,17 @@ func (r *DataDownloadReconciler) findSnapshotRestoreForPod(ctx context.Context,
if pod.Status.Phase == corev1api.PodRunning {
log.Info("Preparing data download")
// we don't expect anyone else update the CR during the Prepare process
updated, err := r.exclusiveUpdateDataDownload(context.Background(), dd, r.prepareDataDownload)
if err != nil || !updated {
log.WithField("updated", updated).WithError(err).Warn("failed to update datadownload, prepare will halt for this datadownload")
if err = UpdateDataDownloadWithRetry(context.Background(), r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log,
func(dd *velerov2alpha1api.DataDownload) bool {
if isDataDownloadInFinalState(dd) {
log.Warnf("datadownload %s is terminated, abort setting it to prepared", dd.Name)
return false
}
r.prepareDataDownload(dd)
return true
}); err != nil {
log.WithError(err).Warn("failed to update dataudownload, prepare will halt for this dataudownload")
return []reconcile.Request{}
}
} else if unrecoverable, reason := kube.IsPodUnrecoverable(pod, log); unrecoverable {
@ -618,13 +679,19 @@ func (r *DataDownloadReconciler) errorOut(ctx context.Context, dd *velerov2alpha
}
func (r *DataDownloadReconciler) updateStatusToFailed(ctx context.Context, dd *velerov2alpha1api.DataDownload, err error, msg string, log logrus.FieldLogger) error {
log.Infof("update data download status to %v", dd.Status.Phase)
original := dd.DeepCopy()
log.Info("update data download status to Failed")
if patchErr := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log, func(dd *velerov2alpha1api.DataDownload) bool {
if isDataDownloadInFinalState(dd) {
return false
}
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseFailed
dd.Status.Message = errors.WithMessage(err, msg).Error()
dd.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
if patchErr := r.client.Patch(ctx, dd, client.MergeFrom(original)); patchErr != nil {
return true
}); patchErr != nil {
log.WithError(patchErr).Error("error updating DataDownload status")
} else {
r.metrics.RegisterDataDownloadFailure(r.nodeName)
@ -647,7 +714,7 @@ func (r *DataDownloadReconciler) acceptDataDownload(ctx context.Context, dd *vel
datadownload.Status.AcceptedTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
succeeded, err := r.exclusiveUpdateDataDownload(ctx, updated, updateFunc)
succeeded, err := funcExclusiveUpdateDataDownload(ctx, r.client, updated, updateFunc)
if err != nil {
return false, err
@ -667,7 +734,7 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
log := r.logger.WithField("DataDownload", dd.Name)
log.Info("Timeout happened for preparing datadownload")
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dd *velerov2alpha1api.DataDownload) {
succeeded, err := funcExclusiveUpdateDataDownload(ctx, r.client, dd, func(dd *velerov2alpha1api.DataDownload) {
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseFailed
dd.Status.Message = "timeout on preparing data download"
})
@ -678,7 +745,7 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
}
if !succeeded {
log.Warn("Dataupload has been updated by others")
log.Warn("Datadownload has been updated by others")
return
}
@ -689,16 +756,18 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
log.Info("Dataupload has been cleaned up")
log.Info("Datadownload has been cleaned up")
r.metrics.RegisterDataDownloadFailure(r.nodeName)
}
func (r *DataDownloadReconciler) exclusiveUpdateDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload,
var funcExclusiveUpdateDataDownload = exclusiveUpdateDataDownload
func exclusiveUpdateDataDownload(ctx context.Context, cli client.Client, dd *velerov2alpha1api.DataDownload,
updateFunc func(*velerov2alpha1api.DataDownload)) (bool, error) {
updateFunc(dd)
err := r.client.Update(ctx, dd)
err := cli.Update(ctx, dd)
if err == nil {
return true, nil
@ -805,7 +874,7 @@ func isDataDownloadInFinalState(dd *velerov2alpha1api.DataDownload) bool {
dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseCompleted
}
func UpdateDataDownloadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log *logrus.Entry, updateFunc func(*velerov2alpha1api.DataDownload) bool) error {
func UpdateDataDownloadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log logrus.FieldLogger, updateFunc func(*velerov2alpha1api.DataDownload) bool) error {
return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) {
dd := &velerov2alpha1api.DataDownload{}
if err := client.Get(ctx, namespacedName, dd); err != nil {
@ -839,11 +908,7 @@ func (r *DataDownloadReconciler) AttemptDataDownloadResume(ctx context.Context,
for i := range dataDownloads.Items {
dd := &dataDownloads.Items[i]
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhasePrepared {
// keep doing nothing let controller re-download the data
// the Prepared CR could be still handled by datadownload controller after node-agent restart
logger.WithField("datadownload", dd.GetName()).Debug("find a datadownload with status prepared")
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
if dd.Status.Node != r.nodeName {
logger.WithField("dd", dd.Name).WithField("current node", r.nodeName).Infof("DD should be resumed by another node %s", dd.Status.Node)
continue
@ -870,25 +935,12 @@ func (r *DataDownloadReconciler) AttemptDataDownloadResume(ctx context.Context,
return true
})
if err != nil {
logger.WithError(errors.WithStack(err)).WithError(errors.WithStack(err)).Error("Failed to trigger dataupload cancel")
}
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted {
r.logger.WithField("datadownload", dd.GetName()).Warn("Cancel dd under Accepted phase")
err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name},
r.logger.WithField("datadownload", dd.Name), func(dataDownload *velerov2alpha1api.DataDownload) bool {
if dataDownload.Spec.Cancel {
return false
}
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = "Datadownload is in Accepted status during the node-agent starting, mark it as cancel"
return true
})
if err != nil {
r.logger.WithField("datadownload", dd.GetName()).WithError(err).Errorf("Failed to trigger dataupload cancel")
logger.WithError(errors.WithStack(err)).WithError(errors.WithStack(err)).Error("Failed to trigger datadownload cancel")
}
} else {
// the Prepared CR could be still handled by datadownload controller after node-agent restart
// the accepted CR may also suvived from node-agent restart as long as the intermediate objects are all done
logger.WithField("datadownload", dd.GetName()).Infof("find a datadownload with status %s", dd.Status.Phase)
}
}
@ -914,7 +966,7 @@ func (r *DataDownloadReconciler) resumeCancellableDataPath(ctx context.Context,
OnProgress: r.OnDataDownloadProgress,
}
asyncBR, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, r.kubeClient, r.mgr, datapath.TaskTypeBackup, dd.Name, dd.Namespace, res.ByPod.HostingPod.Name, res.ByPod.HostingContainer, dd.Name, callbacks, true, log)
asyncBR, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, r.kubeClient, r.mgr, datapath.TaskTypeRestore, dd.Name, dd.Namespace, res.ByPod.HostingPod.Name, res.ByPod.HostingContainer, dd.Name, callbacks, true, log)
if err != nil {
return errors.Wrapf(err, "error to create asyncBR watcher for dd %s", dd.Name)
}

View File

@ -37,7 +37,6 @@ import (
clientgofake "k8s.io/client-go/kubernetes/fake"
ctrl "sigs.k8s.io/controller-runtime"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@ -68,7 +67,7 @@ func dataDownloadBuilder() *builder.DataDownloadBuilder {
PV: "test-pv",
PVC: "test-pvc",
Namespace: "test-ns",
}).NodeOS(velerov2alpha1api.NodeOS("linux"))
})
}
func initDataDownloadReconciler(objects []runtime.Object, needError ...bool) (*DataDownloadReconciler, error) {
@ -173,16 +172,18 @@ func TestDataDownloadReconcile(t *testing.T) {
tests := []struct {
name string
dd *velerov2alpha1api.DataDownload
notCreateDD bool
targetPVC *corev1api.PersistentVolumeClaim
dataMgr *datapath.Manager
needErrs []bool
needCreateFSBR bool
needDelete bool
sportTime *metav1.Time
isExposeErr bool
isGetExposeErr bool
isGetExposeNil bool
isPeekExposeErr bool
isNilExposer bool
isFSBRInitErr bool
isFSBRRestoreErr bool
notNilExpose bool
notMockCleanUp bool
mockInit bool
@ -191,69 +192,182 @@ func TestDataDownloadReconcile(t *testing.T) {
mockStartErr error
mockCancel bool
mockClose bool
needExclusiveUpdateError error
expected *velerov2alpha1api.DataDownload
expectedStatusMsg string
checkFunc func(du velerov2alpha1api.DataDownload) bool
expectDeleted bool
expectCancelRecord bool
expectedResult *ctrl.Result
expectedErr string
expectDataPath bool
}{
{
name: "dd not found",
dd: dataDownloadBuilder().Result(),
notCreateDD: true,
},
{
name: "dd not created in velero default namespace",
dd: builder.ForDataDownload("test-ns", dataDownloadName).Result(),
},
{
name: "get dd fail",
dd: dataDownloadBuilder().Result(),
needErrs: []bool{true, false, false, false},
expectedErr: "Get error",
},
{
name: "dd is not for built-in dm",
dd: dataDownloadBuilder().DataMover("other").Result(),
},
{
name: "add finalizer to dd",
dd: dataDownloadBuilder().Result(),
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "add finalizer to dd failed",
dd: dataDownloadBuilder().Result(),
needErrs: []bool{false, false, true, false},
expectedErr: "error updating datadownload velero/datadownload-1: Update error",
},
{
name: "dd is under deletion",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
needDelete: true,
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Result(),
},
{
name: "dd is under deletion but cancel failed",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
needErrs: []bool{false, false, true, false},
needDelete: true,
expectedErr: "error updating datadownload velero/datadownload-1: Update error",
},
{
name: "dd is under deletion and in terminal state",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
sportTime: &metav1.Time{Time: time.Now()},
needDelete: true,
expectDeleted: true,
},
{
name: "dd is under deletion and in terminal state, but remove finalizer failed",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
needErrs: []bool{false, false, true, false},
needDelete: true,
expectedErr: "error updating datadownload velero/datadownload-1: Update error",
},
{
name: "delay cancel negative for others",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
sportTime: &metav1.Time{Time: time.Now()},
expectCancelRecord: true,
},
{
name: "delay cancel negative for inProgress",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
sportTime: &metav1.Time{Time: time.Now().Add(-time.Minute * 58)},
expectCancelRecord: true,
},
{
name: "delay cancel affirmative for others",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
sportTime: &metav1.Time{Time: time.Now().Add(-time.Minute * 5)},
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
},
{
name: "delay cancel affirmative for inProgress",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
sportTime: &metav1.Time{Time: time.Now().Add(-time.Hour)},
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
},
{
name: "delay cancel failed",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
needErrs: []bool{false, false, true, false},
sportTime: &metav1.Time{Time: time.Now().Add(-time.Hour)},
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
expectCancelRecord: true,
},
{
name: "Unknown data download status",
dd: dataDownloadBuilder().Phase("Unknown").Cancel(true).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
dd: dataDownloadBuilder().Phase("Unknown").Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "Cancel data downloand in progress and patch data download error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needErrs: []bool{false, false, false, true},
needCreateFSBR: true,
expectedStatusMsg: "Patch error",
name: "new dd but no target PVC",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedResult: &ctrl.Result{Requeue: true},
},
{
name: "Cancel data downloand in progress with empty FSBR",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Result(),
name: "new dd but accept failed",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
mockCancel: true,
needExclusiveUpdateError: errors.New("exclusive-update-error"),
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedErr: "error accepting the data download datadownload-1: exclusive-update-error",
},
{
name: "Cancel data downloand in progress with create FSBR",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Result()
dd.Status.Node = "test-node"
return dd
}(),
name: "dd is cancel on accepted",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needCreateFSBR: true,
mockCancel: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceling).Result(),
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
},
{
name: "Cancel data downloand in progress without create FSBR",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Result()
dd.Status.Node = "test-node"
return dd
}(),
name: "dd is accepted but setup expose param failed",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).NodeOS("xxx").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needCreateFSBR: false,
mockCancel: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).NodeOS("xxx").Phase(velerov2alpha1api.DataDownloadPhaseFailed).Message("failed to set exposer parameters").Result(),
expectedErr: "no appropriate node to run datadownload velero/datadownload-1: node with OS xxx doesn't exist",
},
{
name: "Cancel data downloand in progress in different node",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Result()
dd.Status.Node = "different-node"
return dd
}(),
name: "dd expose failed",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needCreateFSBR: false,
mockCancel: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
isExposeErr: true,
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseFailed).Message("error to expose snapshot").Result(),
expectedErr: "Error to expose restore exposer",
},
{
name: "dd succeeds for accepted",
dd: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
expected: dataDownloadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
},
{
name: "prepare timeout on accepted",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Finalizers([]string{DataUploadDownloadFinalizer}).AcceptedTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 30)}).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseFailed).Message("timeout on preparing data download").Result(),
},
{
name: "peek error on accepted",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
isPeekExposeErr: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Message("found a datadownload velero/datadownload-1 with expose error: fake-peek-error. mark it as cancel").Result(),
},
{
name: "cancel on prepared",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Cancel(true).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
},
{
name: "Failed to get restore expose on prepared",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
isGetExposeErr: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("restore exposer is not ready").Result(),
expectedErr: "Error to get restore exposer",
},
{
name: "Get nil restore expose on prepared",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
isGetExposeNil: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("exposed snapshot is not ready").Result(),
expectedErr: "no expose result is available for the current node",
},
{
name: "Error in data path is concurrent limited",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
dataMgr: datapath.NewManager(0),
notNilExpose: true,
@ -262,157 +376,103 @@ func TestDataDownloadReconcile(t *testing.T) {
},
{
name: "data path init error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
mockInit: true,
mockInitErr: errors.New("fake-data-path-init-error"),
mockClose: true,
notNilExpose: true,
expectedStatusMsg: "error initializing asyncBR: fake-data-path-init-error",
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("error initializing data path").Result(),
expectedErr: "error initializing asyncBR: fake-data-path-init-error",
},
{
name: "Unable to update status to in progress for data download",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needErrs: []bool{false, false, false, true},
needErrs: []bool{false, false, true, false},
mockInit: true,
mockClose: true,
notNilExpose: true,
notMockCleanUp: true,
expectedResult: &ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "data path start error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
mockInit: true,
mockStart: true,
mockStartErr: errors.New("fake-data-path-start-error"),
mockClose: true,
notNilExpose: true,
expectedStatusMsg: "error starting async restore for pod test-name, volume test-pvc: fake-data-path-start-error",
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("error starting data path").Result(),
expectedErr: "error starting async restore for pod test-name, volume test-pvc: fake-data-path-start-error",
},
{
name: "accept DataDownload error",
dd: dataDownloadBuilder().Result(),
name: "Prepare succeeds",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
mockInit: true,
mockStart: true,
notNilExpose: true,
notMockCleanUp: true,
expectDataPath: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "In progress dd is not handled by the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "In progress dd is not set as cancel",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "Cancel data downloand in progress with empty FSBR",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "Cancel data downloand in progress and patch data download error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
needErrs: []bool{false, false, true, false},
expectedStatusMsg: "Update error",
needCreateFSBR: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedErr: "error updating datadownload velero/datadownload-1: Update error",
expectCancelRecord: true,
expectDataPath: true,
},
{
name: "Not create target pvc",
dd: dataDownloadBuilder().Result(),
},
{
name: "Uninitialized dataDownload",
dd: dataDownloadBuilder().Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
isNilExposer: true,
expectedStatusMsg: "uninitialized generic exposer",
},
{
name: "DataDownload not created in velero default namespace",
dd: builder.ForDataDownload("test-ns", dataDownloadName).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
},
{
name: "Failed to get dataDownload",
dd: builder.ForDataDownload("test-ns", dataDownloadName).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needErrs: []bool{true, false, false, false},
expectedStatusMsg: "Get error",
},
{
name: "Unsupported dataDownload type",
dd: dataDownloadBuilder().DataMover("Unsuppoorted type").Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
},
{
name: "Restore is exposed",
dd: dataDownloadBuilder().NodeOS(velerov2alpha1api.NodeOSLinux).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
},
{
name: "Expected node doesn't exist",
dd: dataDownloadBuilder().NodeOS(velerov2alpha1api.NodeOSWindows).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
expectedStatusMsg: "no appropriate node to run datadownload",
},
{
name: "Get empty restore exposer",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
},
{
name: "Failed to get restore exposer",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
expectedStatusMsg: "Error to get restore exposer",
isGetExposeErr: true,
},
{
name: "Error to start restore expose",
dd: dataDownloadBuilder().Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
expectedStatusMsg: "Error to expose restore exposer",
isExposeErr: true,
},
{
name: "prepare timeout",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).AcceptedTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
},
{
name: "peek error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
isPeekExposeErr: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceled).Result(),
},
{
name: "dataDownload with enabled cancel",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result()
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
checkFunc: func(du velerov2alpha1api.DataDownload) bool {
return du.Spec.Cancel
},
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
},
{
name: "dataDownload with remove finalizer and should not be retrieved",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Cancel(true).Result()
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
checkFunc: func(dd velerov2alpha1api.DataDownload) bool {
return !controllerutil.ContainsFinalizer(&dd, DataUploadDownloadFinalizer)
},
name: "Cancel data downloand in progress succeeds",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
needCreateFSBR: true,
mockCancel: true,
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseCanceling).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectDataPath: true,
expectCancelRecord: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
objs := []runtime.Object{daemonSet, node}
if test.targetPVC != nil {
objs = append(objs, test.targetPVC)
}
r, err := initDataDownloadReconciler(objs, test.needErrs...)
require.NoError(t, err)
defer func() {
r.client.Delete(ctx, test.dd, &kbclient.DeleteOptions{})
if test.targetPVC != nil {
r.client.Delete(ctx, test.targetPVC, &kbclient.DeleteOptions{})
}
}()
ctx := context.Background()
if test.dd.Namespace == velerov1api.DefaultNamespace {
err = r.client.Create(ctx, test.dd)
if !test.notCreateDD {
err = r.client.Create(context.Background(), test.dd)
require.NoError(t, err)
}
if test.needDelete {
err = r.client.Delete(context.Background(), test.dd)
require.NoError(t, err)
}
@ -422,6 +482,17 @@ func TestDataDownloadReconcile(t *testing.T) {
r.dataPathMgr = datapath.NewManager(1)
}
if test.sportTime != nil {
r.cancelledDataDownload[test.dd.Name] = test.sportTime.Time
}
funcExclusiveUpdateDataDownload = exclusiveUpdateDataDownload
if test.needExclusiveUpdateError != nil {
funcExclusiveUpdateDataDownload = func(context.Context, kbclient.Client, *velerov2alpha1api.DataDownload, func(*velerov2alpha1api.DataDownload)) (bool, error) {
return false, test.needExclusiveUpdateError
}
}
datapath.MicroServiceBRWatcherCreator = func(kbclient.Client, kubernetes.Interface, manager.Manager, string, string,
string, string, string, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
asyncBR := datapathmockes.NewAsyncBR(t)
@ -444,7 +515,7 @@ func TestDataDownloadReconcile(t *testing.T) {
return asyncBR
}
if test.isExposeErr || test.isGetExposeErr || test.isPeekExposeErr || test.isNilExposer || test.notNilExpose {
if test.isExposeErr || test.isGetExposeErr || test.isGetExposeNil || test.isPeekExposeErr || test.isNilExposer || test.notNilExpose {
if test.isNilExposer {
r.restoreExposer = nil
} else {
@ -458,6 +529,8 @@ func TestDataDownloadReconcile(t *testing.T) {
ep.On("GetExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(&exposer.ExposeResult{ByPod: exposer.ExposeByPod{HostingPod: hostingPod, VolumeName: "test-pvc"}}, nil)
} else if test.isGetExposeErr {
ep.On("GetExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("Error to get restore exposer"))
} else if test.isGetExposeNil {
ep.On("GetExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, nil)
} else if test.isPeekExposeErr {
ep.On("PeekExposed", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(errors.New("fake-peek-error"))
}
@ -485,48 +558,47 @@ func TestDataDownloadReconcile(t *testing.T) {
},
})
if test.expectedStatusMsg != "" {
require.ErrorContains(t, err, test.expectedStatusMsg)
if test.expectedErr != "" {
assert.EqualError(t, err, test.expectedErr)
} else {
require.NoError(t, err)
assert.NoError(t, err)
}
require.NotNil(t, actualResult)
if test.expectedResult != nil {
assert.Equal(t, test.expectedResult.Requeue, actualResult.Requeue)
assert.Equal(t, test.expectedResult.RequeueAfter, actualResult.RequeueAfter)
}
if test.expected != nil || test.expectDeleted {
dd := velerov2alpha1api.DataDownload{}
err = r.client.Get(ctx, kbclient.ObjectKey{
Name: test.dd.Name,
Namespace: test.dd.Namespace,
}, &dd)
if test.expected != nil {
if test.expectDeleted {
assert.True(t, apierrors.IsNotFound(err))
} else {
require.NoError(t, err)
assert.Equal(t, test.expected.Status.Phase, dd.Status.Phase)
assert.Contains(t, dd.Status.Message, test.expected.Status.Message)
assert.Equal(t, dd.Finalizers, test.expected.Finalizers)
assert.Equal(t, dd.Spec.Cancel, test.expected.Spec.Cancel)
}
}
if test.isGetExposeErr {
assert.Contains(t, dd.Status.Message, test.expectedStatusMsg)
}
if test.dd.Namespace == velerov1api.DefaultNamespace {
if controllerutil.ContainsFinalizer(test.dd, DataUploadDownloadFinalizer) {
assert.True(t, true, apierrors.IsNotFound(err)) //nolint:testifylint //FIXME
} else {
require.NoError(t, err)
}
} else {
assert.True(t, true, apierrors.IsNotFound(err)) //nolint:testifylint //FIXME
}
if !test.needCreateFSBR {
if !test.expectDataPath {
assert.Nil(t, r.dataPathMgr.GetAsyncBR(test.dd.Name))
} else {
assert.NotNil(t, r.dataPathMgr.GetAsyncBR(test.dd.Name))
}
t.Logf("%s: \n %v \n", test.name, dd)
if test.expectCancelRecord {
assert.Contains(t, r.cancelledDataDownload, test.dd.Name)
} else {
assert.Empty(t, r.cancelledDataDownload)
}
})
}
}
@ -661,7 +733,7 @@ func TestOnDataDownloadProgress(t *testing.T) {
{
name: "failed to patch datadownload",
dd: dataDownloadBuilder().Result(),
needErrs: []bool{false, false, false, true},
needErrs: []bool{false, false, true, false},
},
}
for _, test := range tests {
@ -883,7 +955,7 @@ func TestTryCancelDataDownload(t *testing.T) {
err = r.client.Create(ctx, test.dd)
require.NoError(t, err)
r.tryCancelAcceptedDataDownload(ctx, test.dd, "")
r.tryCancelDataDownload(ctx, test.dd, "")
if test.expectedErr == "" {
assert.NoError(t, err)
@ -1010,33 +1082,12 @@ func TestAttemptDataDownloadResume(t *testing.T) {
expectedError string
}{
{
name: "accepted DataDownload with no dd label",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload in the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).AcceptedByNode("node-1").Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but is canceled",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).AcceptedByNode("node-1").Result(),
acceptedDataDownloads: []string{dataDownloadName},
cancelledDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but cancel fail",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).AcceptedByNode("node-1").Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "prepared DataDownload",
name: "Other DataDownload",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
prepareddDataDownloads: []string{dataDownloadName},
},
{
name: "Other DataDownload",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
},
{
name: "InProgress DataDownload, not the current node",

View File

@ -59,6 +59,8 @@ const (
dataUploadDownloadRequestor = "snapshot-data-upload-download"
DataUploadDownloadFinalizer = "velero.io/data-upload-download-finalizer"
preparingMonitorFrequency = time.Minute
cancelDelayInProgress = time.Hour
cancelDelayOthers = time.Minute * 5
)
// DataUploadReconciler reconciles a DataUpload object
@ -77,6 +79,7 @@ type DataUploadReconciler struct {
podResources corev1api.ResourceRequirements
preparingTimeout time.Duration
metrics *metrics.ServerMetrics
cancelledDataUpload map[string]time.Time
}
func NewDataUploadReconciler(
@ -115,6 +118,7 @@ func NewDataUploadReconciler(
podResources: podResources,
preparingTimeout: preparingTimeout,
metrics: metrics,
cancelledDataUpload: make(map[string]time.Time),
}
}
@ -144,27 +148,27 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}
ep, ok := r.snapshotExposerList[du.Spec.SnapshotType]
if !ok {
return r.errorOut(ctx, du, errors.Errorf("%s type of snapshot exposer is not exist", du.Spec.SnapshotType), "not exist type of exposer", log)
}
// Logic for clear resources when dataupload been deleted
if du.DeletionTimestamp.IsZero() { // add finalizer for all cr at beginning
if !isDataUploadInFinalState(du) && !controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
succeeded, err := r.exclusiveUpdateDataUpload(ctx, du, func(du *velerov2alpha1api.DataUpload) {
controllerutil.AddFinalizer(du, DataUploadDownloadFinalizer)
})
if !isDataUploadInFinalState(du) {
if !controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
if err := UpdateDataUploadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataUpload *velerov2alpha1api.DataUpload) bool {
if controllerutil.ContainsFinalizer(dataUpload, DataUploadDownloadFinalizer) {
return false
}
if err != nil {
log.Errorf("failed to add finalizer with error %s for %s/%s", err.Error(), du.Namespace, du.Name)
controllerutil.AddFinalizer(dataUpload, DataUploadDownloadFinalizer)
return true
}); err != nil {
log.WithError(err).Errorf("failed to add finalizer for du %s/%s", du.Namespace, du.Name)
return ctrl.Result{}, err
} else if !succeeded {
log.Warnf("failed to add finalizer for %s/%s and will requeue later", du.Namespace, du.Name)
return ctrl.Result{Requeue: true}, nil
}
return ctrl.Result{}, nil
}
} else if controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) && !du.Spec.Cancel && !isDataUploadInFinalState(du) {
if !du.DeletionTimestamp.IsZero() {
if !du.Spec.Cancel {
// when delete cr we need to clear up internal resources created by Velero, here we use the cancel mechanism
// to help clear up resources instead of clear them directly in case of some conflict with Expose action
log.Warnf("Cancel du under phase %s because it is being deleted", du.Status.Phase)
@ -179,9 +183,61 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return true
}); err != nil {
log.Errorf("failed to set cancel flag with error %s for %s/%s", err.Error(), du.Namespace, du.Name)
log.WithError(err).Errorf("failed to set cancel flag for du %s/%s", du.Namespace, du.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
}
} else {
delete(r.cancelledDataUpload, du.Name)
// put the finalizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state.
// remove finalizer no matter whether the cr is being deleted or not for it is no longer needed when internal resources are all cleaned up
// also in final status cr won't block the direct delete of the velero namespace
if controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
if err := UpdateDataUploadWithRetry(ctx, r.client, req.NamespacedName, log, func(du *velerov2alpha1api.DataUpload) bool {
if !controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
return false
}
controllerutil.RemoveFinalizer(du, DataUploadDownloadFinalizer)
return true
}); err != nil {
log.WithError(err).Error("error to remove finalizer")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
}
if du.Spec.Cancel {
if spotted, found := r.cancelledDataUpload[du.Name]; !found {
r.cancelledDataUpload[du.Name] = r.Clock.Now()
} else {
delay := cancelDelayOthers
if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
delay = cancelDelayInProgress
}
if time.Since(spotted) > delay {
log.Infof("Data upload %s is canceled in Phase %s but not handled in reasonable time", du.GetName(), du.Status.Phase)
if r.tryCancelDataUpload(ctx, du, "") {
delete(r.cancelledDataUpload, du.Name)
}
return ctrl.Result{}, nil
}
}
}
ep, ok := r.snapshotExposerList[du.Spec.SnapshotType]
if !ok {
return r.errorOut(ctx, du, errors.Errorf("%s type of snapshot exposer is not exist", du.Spec.SnapshotType), "not exist type of exposer", log)
}
if du.Status.Phase == "" || du.Status.Phase == velerov2alpha1api.DataUploadPhaseNew {
@ -189,7 +245,7 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
accepted, err := r.acceptDataUpload(ctx, du)
if err != nil {
return r.errorOut(ctx, du, err, "error to accept the data upload", log)
return ctrl.Result{}, errors.Wrapf(err, "error accepting the data upload %s", du.Name)
}
if !accepted {
@ -213,48 +269,15 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// but the pod maybe is not in the same node of the current controller, so we need to return it here.
// And then only the controller who is in the same node could do the rest work.
if err := ep.Expose(ctx, getOwnerObject(du), exposeParam); err != nil {
if err := r.client.Get(ctx, req.NamespacedName, du); err != nil {
if !apierrors.IsNotFound(err) {
return ctrl.Result{}, errors.Wrap(err, "getting DataUpload")
}
}
if isDataUploadInFinalState(du) {
log.Warnf("expose snapshot with err %v but it may caused by clean up resources in cancel action", err)
r.cleanUp(ctx, du, log)
return ctrl.Result{}, nil
} else {
return r.errorOut(ctx, du, err, "error to expose snapshot", log)
}
return r.errorOut(ctx, du, err, "error exposing snapshot", log)
}
log.Info("Snapshot is exposed")
// we need to get CR again for it may canceled by dataupload controller on other
// nodes when doing expose action, if detectd cancel action we need to clear up the internal
// resources created by velero during backup.
if err := r.client.Get(ctx, req.NamespacedName, du); err != nil {
if apierrors.IsNotFound(err) {
log.Debug("Unable to find DataUpload")
return ctrl.Result{}, nil
}
return ctrl.Result{}, errors.Wrap(err, "getting DataUpload")
}
// we need to clean up resources as resources created in Expose it may later than cancel action or prepare time
// and need to clean up resources again
if isDataUploadInFinalState(du) {
r.cleanUp(ctx, du, log)
}
return ctrl.Result{}, nil
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseAccepted {
if du.Spec.Cancel {
// we don't want to update CR into cancel status forcely as it may conflict with CR update in Expose action
// we could retry when the CR requeue in periodcally
log.Debugf("Data upload is been canceled %s in Phase %s", du.GetName(), du.Status.Phase)
r.tryCancelAcceptedDataUpload(ctx, du, "")
} else if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
r.tryCancelAcceptedDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
r.tryCancelDataUpload(ctx, du, fmt.Sprintf("found a du %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
log.Errorf("Cancel du %s/%s because of expose error %s", du.Namespace, du.Name, peekErr)
} else if du.Status.AcceptedTimestamp != nil {
if time.Since(du.Status.AcceptedTimestamp.Time) >= r.preparingTimeout {
@ -264,9 +287,14 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhasePrepared {
log.Info("Data upload is prepared")
log.Infof("Data upload is prepared and should be processed by %s (%s)", du.Status.Node, r.nodeName)
if du.Status.Node != r.nodeName {
return ctrl.Result{}, nil
}
if du.Spec.Cancel {
log.Info("Prepared data upload is being canceled")
r.OnDataUploadCancelled(ctx, du.GetNamespace(), du.GetName())
return ctrl.Result{}, nil
}
@ -281,8 +309,7 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
if err != nil {
return r.errorOut(ctx, du, err, "exposed snapshot is not ready", log)
} else if res == nil {
log.Debug("Get empty exposer")
return ctrl.Result{}, nil
return r.errorOut(ctx, du, errors.New("no expose result is available for the current node"), "exposed snapshot is not ready", log)
}
if res.ByPod.NodeOS == nil {
@ -318,17 +345,31 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
// Update status to InProgress
original := du.DeepCopy()
terminated := false
if err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log, func(du *velerov2alpha1api.DataUpload) bool {
if isDataUploadInFinalState(du) {
terminated = true
return false
}
du.Status.Phase = velerov2alpha1api.DataUploadPhaseInProgress
du.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
du.Status.NodeOS = velerov2alpha1api.NodeOS(*res.ByPod.NodeOS)
if err := r.client.Patch(ctx, du, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Warnf("Failed to update dataupload %s to InProgress, will data path close and retry", du.Name)
r.closeDataPath(ctx, du.Name)
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
}
if terminated {
log.Warnf("dataupload %s is terminated during transition from prepared", du.Name)
r.closeDataPath(ctx, du.Name)
return ctrl.Result{}, nil
}
log.Info("Data upload is marked as in progress")
if err := r.startCancelableDataPath(asyncBR, du, res, log); err != nil {
@ -340,45 +381,39 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
log.Info("Data upload is in progress")
if du.Spec.Cancel {
log.Info("Data upload is being canceled")
if du.Status.Node != r.nodeName {
return ctrl.Result{}, nil
}
log.Info("In progress data upload is being canceled")
asyncBR := r.dataPathMgr.GetAsyncBR(du.Name)
if asyncBR == nil {
if du.Status.Node == r.nodeName {
r.OnDataUploadCancelled(ctx, du.GetNamespace(), du.GetName())
} else {
log.Info("Data path is not started in this node and will not canceled by current node")
}
return ctrl.Result{}, nil
}
// Update status to Canceling
original := du.DeepCopy()
if err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log, func(du *velerov2alpha1api.DataUpload) bool {
if isDataUploadInFinalState(du) {
log.Warnf("dataupload %s is terminated, abort setting it to canceling", du.Name)
return false
}
du.Status.Phase = velerov2alpha1api.DataUploadPhaseCanceling
if err := r.client.Patch(ctx, du, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Error("error updating data upload into canceling status")
return ctrl.Result{}, err
}
asyncBR.Cancel()
return ctrl.Result{}, nil
}
}
return ctrl.Result{}, nil
} else {
// put the finalizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state.
// remove finalizer no matter whether the cr is being deleted or not for it is no longer needed when internal resources are all cleaned up
// also in final status cr won't block the direct delete of the velero namespace
if isDataUploadInFinalState(du) && controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) {
original := du.DeepCopy()
controllerutil.RemoveFinalizer(du, DataUploadDownloadFinalizer)
if err := r.client.Patch(ctx, du, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error to remove finalizer")
}
}
return ctrl.Result{}, nil
}
}
func (r *DataUploadReconciler) initCancelableDataPath(ctx context.Context, asyncBR datapath.AsyncBR, res *exposer.ExposeResult, log logrus.FieldLogger) error {
@ -433,7 +468,11 @@ func (r *DataUploadReconciler) OnDataUploadCompleted(ctx context.Context, namesp
}
// Update status to Completed with path & snapshot ID.
original := du.DeepCopy()
if err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log, func(du *velerov2alpha1api.DataUpload) bool {
if isDataUploadInFinalState(du) {
return false
}
du.Status.Path = result.Backup.Source.ByPath
du.Status.Phase = velerov2alpha1api.DataUploadPhaseCompleted
du.Status.SnapshotID = result.Backup.SnapshotID
@ -442,7 +481,8 @@ func (r *DataUploadReconciler) OnDataUploadCompleted(ctx context.Context, namesp
du.Status.Message = "volume was empty so no data was upload"
}
if err := r.client.Patch(ctx, &du, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Error("error updating DataUpload status")
} else {
log.Info("Data upload completed")
@ -475,27 +515,34 @@ func (r *DataUploadReconciler) OnDataUploadCancelled(ctx context.Context, namesp
du := &velerov2alpha1api.DataUpload{}
if getErr := r.client.Get(ctx, types.NamespacedName{Name: duName, Namespace: namespace}, du); getErr != nil {
log.WithError(getErr).Warn("Failed to get dataupload on cancel")
} else {
return
}
// cleans up any objects generated during the snapshot expose
r.cleanUp(ctx, du, log)
original := du.DeepCopy()
if err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log, func(du *velerov2alpha1api.DataUpload) bool {
if isDataUploadInFinalState(du) {
return false
}
du.Status.Phase = velerov2alpha1api.DataUploadPhaseCanceled
if du.Status.StartTimestamp.IsZero() {
du.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
du.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
if err := r.client.Patch(ctx, du, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Error("error updating DataUpload status")
} else {
r.metrics.RegisterDataUploadCancel(r.nodeName)
}
delete(r.cancelledDataUpload, du.Name)
}
}
func (r *DataUploadReconciler) tryCancelAcceptedDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload, message string) {
func (r *DataUploadReconciler) tryCancelDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload, message string) bool {
log := r.logger.WithField("dataupload", du.Name)
log.Warn("Accepted data upload is canceled")
succeeded, err := r.exclusiveUpdateDataUpload(ctx, du, func(dataUpload *velerov2alpha1api.DataUpload) {
succeeded, err := funcExclusiveUpdateDataUpload(ctx, r.client, du, func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Status.Phase = velerov2alpha1api.DataUploadPhaseCanceled
if dataUpload.Status.StartTimestamp.IsZero() {
dataUpload.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
@ -509,16 +556,20 @@ func (r *DataUploadReconciler) tryCancelAcceptedDataUpload(ctx context.Context,
if err != nil {
log.WithError(err).Error("error updating dataupload status")
return
return false
} else if !succeeded {
log.Warn("conflict in updating dataupload status and will try it again later")
return
return false
}
// success update
r.metrics.RegisterDataUploadCancel(r.nodeName)
// cleans up any objects generated during the snapshot expose
r.cleanUp(ctx, du, log)
log.Warn("data upload is canceled")
return true
}
func (r *DataUploadReconciler) cleanUp(ctx context.Context, du *velerov2alpha1api.DataUpload, log logrus.FieldLogger) {
@ -538,16 +589,10 @@ func (r *DataUploadReconciler) cleanUp(ctx context.Context, du *velerov2alpha1ap
func (r *DataUploadReconciler) OnDataUploadProgress(ctx context.Context, namespace string, duName string, progress *uploader.Progress) {
log := r.logger.WithField("dataupload", duName)
var du velerov2alpha1api.DataUpload
if err := r.client.Get(ctx, types.NamespacedName{Name: duName, Namespace: namespace}, &du); err != nil {
log.WithError(err).Warn("Failed to get dataupload on progress")
return
}
original := du.DeepCopy()
if err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: namespace, Name: duName}, log, func(du *velerov2alpha1api.DataUpload) bool {
du.Status.Progress = shared.DataMoveOperationProgress{TotalBytes: progress.TotalBytes, BytesDone: progress.BytesDone}
if err := r.client.Patch(ctx, &du, client.MergeFrom(original)); err != nil {
return true
}); err != nil {
log.WithError(err).Error("Failed to update progress")
}
}
@ -560,7 +605,19 @@ func (r *DataUploadReconciler) OnDataUploadProgress(ctx context.Context, namespa
func (r *DataUploadReconciler) SetupWithManager(mgr ctrl.Manager) error {
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
du := object.(*velerov2alpha1api.DataUpload)
return (du.Status.Phase == velerov2alpha1api.DataUploadPhaseAccepted)
if du.Status.Phase == velerov2alpha1api.DataUploadPhaseAccepted {
return true
}
if du.Spec.Cancel && !isDataUploadInFinalState(du) {
return true
}
if isDataUploadInFinalState(du) && !du.DeletionTimestamp.IsZero() {
return true
}
return false
})
s := kube.NewPeriodicalEnqueueSource(r.logger.WithField("controller", constant.ControllerDataUpload), r.client, &velerov2alpha1api.DataUploadList{}, preparingMonitorFrequency, kube.PeriodicalEnqueueSourceOption{
Predicates: []predicate.Predicate{gp},
@ -621,10 +678,17 @@ func (r *DataUploadReconciler) findDataUploadForPod(ctx context.Context, podObj
if pod.Status.Phase == corev1api.PodRunning {
log.Info("Preparing dataupload")
// we don't expect anyone else update the CR during the Prepare process
updated, err := r.exclusiveUpdateDataUpload(context.Background(), du, r.prepareDataUpload)
if err != nil || !updated {
log.WithField("updated", updated).WithError(err).Warn("failed to update dataupload, prepare will halt for this dataupload")
if err = UpdateDataUploadWithRetry(context.Background(), r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log,
func(du *velerov2alpha1api.DataUpload) bool {
if isDataUploadInFinalState(du) {
log.Warnf("dataupload %s is terminated, abort setting it to prepared", du.Name)
return false
}
r.prepareDataUpload(du)
return true
}); err != nil {
log.WithError(err).Warn("failed to update dataupload, prepare will halt for this dataupload")
return []reconcile.Request{}
}
} else if unrecoverable, reason := kube.IsPodUnrecoverable(pod, log); unrecoverable { // let the abnormal backup pod failed early
@ -678,7 +742,13 @@ func (r *DataUploadReconciler) errorOut(ctx context.Context, du *velerov2alpha1a
}
func (r *DataUploadReconciler) updateStatusToFailed(ctx context.Context, du *velerov2alpha1api.DataUpload, err error, msg string, log logrus.FieldLogger) error {
original := du.DeepCopy()
log.Info("update data upload status to Failed")
if patchErr := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log, func(du *velerov2alpha1api.DataUpload) bool {
if isDataUploadInFinalState(du) {
return false
}
du.Status.Phase = velerov2alpha1api.DataUploadPhaseFailed
du.Status.Message = errors.WithMessage(err, msg).Error()
if du.Status.StartTimestamp.IsZero() {
@ -689,7 +759,9 @@ func (r *DataUploadReconciler) updateStatusToFailed(ctx context.Context, du *vel
du.Status.SnapshotID = dataPathError.GetSnapshotID()
}
du.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
if patchErr := r.client.Patch(ctx, du, client.MergeFrom(original)); patchErr != nil {
return true
}); patchErr != nil {
log.WithError(patchErr).Error("error updating DataUpload status")
} else {
r.metrics.RegisterDataUploadFailure(r.nodeName)
@ -711,7 +783,7 @@ func (r *DataUploadReconciler) acceptDataUpload(ctx context.Context, du *velerov
dataUpload.Status.AcceptedTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
succeeded, err := r.exclusiveUpdateDataUpload(ctx, updated, updateFunc)
succeeded, err := funcExclusiveUpdateDataUpload(ctx, r.client, updated, updateFunc)
if err != nil {
return false, err
@ -732,7 +804,7 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov
log.Info("Timeout happened for preparing dataupload")
succeeded, err := r.exclusiveUpdateDataUpload(ctx, du, func(du *velerov2alpha1api.DataUpload) {
succeeded, err := funcExclusiveUpdateDataUpload(ctx, r.client, du, func(du *velerov2alpha1api.DataUpload) {
du.Status.Phase = velerov2alpha1api.DataUploadPhaseFailed
du.Status.Message = "timeout on preparing data upload"
})
@ -770,11 +842,13 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov
r.metrics.RegisterDataUploadFailure(r.nodeName)
}
func (r *DataUploadReconciler) exclusiveUpdateDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload,
var funcExclusiveUpdateDataUpload = exclusiveUpdateDataUpload
func exclusiveUpdateDataUpload(ctx context.Context, cli client.Client, du *velerov2alpha1api.DataUpload,
updateFunc func(*velerov2alpha1api.DataUpload)) (bool, error) {
updateFunc(du)
err := r.client.Update(ctx, du)
err := cli.Update(ctx, du)
if err == nil {
return true, nil
}
@ -905,7 +979,7 @@ func isDataUploadInFinalState(du *velerov2alpha1api.DataUpload) bool {
du.Status.Phase == velerov2alpha1api.DataUploadPhaseCompleted
}
func UpdateDataUploadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log *logrus.Entry, updateFunc func(*velerov2alpha1api.DataUpload) bool) error {
func UpdateDataUploadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log logrus.FieldLogger, updateFunc func(*velerov2alpha1api.DataUpload) bool) error {
return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) {
du := &velerov2alpha1api.DataUpload{}
if err := client.Get(ctx, namespacedName, du); err != nil {
@ -939,11 +1013,7 @@ func (r *DataUploadReconciler) AttemptDataUploadResume(ctx context.Context, logg
for i := range dataUploads.Items {
du := &dataUploads.Items[i]
if du.Status.Phase == velerov2alpha1api.DataUploadPhasePrepared {
// keep doing nothing let controller re-download the data
// the Prepared CR could be still handled by dataupload controller after node-agent restart
logger.WithField("dataupload", du.GetName()).Debug("find a dataupload with status prepared")
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
if du.Status.Node != r.nodeName {
logger.WithField("du", du.Name).WithField("current node", r.nodeName).Infof("DU should be resumed by another node %s", du.Status.Node)
continue
@ -972,23 +1042,10 @@ func (r *DataUploadReconciler) AttemptDataUploadResume(ctx context.Context, logg
if err != nil {
logger.WithField("dataupload", du.GetName()).WithError(errors.WithStack(err)).Error("Failed to trigger dataupload cancel")
}
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseAccepted {
r.logger.WithField("dataupload", du.GetName()).Warn("Cancel du under Accepted phase")
err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, r.logger.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) bool {
if dataUpload.Spec.Cancel {
return false
}
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = "Dataupload is in Accepted status during the node-agent starting, mark it as cancel"
return true
})
if err != nil {
r.logger.WithField("dataupload", du.GetName()).WithError(errors.WithStack(err)).Error("Failed to trigger dataupload cancel")
}
} else {
// the Prepared CR could be still handled by dataupload controller after node-agent restart
// the accepted CR may also suvived from node-agent restart as long as the intermediate objects are all done
logger.WithField("dataupload", du.GetName()).Infof("find a dataupload with status %s", du.Status.Phase)
}
}

View File

@ -45,7 +45,6 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@ -270,35 +269,30 @@ type fakeSnapshotExposer struct {
clock clock.WithTickerAndDelayedExecution
ambiguousNodeOS bool
peekErr error
exposeErr error
getErr error
getNil bool
}
func (f *fakeSnapshotExposer) Expose(ctx context.Context, ownerObject corev1api.ObjectReference, param any) error {
du := velerov2alpha1api.DataUpload{}
err := f.kubeClient.Get(ctx, kbclient.ObjectKey{
Name: dataUploadName,
Namespace: velerov1api.DefaultNamespace,
}, &du)
if err != nil {
return err
if f.exposeErr != nil {
return f.exposeErr
}
original := du
du.Status.Phase = velerov2alpha1api.DataUploadPhasePrepared
du.Status.StartTimestamp = &metav1.Time{Time: f.clock.Now()}
f.kubeClient.Patch(ctx, &du, kbclient.MergeFrom(&original))
return nil
}
func (f *fakeSnapshotExposer) GetExposed(ctx context.Context, du corev1api.ObjectReference, tm time.Duration, para any) (*exposer.ExposeResult, error) {
pod := &corev1api.Pod{}
err := f.kubeClient.Get(ctx, kbclient.ObjectKey{
Name: dataUploadName,
Namespace: velerov1api.DefaultNamespace,
}, pod)
if err != nil {
return nil, err
if f.getErr != nil {
return nil, f.getErr
}
if f.getNil {
return nil, nil
}
pod := &corev1api.Pod{}
nodeOS := "linux"
pNodeOS := &nodeOS
if f.ambiguousNodeOS {
@ -348,212 +342,263 @@ func TestReconcile(t *testing.T) {
tests := []struct {
name string
du *velerov2alpha1api.DataUpload
notCreateDU bool
needDelete bool
sportTime *metav1.Time
pod *corev1api.Pod
pvc *corev1api.PersistentVolumeClaim
snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer
dataMgr *datapath.Manager
expectedProcessed bool
needCreateFSBR bool
needExclusiveUpdateError error
expected *velerov2alpha1api.DataUpload
checkFunc func(velerov2alpha1api.DataUpload) bool
expectedRequeue ctrl.Result
expectedErrMsg string
expectDeleted bool
expectCancelRecord bool
needErrs []bool
removeNode bool
ambiguousNodeOS bool
peekErr error
notCreateFSBR bool
exposeErr error
getExposeErr error
getExposeNil bool
fsBRInitErr error
fsBRStartErr error
expectedErr string
expectedResult *ctrl.Result
expectDataPath bool
}{
{
name: "Dataupload is not initialized",
du: builder.ForDataUpload("unknown-ns", "unknown-name").Result(),
expectedRequeue: ctrl.Result{},
name: "du not found",
du: dataUploadBuilder().Result(),
notCreateDU: true,
},
{
name: "Error get Dataupload",
du: builder.ForDataUpload(velerov1api.DefaultNamespace, "unknown-name").Result(),
expectedRequeue: ctrl.Result{},
expectedErrMsg: "getting DataUpload: Get error",
name: "du not created in velero default namespace",
du: builder.ForDataUpload("test-ns", dataUploadName).Result(),
},
{
name: "get du fail",
du: dataUploadBuilder().Result(),
needErrs: []bool{true, false, false, false},
expectedErr: "getting DataUpload: Get error",
},
{
name: "Unsupported data mover type",
du: dataUploadBuilder().DataMover("unknown type").Result(),
expected: dataUploadBuilder().Phase("").Result(),
expectedRequeue: ctrl.Result{},
name: "du is not for built-in dm",
du: dataUploadBuilder().DataMover("other").Result(),
},
{
name: "add finalizer to du",
du: dataUploadBuilder().Result(),
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "add finalizer to du failed",
du: dataUploadBuilder().Result(),
needErrs: []bool{false, false, true, false},
expectedErr: "error updating dataupload with error velero/dataupload-1: Update error",
},
{
name: "du is under deletion",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
needDelete: true,
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Result(),
},
{
name: "du is under deletion but cancel failed",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
needErrs: []bool{false, false, true, false},
needDelete: true,
expectedErr: "error updating dataupload with error velero/dataupload-1: Update error",
},
{
name: "du is under deletion and in terminal state",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
sportTime: &metav1.Time{Time: time.Now()},
needDelete: true,
expectDeleted: true,
},
{
name: "du is under deletion and in terminal state, but remove finalizer failed",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
needErrs: []bool{false, false, true, false},
needDelete: true,
expectedErr: "error updating dataupload with error velero/dataupload-1: Update error",
},
{
name: "delay cancel negative for others",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
sportTime: &metav1.Time{Time: time.Now()},
expectCancelRecord: true,
},
{
name: "delay cancel negative for inProgress",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
sportTime: &metav1.Time{Time: time.Now().Add(-time.Minute * 58)},
expectCancelRecord: true,
},
{
name: "delay cancel affirmative for others",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
sportTime: &metav1.Time{Time: time.Now().Add(-time.Minute * 5)},
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
},
{
name: "delay cancel affirmative for inProgress",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
sportTime: &metav1.Time{Time: time.Now().Add(-time.Hour)},
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
},
{
name: "delay cancel failed",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
needErrs: []bool{false, false, true, false},
sportTime: &metav1.Time{Time: time.Now().Add(-time.Hour)},
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectCancelRecord: true,
},
{
name: "Unknown data upload status",
du: dataUploadBuilder().Phase("Unknown").Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "Unknown type of snapshot exposer is not initialized",
du: dataUploadBuilder().SnapshotType("unknown type").Result(),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
expectedRequeue: ctrl.Result{},
expectedErrMsg: "unknown type type of snapshot exposer is not exist",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).SnapshotType("unknown type").Result(),
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
expectedErr: "unknown type type of snapshot exposer is not exist",
},
{
name: "Dataupload should be accepted",
du: dataUploadBuilder().Result(),
pod: builder.ForPod("fake-ns", dataUploadName).Volumes(&corev1api.Volume{Name: "test-pvc"}).Result(),
name: "new du but accept failed",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
needExclusiveUpdateError: errors.New("exclusive-update-error"),
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedErr: "error accepting the data upload dataupload-1: exclusive-update-error",
},
{
name: "du is cancel on accepted",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Result(),
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
},
{
name: "du is accepted but setup expose param failed on getting PVC",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseFailed).Message("failed to set exposer parameters").Result(),
expectedErr: "failed to get PVC fake-ns/test-pvc: persistentvolumeclaims \"test-pvc\" not found",
},
{
name: "du expose failed",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).SnapshotType(fakeSnapshotType).Result(),
pvc: builder.ForPersistentVolumeClaim("fake-ns", "test-pvc").Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
expectedRequeue: ctrl.Result{},
exposeErr: errors.New("fake-expose-error"),
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseFailed).Message("error exposing snapshot").Result(),
expectedErr: "fake-expose-error",
},
{
name: "Dataupload should fail to get PVC information",
du: dataUploadBuilder().Result(),
pod: builder.ForPod("fake-ns", dataUploadName).Volumes(&corev1api.Volume{Name: "wrong-pvc"}).Result(),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
expectedRequeue: ctrl.Result{},
expectedErrMsg: "failed to get PVC",
},
{
name: "Dataupload should fail because expected node doesn't exist",
du: dataUploadBuilder().Result(),
pod: builder.ForPod("fake-ns", dataUploadName).Volumes(&corev1api.Volume{Name: "test-pvc"}).Result(),
name: "du succeeds for accepted",
du: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).SnapshotType(fakeSnapshotType).Result(),
pvc: builder.ForPersistentVolumeClaim("fake-ns", "test-pvc").Result(),
removeNode: true,
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
expectedRequeue: ctrl.Result{},
expectedErrMsg: "no appropriate node to run data upload",
expected: dataUploadBuilder().Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
},
{
name: "Dataupload should be prepared",
du: dataUploadBuilder().SnapshotType(fakeSnapshotType).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedRequeue: ctrl.Result{},
name: "prepare timeout on accepted",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Finalizers([]string{DataUploadDownloadFinalizer}).AcceptedTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 30)}).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseFailed).Message("timeout on preparing data upload").Result(),
},
{
name: "Dataupload prepared should be completed",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
name: "peek error on accepted",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
peekErr: errors.New("fake-peak-error"),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceled).Finalizers([]string{DataUploadDownloadFinalizer}).Phase(velerov2alpha1api.DataUploadPhaseCanceled).Message("found a du velero/dataupload-1 with expose error: fake-peak-error. mark it as cancel").Result(),
},
{
name: "cancel on prepared",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Cancel(true).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceled).Finalizers([]string{DataUploadDownloadFinalizer}).Cancel(true).Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
},
{
name: "Failed to get snapshot expose on prepared",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
getExposeErr: errors.New("fake-get-error"),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("exposed snapshot is not ready: fake-get-error").Result(),
expectedErr: "fake-get-error",
},
{
name: "Get nil restore expose on prepared",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
getExposeNil: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("exposed snapshot is not ready").Result(),
expectedErr: "no expose result is available for the current node",
},
{
name: "Dataupload should fail if expose returns ambiguous nodeOS",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
ambiguousNodeOS: true,
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
expectedErrMsg: "unsupported ambiguous node OS",
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedErr: "unsupported ambiguous node OS",
},
{
name: "Dataupload with not enabled cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(false).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload should be cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(true).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceling).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload should be cancel with match node",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: func() *velerov2alpha1api.DataUpload {
du := dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(true).Result()
du.Status.Node = "test-node"
return du
}(),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
expectedRequeue: ctrl.Result{},
notCreateFSBR: true,
},
{
name: "Dataupload should not be cancel with mismatch node",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: func() *velerov2alpha1api.DataUpload {
du := dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(true).Result()
du.Status.Node = "different_node"
return du
}(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
notCreateFSBR: true,
},
{
name: "runCancelableDataUpload is concurrent limited",
name: "Error in data path is concurrent limited",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
dataMgr: datapath.NewManager(0),
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedRequeue: ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
expectedResult: &ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
},
{
name: "data path init error",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
fsBRInitErr: errors.New("fake-data-path-init-error"),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).SnapshotType(fakeSnapshotType).Result(),
expectedErrMsg: "error initializing asyncBR: fake-data-path-init-error",
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("error initializing data path").Result(),
expectedErr: "error initializing asyncBR: fake-data-path-init-error",
},
{
name: "Unable to update status to in progress for data download",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
needErrs: []bool{false, false, false, true},
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expectedRequeue: ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
name: "Unable to update status to in progress for data upload",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
needErrs: []bool{false, false, true, false},
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedResult: &ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
},
{
name: "data path start error",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
fsBRStartErr: errors.New("fake-data-path-start-error"),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).SnapshotType(fakeSnapshotType).Result(),
expectedErrMsg: "error starting async backup for pod dataupload-1, volume dataupload-1: fake-data-path-start-error",
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Finalizers([]string{DataUploadDownloadFinalizer}).Message("error starting data path").Result(),
expectedErr: "error starting async backup for pod , volume dataupload-1: fake-data-path-start-error",
},
{
name: "prepare timeout",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).AcceptedTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
name: "Prepare succeeds",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectDataPath: true,
},
{
name: "peek error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Result(),
peekErr: errors.New("fake-peek-error"),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceled).Result(),
name: "In progress du is not handled by the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "Dataupload with enabled cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: func() *velerov2alpha1api.DataUpload {
du := dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Result()
controllerutil.AddFinalizer(du, DataUploadDownloadFinalizer)
du.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return du
}(),
checkFunc: func(du velerov2alpha1api.DataUpload) bool {
return du.Spec.Cancel
},
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
expectedRequeue: ctrl.Result{},
name: "In progress du is not set as cancel",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
{
name: "Dataupload with remove finalizer and should not be retrieved",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1api.Volume{Name: "dataupload-1"}).Result(),
du: func() *velerov2alpha1api.DataUpload {
du := dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).SnapshotType(fakeSnapshotType).Cancel(true).Result()
controllerutil.AddFinalizer(du, DataUploadDownloadFinalizer)
du.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return du
}(),
checkFunc: func(du velerov2alpha1api.DataUpload) bool {
return !controllerutil.ContainsFinalizer(&du, DataUploadDownloadFinalizer)
name: "Cancel data upload in progress with empty FSBR",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceled).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
},
expectedRequeue: ctrl.Result{},
{
name: "Cancel data upload in progress and patch data upload error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
needErrs: []bool{false, false, true, false},
needCreateFSBR: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectedErr: "error updating dataupload with error velero/dataupload-1: Update error",
expectCancelRecord: true,
expectDataPath: true,
},
{
name: "Cancel data upload in progress succeeds",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Node("test-node").Result(),
needCreateFSBR: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceling).Cancel(true).Finalizers([]string{DataUploadDownloadFinalizer}).Result(),
expectDataPath: true,
expectCancelRecord: true,
},
}
@ -561,24 +606,15 @@ func TestReconcile(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
r, err := initDataUploaderReconciler(test.needErrs...)
require.NoError(t, err)
defer func() {
r.client.Delete(ctx, test.du, &kbclient.DeleteOptions{})
if test.pod != nil {
r.client.Delete(ctx, test.pod, &kbclient.DeleteOptions{})
}
}()
ctx := context.Background()
if test.du.Namespace == velerov1api.DefaultNamespace {
isDeletionTimestampSet := test.du.DeletionTimestamp != nil
err = r.client.Create(ctx, test.du)
require.NoError(t, err)
// because of the changes introduced by https://github.com/kubernetes-sigs/controller-runtime/commit/7a66d580c0c53504f5b509b45e9300cc18a1cc30
// the fake client ignores the DeletionTimestamp when calling the Create(),
// so call Delete() here
if isDeletionTimestampSet {
err = r.client.Delete(ctx, test.du)
if !test.notCreateDU {
err = r.client.Create(context.Background(), test.du)
require.NoError(t, err)
}
if test.needDelete {
err = r.client.Delete(context.Background(), test.du)
require.NoError(t, err)
}
if test.pod != nil {
@ -591,23 +627,29 @@ func TestReconcile(t *testing.T) {
require.NoError(t, err)
}
if test.removeNode {
err = r.kubeClient.CoreV1().Nodes().Delete(ctx, "fake-node", metav1.DeleteOptions{})
require.NoError(t, err)
}
if test.dataMgr != nil {
r.dataPathMgr = test.dataMgr
} else {
r.dataPathMgr = datapath.NewManager(1)
}
if test.sportTime != nil {
r.cancelledDataUpload[test.du.Name] = test.sportTime.Time
}
if test.du.Spec.SnapshotType == fakeSnapshotType {
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{fakeSnapshotType: &fakeSnapshotExposer{r.client, r.Clock, test.ambiguousNodeOS, test.peekErr}}
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{fakeSnapshotType: &fakeSnapshotExposer{r.client, r.Clock, test.ambiguousNodeOS, test.peekErr, test.exposeErr, test.getExposeErr, test.getExposeNil}}
} else if test.du.Spec.SnapshotType == velerov2alpha1api.SnapshotTypeCSI {
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{velerov2alpha1api.SnapshotTypeCSI: exposer.NewCSISnapshotExposer(r.kubeClient, r.csiSnapshotClient, velerotest.NewLogger())}
}
if !test.notCreateFSBR {
funcExclusiveUpdateDataUpload = exclusiveUpdateDataUpload
if test.needExclusiveUpdateError != nil {
funcExclusiveUpdateDataUpload = func(context.Context, kbclient.Client, *velerov2alpha1api.DataUpload, func(*velerov2alpha1api.DataUpload)) (bool, error) {
return false, test.needExclusiveUpdateError
}
}
datapath.MicroServiceBRWatcherCreator = func(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
return &fakeDataUploadFSBR{
du: test.du,
@ -617,12 +659,9 @@ func TestReconcile(t *testing.T) {
startErr: test.fsBRStartErr,
}
}
}
testCreateFsBR := false
if test.du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress && !test.notCreateFSBR {
if test.needCreateFSBR {
if fsBR := r.dataPathMgr.GetAsyncBR(test.du.Name); fsBR == nil {
testCreateFsBR = true
_, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, nil, nil, datapath.TaskTypeBackup, test.du.Name, velerov1api.DefaultNamespace, "", "", "", datapath.Callbacks{OnCancelled: r.OnDataUploadCancelled}, false, velerotest.NewLogger())
require.NoError(t, err)
}
@ -635,41 +674,46 @@ func TestReconcile(t *testing.T) {
},
})
assert.Equal(t, test.expectedRequeue, actualResult)
if test.expectedErrMsg == "" {
require.NoError(t, err)
if test.expectedErr != "" {
assert.EqualError(t, err, test.expectedErr)
} else {
require.ErrorContains(t, err, test.expectedErrMsg)
assert.NoError(t, err)
}
if test.expectedResult != nil {
assert.Equal(t, test.expectedResult.Requeue, actualResult.Requeue)
assert.Equal(t, test.expectedResult.RequeueAfter, actualResult.RequeueAfter)
}
if test.expected != nil || test.expectDeleted {
du := velerov2alpha1api.DataUpload{}
err = r.client.Get(ctx, kbclient.ObjectKey{
Name: test.du.Name,
Namespace: test.du.Namespace,
}, &du)
t.Logf("%s: \n %v \n", test.name, du)
// Assertions
if test.expected == nil {
require.Error(t, err)
if test.expectDeleted {
assert.True(t, apierrors.IsNotFound(err))
} else {
require.NoError(t, err)
assert.Equal(t, test.expected.Status.Phase, du.Status.Phase)
assert.Contains(t, du.Status.Message, test.expected.Status.Message)
assert.Equal(t, du.Finalizers, test.expected.Finalizers)
assert.Equal(t, du.Spec.Cancel, test.expected.Spec.Cancel)
}
}
if test.expectedProcessed {
assert.False(t, du.Status.CompletionTimestamp.IsZero())
}
if !test.expectedProcessed {
assert.True(t, du.Status.CompletionTimestamp.IsZero())
}
if test.checkFunc != nil {
assert.True(t, test.checkFunc(du))
}
if !testCreateFsBR && du.Status.Phase != velerov2alpha1api.DataUploadPhaseInProgress {
if !test.expectDataPath {
assert.Nil(t, r.dataPathMgr.GetAsyncBR(test.du.Name))
} else {
assert.NotNil(t, r.dataPathMgr.GetAsyncBR(test.du.Name))
}
if test.expectCancelRecord {
assert.Contains(t, r.cancelledDataUpload, test.du.Name)
} else {
assert.Empty(t, r.cancelledDataUpload)
}
})
}
@ -719,7 +763,7 @@ func TestOnDataUploadProgress(t *testing.T) {
{
name: "failed to patch dataupload",
du: dataUploadBuilder().Result(),
needErrs: []bool{false, false, false, true},
needErrs: []bool{false, false, true, false},
},
}
for _, test := range tests {
@ -992,7 +1036,7 @@ func TestTryCancelDataUpload(t *testing.T) {
err = r.client.Create(ctx, test.dd)
require.NoError(t, err)
r.tryCancelAcceptedDataUpload(ctx, test.dd, "")
r.tryCancelDataUpload(ctx, test.dd, "")
if test.expectedErr == "" {
assert.NoError(t, err)
@ -1114,33 +1158,8 @@ func TestAttemptDataUploadResume(t *testing.T) {
expectedError string
}{
{
name: "accepted DataUpload in other node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).AcceptedByNode("node-1").Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but canceled",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).AcceptedByNode("node-1").Cancel(true).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but update error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).AcceptedByNode("node-1").Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "prepared DataUpload",
name: "Other DataUpload",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
prepareddDataUploads: []string{dataUploadName},
},
{
name: "InProgress DataUpload, not the current node",