Merge pull request #8496 from Lyndon-Li/release-1.15

[1.15] Fix prepare timeout issue
pull/8501/head
lyndon-li 2024-12-09 15:25:53 +08:00 committed by GitHub
commit a72aff2a3b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 49 additions and 29 deletions

View File

@ -0,0 +1 @@
Fix issue #8485, add an accepted time so as to count the prepare timeout

View File

@ -118,6 +118,12 @@ func (d *DataDownloadBuilder) Labels(labels map[string]string) *DataDownloadBuil
return d
}
// Annotations sets the DataDownload's Annotations.
func (d *DataDownloadBuilder) Annotations(annotations map[string]string) *DataDownloadBuilder {
d.object.Annotations = annotations
return d
}
// StartTimestamp sets the DataDownload's StartTimestamp.
func (d *DataDownloadBuilder) StartTimestamp(startTime *metav1.Time) *DataDownloadBuilder {
d.object.Status.StartTimestamp = startTime

View File

@ -133,6 +133,12 @@ func (d *DataUploadBuilder) Labels(labels map[string]string) *DataUploadBuilder
return d
}
// Annotations sets the DataUpload's Annotations.
func (d *DataUploadBuilder) Annotations(annotations map[string]string) *DataUploadBuilder {
d.object.Annotations = annotations
return d
}
// Progress sets the DataUpload's Progress.
func (d *DataUploadBuilder) Progress(progress shared.DataMoveOperationProgress) *DataUploadBuilder {
d.object.Status.Progress = progress

View File

@ -223,9 +223,11 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
r.tryCancelAcceptedDataDownload(ctx, dd, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
log.Errorf("Cancel dd %s/%s because of expose error %s", dd.Namespace, dd.Name, peekErr)
} else if dd.Status.StartTimestamp != nil {
if time.Since(dd.Status.StartTimestamp.Time) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, dd)
} else if at, found := dd.Annotations[acceptTimeAnnoKey]; found {
if t, err := time.Parse(time.RFC3339, at); err == nil {
if time.Since(t) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, dd)
}
}
}
@ -632,12 +634,13 @@ func (r *DataDownloadReconciler) acceptDataDownload(ctx context.Context, dd *vel
updateFunc := func(datadownload *velerov2alpha1api.DataDownload) {
datadownload.Status.Phase = velerov2alpha1api.DataDownloadPhaseAccepted
labels := datadownload.GetLabels()
if labels == nil {
labels = make(map[string]string)
annotations := datadownload.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
labels[acceptNodeLabelKey] = r.nodeName
datadownload.SetLabels(labels)
annotations[acceptNodeAnnoKey] = r.nodeName
annotations[acceptTimeAnnoKey] = r.Clock.Now().Format(time.RFC3339)
datadownload.SetAnnotations(annotations)
}
succeeded, err := r.exclusiveUpdateDataDownload(ctx, updated, updateFunc)

View File

@ -349,7 +349,7 @@ func TestDataDownloadReconcile(t *testing.T) {
},
{
name: "prepare timeout",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).StartTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{acceptTimeAnnoKey: (time.Now().Add(-time.Minute * 5)).Format(time.RFC3339)}).Result(),
expected: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Result(),
},
{
@ -496,7 +496,7 @@ func TestDataDownloadReconcile(t *testing.T) {
if test.expected != nil {
require.NoError(t, err)
assert.Equal(t, dd.Status.Phase, test.expected.Status.Phase)
assert.Equal(t, test.expected.Status.Phase, dd.Status.Phase)
}
if test.isGetExposeErr {
@ -1003,22 +1003,22 @@ func TestAttemptDataDownloadResume(t *testing.T) {
},
{
name: "accepted DataDownload in the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but is canceled",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).Labels(map[string]string{
acceptNodeLabelKey: "node-1",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).Annotations(map[string]string{
acceptNodeAnnoKey: "node-1",
}).Result(),
acceptedDataDownloads: []string{dataDownloadName},
cancelledDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but cancel fail",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Labels(map[string]string{
acceptNodeLabelKey: "node-1",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Annotations(map[string]string{
acceptNodeAnnoKey: "node-1",
}).Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataDownloads: []string{dataDownloadName},

View File

@ -55,7 +55,8 @@ import (
const (
dataUploadDownloadRequestor = "snapshot-data-upload-download"
acceptNodeLabelKey = "velero.io/accepted-by"
acceptNodeAnnoKey = "velero.io/accepted-by"
acceptTimeAnnoKey = "velero.io/accepted-at"
DataUploadDownloadFinalizer = "velero.io/data-upload-download-finalizer"
preparingMonitorFrequency = time.Minute
)
@ -255,9 +256,11 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
} else if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
r.tryCancelAcceptedDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
log.Errorf("Cancel du %s/%s because of expose error %s", du.Namespace, du.Name, peekErr)
} else if du.Status.StartTimestamp != nil {
if time.Since(du.Status.StartTimestamp.Time) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, du)
} else if at, found := du.Annotations[acceptTimeAnnoKey]; found {
if t, err := time.Parse(time.RFC3339, at); err == nil {
if time.Since(t) >= r.preparingTimeout {
r.onPrepareTimeout(ctx, du)
}
}
}
@ -699,12 +702,13 @@ func (r *DataUploadReconciler) acceptDataUpload(ctx context.Context, du *velerov
updateFunc := func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Status.Phase = velerov2alpha1api.DataUploadPhaseAccepted
labels := dataUpload.GetLabels()
if labels == nil {
labels = make(map[string]string)
annotations := dataUpload.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}
labels[acceptNodeLabelKey] = r.nodeName
dataUpload.SetLabels(labels)
annotations[acceptNodeAnnoKey] = r.nodeName
annotations[acceptTimeAnnoKey] = r.Clock.Now().Format(time.RFC3339)
dataUpload.SetAnnotations(annotations)
}
succeeded, err := r.exclusiveUpdateDataUpload(ctx, updated, updateFunc)

View File

@ -475,7 +475,7 @@ func TestReconcile(t *testing.T) {
},
{
name: "prepare timeout",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).StartTimestamp(&metav1.Time{Time: time.Now().Add(-time.Minute * 5)}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).SnapshotType(fakeSnapshotType).Annotations(map[string]string{acceptTimeAnnoKey: (time.Now().Add(-time.Minute * 5)).Format(time.RFC3339)}).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).Result(),
},
{
@ -607,7 +607,7 @@ func TestReconcile(t *testing.T) {
require.Error(t, err)
} else {
require.NoError(t, err)
assert.Equal(t, du.Status.Phase, test.expected.Status.Phase)
assert.Equal(t, test.expected.Status.Phase, du.Status.Phase)
}
if test.expectedProcessed {
@ -1071,19 +1071,19 @@ func TestAttemptDataUploadResume(t *testing.T) {
},
{
name: "accepted DataUpload in the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but canceled",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Cancel(true).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Cancel(true).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but update error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Annotations(map[string]string{acceptNodeAnnoKey: "node-1"}).Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataUploads: []string{dataUploadName},
},