Merge pull request #1389 from skriss/restore-partially-failed

Add PartiallyFailed phase for restores

commit bf19623e82
@@ -82,10 +82,14 @@ const (
 	// RestorePhaseInProgress means the restore is currently executing.
 	RestorePhaseInProgress RestorePhase = "InProgress"
 
-	// RestorePhaseCompleted means the restore has finished executing.
-	// Any relevant warnings or errors will be captured in the Status.
+	// RestorePhaseCompleted means the restore has run successfully
+	// without errors.
 	RestorePhaseCompleted RestorePhase = "Completed"
 
+	// RestorePhasePartiallyFailed means the restore has run to completion
+	// but encountered 1+ errors restoring individual items.
+	RestorePhasePartiallyFailed RestorePhase = "PartiallyFailed"
+
 	// RestorePhaseFailed means the restore was unable to execute.
 	// The failing error is recorded in status.FailureReason.
 	RestorePhaseFailed RestorePhase = "Failed"
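For orientation, here is a minimal, self-contained sketch (not code from this PR; the type and constants are redefined locally instead of importing the Velero API package) showing how a caller can treat the new PartiallyFailed phase as a terminal state alongside Completed and Failed:

package main

import "fmt"

// RestorePhase mirrors the string-typed phase from the hunk above; it is
// redefined here only to keep the sketch self-contained.
type RestorePhase string

const (
	RestorePhaseNew             RestorePhase = "New"
	RestorePhaseInProgress      RestorePhase = "InProgress"
	RestorePhaseCompleted       RestorePhase = "Completed"
	RestorePhasePartiallyFailed RestorePhase = "PartiallyFailed"
	RestorePhaseFailed          RestorePhase = "Failed"
)

// isTerminal is a hypothetical helper, not part of the PR: it illustrates
// that PartiallyFailed is an end state, like Completed and Failed.
func isTerminal(p RestorePhase) bool {
	switch p {
	case RestorePhaseCompleted, RestorePhasePartiallyFailed, RestorePhaseFailed:
		return true
	}
	return false
}

func main() {
	fmt.Println(isTerminal(RestorePhasePartiallyFailed)) // true
	fmt.Println(isTerminal(RestorePhaseInProgress))      // false
}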
@@ -50,7 +50,10 @@ func NewLogsCommand(f client.Factory) *cobra.Command {
 				cmd.Exit("Error checking for restore %q: %v", restoreName, err)
 			}
 
-			if restore.Status.Phase != v1.RestorePhaseCompleted && restore.Status.Phase != v1.RestorePhaseFailed {
+			switch restore.Status.Phase {
+			case v1.RestorePhaseCompleted, v1.RestorePhaseFailed, v1.RestorePhasePartiallyFailed:
+				// terminal phases, don't exit.
+			default:
 				cmd.Exit("Logs for restore %q are not available until it's finished processing. Please wait "+
 					"until the restore has a phase of Completed or Failed and try again.", restoreName)
 			}
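A note on the switch above: Go switch cases do not fall through, so the empty Completed/Failed/PartiallyFailed case simply does nothing and lets the command go on to fetch logs, while every non-terminal phase (including an empty phase) lands in the default branch and exits.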
@@ -19,6 +19,7 @@ package output
 import (
 	"bytes"
 	"encoding/json"
+	"fmt"
 	"sort"
 	"strings"
 
@@ -35,7 +36,17 @@ func DescribeRestore(restore *v1.Restore, podVolumeRestores []v1.PodVolumeRestore
 		d.DescribeMetadata(restore.ObjectMeta)
 
 		d.Println()
-		d.Printf("Phase:\t%s\n", restore.Status.Phase)
+		phase := restore.Status.Phase
+		if phase == "" {
+			phase = v1.RestorePhaseNew
+		}
+
+		resultsNote := ""
+		if phase == v1.RestorePhaseFailed || phase == v1.RestorePhasePartiallyFailed {
+			resultsNote = fmt.Sprintf(" (run 'velero restore logs %s' for more information)", restore.Name)
+		}
+
+		d.Printf("Phase:\t%s%s\n", restore.Status.Phase, resultsNote)
 
 		if len(restore.Status.ValidationErrors) > 0 {
 			d.Println()
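A minimal sketch of how that Phase line is assembled for `velero restore describe` (assumed names, not repo code; the restore name "nginx-restore" is made up): the logs hint is appended only when the phase is Failed or PartiallyFailed.

package main

import "fmt"

// phaseLine is a hypothetical stand-in for the Describer logic in the hunk above.
func phaseLine(restoreName, phase string) string {
	resultsNote := ""
	if phase == "Failed" || phase == "PartiallyFailed" {
		resultsNote = fmt.Sprintf(" (run 'velero restore logs %s' for more information)", restoreName)
	}
	return fmt.Sprintf("Phase:\t%s%s", phase, resultsNote)
}

func main() {
	fmt.Println(phaseLine("nginx-restore", "PartiallyFailed"))
	// Phase:	PartiallyFailed (run 'velero restore logs nginx-restore' for more information)
	fmt.Println(phaseLine("nginx-restore", "Completed"))
	// Phase:	Completed
}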
@@ -259,6 +259,10 @@ func (c *restoreController) processRestore(restore *api.Restore) error {
 		restore.Status.Phase = api.RestorePhaseFailed
 		restore.Status.FailureReason = err.Error()
 		c.metrics.RegisterRestoreFailed(backupScheduleName)
+	} else if restore.Status.Errors > 0 {
+		c.logger.Debug("Restore partially failed")
+		restore.Status.Phase = api.RestorePhasePartiallyFailed
+		c.metrics.RegisterRestorePartialFailure(backupScheduleName)
 	} else {
 		c.logger.Debug("Restore completed")
 		restore.Status.Phase = api.RestorePhaseCompleted
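The branch added above can be read as a three-way decision: a fatal error wins, otherwise any per-item errors mean PartiallyFailed, otherwise Completed. The sketch below is a hypothetical distillation of that ordering, not the controller code itself:

package main

import (
	"errors"
	"fmt"
)

type RestorePhase string

const (
	RestorePhaseCompleted       RestorePhase = "Completed"
	RestorePhasePartiallyFailed RestorePhase = "PartiallyFailed"
	RestorePhaseFailed          RestorePhase = "Failed"
)

// finalPhase picks the terminal phase from the restore outcome, mirroring
// the if / else if / else chain in the hunk above.
func finalPhase(fatalErr error, itemErrors int) RestorePhase {
	switch {
	case fatalErr != nil:
		return RestorePhaseFailed
	case itemErrors > 0:
		return RestorePhasePartiallyFailed
	default:
		return RestorePhaseCompleted
	}
}

func main() {
	fmt.Println(finalPhase(nil, 0))                // Completed
	fmt.Println(finalPhase(nil, 3))                // PartiallyFailed
	fmt.Println(finalPhase(errors.New("boom"), 0)) // Failed
}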
@@ -300,6 +300,7 @@ func TestProcessQueueItem(t *testing.T) {
 			restorerError:         errors.New("blarg"),
 			expectedErr:           false,
 			expectedPhase:         string(api.RestorePhaseInProgress),
+			expectedFinalPhase:    string(api.RestorePhasePartiallyFailed),
 			expectedRestoreErrors: 1,
 			expectedRestorerCall:  NewRestore("foo", "bar", "backup-1", "ns-1", "", api.RestorePhaseInProgress).Restore,
 		},
@@ -595,7 +596,7 @@ func TestProcessQueueItem(t *testing.T) {
 			if test.expectedFinalPhase != "" {
 				expected = Patch{
 					Status: StatusPatch{
-						Phase:  api.RestorePhaseCompleted,
+						Phase:  api.RestorePhase(test.expectedFinalPhase),
 						Errors: test.expectedRestoreErrors,
 					},
 				}
@@ -42,6 +42,7 @@ const (
 	restoreAttemptTotal          = "restore_attempt_total"
 	restoreValidationFailedTotal = "restore_validation_failed_total"
 	restoreSuccessTotal          = "restore_success_total"
+	restorePartialFailureTotal   = "restore_partial_failure_total"
 	restoreFailedTotal           = "restore_failed_total"
 	volumeSnapshotAttemptTotal   = "volume_snapshot_attempt_total"
 	volumeSnapshotSuccessTotal   = "volume_snapshot_success_total"
@@ -162,6 +163,14 @@ func NewServerMetrics() *ServerMetrics {
 			},
 			[]string{scheduleLabel},
 		),
+		restorePartialFailureTotal: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace: metricNamespace,
+				Name:      restorePartialFailureTotal,
+				Help:      "Total number of partially failed restores",
+			},
+			[]string{scheduleLabel},
+		),
 		restoreFailedTotal: prometheus.NewCounterVec(
 			prometheus.CounterOpts{
 				Namespace: metricNamespace,
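For readers less familiar with prometheus/client_golang, the sketch below shows the same kind of labeled counter in isolation. The namespace string "velero", the schedule value "daily-backup", and the standalone registration are assumptions for illustration; the repo wires the counter through its own ServerMetrics map and the metricNamespace and scheduleLabel constants.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// One time series per schedule label; exposed as
	// <namespace>_restore_partial_failure_total.
	partialFailures := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "velero", // illustrative; the repo uses its metricNamespace constant
			Name:      "restore_partial_failure_total",
			Help:      "Total number of partially failed restores",
		},
		[]string{"schedule"},
	)
	prometheus.MustRegister(partialFailures)

	partialFailures.WithLabelValues("daily-backup").Inc()
	fmt.Println(testutil.ToFloat64(partialFailures.WithLabelValues("daily-backup"))) // 1
}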
@@ -236,6 +245,9 @@ func (m *ServerMetrics) InitSchedule(scheduleName string) {
 	if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
 		c.WithLabelValues(scheduleName).Set(0)
 	}
+	if c, ok := m.metrics[restorePartialFailureTotal].(*prometheus.CounterVec); ok {
+		c.WithLabelValues(scheduleName).Set(0)
+	}
 	if c, ok := m.metrics[restoreFailedTotal].(*prometheus.CounterVec); ok {
 		c.WithLabelValues(scheduleName).Set(0)
 	}
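Pre-initializing the new per-schedule series here keeps the behavior consistent with the surrounding restore counters: a schedule that has never had a partial failure exports an explicit 0 rather than a missing series, which is generally friendlier to Prometheus queries and alerts on increases. A side note for readers reproducing this against a current prometheus/client_golang: newer releases removed Set from the Counter interface, so an equivalent standalone sketch would pre-create the series with WithLabelValues(schedule).Add(0) instead.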
@@ -346,6 +358,13 @@ func (m *ServerMetrics) RegisterRestoreSuccess(backupSchedule string) {
 	}
 }
 
+// RegisterRestorePartialFailure records a restore that partially failed.
+func (m *ServerMetrics) RegisterRestorePartialFailure(backupSchedule string) {
+	if c, ok := m.metrics[restorePartialFailureTotal].(*prometheus.CounterVec); ok {
+		c.WithLabelValues(backupSchedule).Inc()
+	}
+}
+
 // RegisterRestoreFailed records a restore that failed.
 func (m *ServerMetrics) RegisterRestoreFailed(backupSchedule string) {
 	if c, ok := m.metrics[restoreFailedTotal].(*prometheus.CounterVec); ok {
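The new Register helper follows the existing pattern of guarding the map lookup with a type assertion. The sketch below imitates that pattern with assumed names (it is not the repo's ServerMetrics type): collectors live in a map keyed by metric name, and a missing or mistyped entry makes the increment a silent no-op instead of a panic.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

// metricsSketch is a hypothetical stand-in for the repo's metrics wrapper.
type metricsSketch struct {
	metrics map[string]prometheus.Collector
}

func (m *metricsSketch) registerRestorePartialFailure(schedule string) {
	if c, ok := m.metrics["restore_partial_failure_total"].(*prometheus.CounterVec); ok {
		c.WithLabelValues(schedule).Inc()
	}
}

func main() {
	cv := prometheus.NewCounterVec(
		prometheus.CounterOpts{Name: "restore_partial_failure_total", Help: "sketch"},
		[]string{"schedule"},
	)
	m := &metricsSketch{metrics: map[string]prometheus.Collector{
		"restore_partial_failure_total": cv,
	}}

	m.registerRestorePartialFailure("daily-backup")
	fmt.Println(testutil.ToFloat64(cv.WithLabelValues("daily-backup"))) // 1

	// With no matching entry the type assertion fails and the call is a no-op.
	empty := &metricsSketch{metrics: map[string]prometheus.Collector{}}
	empty.registerRestorePartialFailure("daily-backup")
}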