Merge pull request #2196 from skriss/fix-1777
recompute backup_last_successful_timestamp metric during resync
pull/2219/head
commit
ec22f2c88d
|
@ -0,0 +1 @@
|
|||
repopulate backup_last_successful_timestamp metrics for each schedule after server restart
|
|
@ -164,6 +164,12 @@ func (b *BackupBuilder) StartTimestamp(val time.Time) *BackupBuilder {
|
|||
return b
|
||||
}
|
||||
|
||||
// CompletionTimestamp sets the Backup's completion timestamp.
|
||||
func (b *BackupBuilder) CompletionTimestamp(val time.Time) *BackupBuilder {
|
||||
b.object.Status.CompletionTimestamp = &metav1.Time{Time: val}
|
||||
return b
|
||||
}
|
||||
|
||||
// Hooks sets the Backup's hooks.
|
||||
func (b *BackupBuilder) Hooks(hooks velerov1api.BackupHooks) *BackupBuilder {
|
||||
b.object.Spec.Hooks = hooks
|
||||
|
|
|
@ -148,12 +148,44 @@ func NewBackupController(
|
|||
}
|
||||
|
||||
func (c *backupController) resync() {
|
||||
// recompute backup_total metric
|
||||
backups, err := c.lister.List(labels.Everything())
|
||||
if err != nil {
|
||||
c.logger.Error(err, "Error computing backup_total metric")
|
||||
} else {
|
||||
c.metrics.SetBackupTotal(int64(len(backups)))
|
||||
}
|
||||
|
||||
// recompute backup_last_successful_timestamp metric for each
|
||||
// schedule (including the empty schedule, i.e. ad-hoc backups)
|
||||
for schedule, timestamp := range getLastSuccessBySchedule(backups) {
|
||||
c.metrics.SetBackupLastSuccessfulTimestamp(schedule, timestamp)
|
||||
}
|
||||
}
|
||||
|
||||
// getLastSuccessBySchedule finds the most recent completed backup for each schedule
|
||||
// and returns a map of schedule name -> completion time of the most recent completed
|
||||
// backup. This map includes an entry for ad-hoc/non-scheduled backups, where the key
|
||||
// is the empty string.
|
||||
func getLastSuccessBySchedule(backups []*velerov1api.Backup) map[string]time.Time {
|
||||
lastSuccessBySchedule := map[string]time.Time{}
|
||||
for _, backup := range backups {
|
||||
if backup.Status.Phase != velerov1api.BackupPhaseCompleted {
|
||||
continue
|
||||
}
|
||||
if backup.Status.CompletionTimestamp == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
schedule := backup.Labels[velerov1api.ScheduleNameLabel]
|
||||
timestamp := backup.Status.CompletionTimestamp.Time
|
||||
|
||||
if timestamp.After(lastSuccessBySchedule[schedule]) {
|
||||
lastSuccessBySchedule[schedule] = timestamp
|
||||
}
|
||||
}
|
||||
|
||||
return lastSuccessBySchedule
|
||||
}
|
||||
|
||||
func (c *backupController) processBackup(key string) error {
|
||||
|
|
|
@ -738,3 +738,119 @@ func TestValidateAndGetSnapshotLocations(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Test_getLastSuccessBySchedule verifies that the getLastSuccessBySchedule helper function correctly returns
|
||||
// the completion timestamp of the most recent completed backup for each schedule, including an entry for ad-hoc
|
||||
// or non-scheduled backups.
|
||||
func Test_getLastSuccessBySchedule(t *testing.T) {
|
||||
buildBackup := func(phase velerov1api.BackupPhase, completion time.Time, schedule string) *velerov1api.Backup {
|
||||
b := builder.ForBackup("", "").
|
||||
ObjectMeta(builder.WithLabels(velerov1api.ScheduleNameLabel, schedule)).
|
||||
Phase(phase)
|
||||
|
||||
if !completion.IsZero() {
|
||||
b.CompletionTimestamp(completion)
|
||||
}
|
||||
|
||||
return b.Result()
|
||||
}
|
||||
|
||||
// create a static "base time" that can be used to easily construct completion timestamps
|
||||
// by using the .Add(...) method.
|
||||
baseTime, err := time.Parse(time.RFC1123, time.RFC1123)
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
backups []*velerov1api.Backup
|
||||
want map[string]time.Time
|
||||
}{
|
||||
{
|
||||
name: "when backups is nil, an empty map is returned",
|
||||
backups: nil,
|
||||
want: map[string]time.Time{},
|
||||
},
|
||||
{
|
||||
name: "when backups is empty, an empty map is returned",
|
||||
backups: []*velerov1api.Backup{},
|
||||
want: map[string]time.Time{},
|
||||
},
|
||||
{
|
||||
name: "when multiple completed backups for a schedule exist, the latest one is returned",
|
||||
backups: []*velerov1api.Backup{
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime, "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(time.Second), "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(-time.Second), "schedule-1"),
|
||||
},
|
||||
want: map[string]time.Time{
|
||||
"schedule-1": baseTime.Add(time.Second),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "when the most recent backup for a schedule is Failed, the timestamp of the most recent Completed one is returned",
|
||||
backups: []*velerov1api.Backup{
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime, "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseFailed, baseTime.Add(time.Second), "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(-time.Second), "schedule-1"),
|
||||
},
|
||||
want: map[string]time.Time{
|
||||
"schedule-1": baseTime,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "when there are no Completed backups for a schedule, it's not returned",
|
||||
backups: []*velerov1api.Backup{
|
||||
buildBackup(velerov1api.BackupPhaseInProgress, baseTime, "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseFailed, baseTime.Add(time.Second), "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhasePartiallyFailed, baseTime.Add(-time.Second), "schedule-1"),
|
||||
},
|
||||
want: map[string]time.Time{},
|
||||
},
|
||||
{
|
||||
name: "when backups exist without a schedule, the most recent Completed one is returned",
|
||||
backups: []*velerov1api.Backup{
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime, ""),
|
||||
buildBackup(velerov1api.BackupPhaseFailed, baseTime.Add(time.Second), ""),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(-time.Second), ""),
|
||||
},
|
||||
want: map[string]time.Time{
|
||||
"": baseTime,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "when backups exist for multiple schedules, the most recent Completed timestamp for each schedule is returned",
|
||||
backups: []*velerov1api.Backup{
|
||||
// ad-hoc backups (no schedule)
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(30*time.Minute), ""),
|
||||
buildBackup(velerov1api.BackupPhaseFailed, baseTime.Add(time.Hour), ""),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(-time.Second), ""),
|
||||
|
||||
// schedule-1
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime, "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseFailed, baseTime.Add(time.Second), "schedule-1"),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(-time.Second), "schedule-1"),
|
||||
|
||||
// schedule-2
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(24*time.Hour), "schedule-2"),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(48*time.Hour), "schedule-2"),
|
||||
buildBackup(velerov1api.BackupPhaseCompleted, baseTime.Add(72*time.Hour), "schedule-2"),
|
||||
|
||||
// schedule-3
|
||||
buildBackup(velerov1api.BackupPhaseNew, baseTime, "schedule-3"),
|
||||
buildBackup(velerov1api.BackupPhaseInProgress, baseTime.Add(time.Minute), "schedule-3"),
|
||||
buildBackup(velerov1api.BackupPhasePartiallyFailed, baseTime.Add(2*time.Minute), "schedule-3"),
|
||||
},
|
||||
want: map[string]time.Time{
|
||||
"": baseTime.Add(30 * time.Minute),
|
||||
"schedule-1": baseTime,
|
||||
"schedule-2": baseTime.Add(72 * time.Hour),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.want, getLastSuccessBySchedule(tc.backups))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -297,9 +297,9 @@ func (m *ServerMetrics) SetBackupTarballSizeBytesGauge(backupSchedule string, si
|
|||
}
|
||||
|
||||
// SetBackupLastSuccessfulTimestamp records the last time a backup ran successfully, Unix timestamp in seconds
|
||||
func (m *ServerMetrics) SetBackupLastSuccessfulTimestamp(backupSchedule string) {
|
||||
func (m *ServerMetrics) SetBackupLastSuccessfulTimestamp(backupSchedule string, time time.Time) {
|
||||
if g, ok := m.metrics[backupLastSuccessfulTimestamp].(*prometheus.GaugeVec); ok {
|
||||
g.WithLabelValues(backupSchedule).Set(float64(time.Now().Unix()))
|
||||
g.WithLabelValues(backupSchedule).Set(float64(time.Unix()))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -322,7 +322,7 @@ func (m *ServerMetrics) RegisterBackupSuccess(backupSchedule string) {
|
|||
if c, ok := m.metrics[backupSuccessTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule).Inc()
|
||||
}
|
||||
m.SetBackupLastSuccessfulTimestamp(backupSchedule)
|
||||
m.SetBackupLastSuccessfulTimestamp(backupSchedule, time.Now())
|
||||
}
|
||||
|
||||
// RegisterBackupPartialFailure records a partially failed backup.
|
||||
|
|
Loading…
Reference in New Issue