Merge pull request #2196 from skriss/fix-1777

recompute backup_last_successful_timestamp metric during resync
pull/2219/head
Ashish Amarnath 2020-01-21 17:01:51 -08:00 committed by GitHub
commit ec22f2c88d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 158 additions and 3 deletions

View File

@ -0,0 +1 @@
repopulate backup_last_successful_timestamp metrics for each schedule after server restart

View File

@ -164,6 +164,12 @@ func (b *BackupBuilder) StartTimestamp(val time.Time) *BackupBuilder {
return b
}
// CompletionTimestamp stores val as the Backup's Status.CompletionTimestamp and
// returns the builder so that further calls can be chained.
func (b *BackupBuilder) CompletionTimestamp(val time.Time) *BackupBuilder {
	completedAt := metav1.Time{Time: val}
	b.object.Status.CompletionTimestamp = &completedAt
	return b
}
// Hooks sets the Backup's hooks.
func (b *BackupBuilder) Hooks(hooks velerov1api.BackupHooks) *BackupBuilder {
b.object.Spec.Hooks = hooks

View File

@ -148,12 +148,44 @@ func NewBackupController(
}
// resync recomputes the backup metrics from the backups currently returned by
// the lister: the backup_total gauge, and the backup_last_successful_timestamp
// gauge for every schedule that has a completed backup.
func (c *backupController) resync() {
	allBackups, listErr := c.lister.List(labels.Everything())

	// backup_total is only refreshed when the list call succeeded; a failure is
	// logged and the previous value is left in place.
	if listErr == nil {
		c.metrics.SetBackupTotal(int64(len(allBackups)))
	} else {
		c.logger.Error(listErr, "Error computing backup_total metric")
	}

	// backup_last_successful_timestamp is refreshed once per schedule. Ad-hoc
	// backups are reported under the empty schedule name.
	lastSuccess := getLastSuccessBySchedule(allBackups)
	for scheduleName, completedAt := range lastSuccess {
		c.metrics.SetBackupLastSuccessfulTimestamp(scheduleName, completedAt)
	}
}
// getLastSuccessBySchedule maps each schedule name to the completion time of its
// newest backup in the Completed phase. Backups in any other phase, and backups
// without a completion timestamp, are ignored, so a schedule with no usable
// backup has no entry in the result. Backups that carry no schedule label
// (ad-hoc backups) are grouped under the empty-string key.
func getLastSuccessBySchedule(backups []*velerov1api.Backup) map[string]time.Time {
	latest := make(map[string]time.Time)

	for _, bkp := range backups {
		isCompleted := bkp.Status.Phase == velerov1api.BackupPhaseCompleted
		if !isCompleted || bkp.Status.CompletionTimestamp == nil {
			continue
		}

		name := bkp.Labels[velerov1api.ScheduleNameLabel]
		finishedAt := bkp.Status.CompletionTimestamp.Time

		// A schedule that has not been seen yet reads back as the zero time, so
		// one comparison handles both the first and every later backup.
		if prev := latest[name]; finishedAt.After(prev) {
			latest[name] = finishedAt
		}
	}

	return latest
}
func (c *backupController) processBackup(key string) error {

View File

@ -738,3 +738,119 @@ func TestValidateAndGetSnapshotLocations(t *testing.T) {
})
}
}
// Test_getLastSuccessBySchedule checks that getLastSuccessBySchedule reports, for each schedule, the completion
// timestamp of the newest Completed backup, that schedules without any Completed backup are left out, and that
// backups without a schedule are reported under the empty-string key.
func Test_getLastSuccessBySchedule(t *testing.T) {
	// newBackup builds a backup in the given phase, labelled with the given schedule name. The completion
	// timestamp is only set when completedAt is not the zero time.
	newBackup := func(phase velerov1api.BackupPhase, completedAt time.Time, scheduleName string) *velerov1api.Backup {
		bb := builder.ForBackup("", "").
			ObjectMeta(builder.WithLabels(velerov1api.ScheduleNameLabel, scheduleName)).
			Phase(phase)

		if completedAt.IsZero() {
			return bb.Result()
		}
		return bb.CompletionTimestamp(completedAt).Result()
	}

	// t0 is a fixed reference instant; every completion timestamp below is expressed as an offset from it
	// using Add.
	t0, err := time.Parse(time.RFC1123, time.RFC1123)
	require.NoError(t, err)

	cases := []struct {
		name    string
		backups []*velerov1api.Backup
		want    map[string]time.Time
	}{
		{
			name:    "when backups is nil, an empty map is returned",
			backups: nil,
			want:    map[string]time.Time{},
		},
		{
			name:    "when backups is empty, an empty map is returned",
			backups: []*velerov1api.Backup{},
			want:    map[string]time.Time{},
		},
		{
			name: "when multiple completed backups for a schedule exist, the latest one is returned",
			backups: []*velerov1api.Backup{
				newBackup(velerov1api.BackupPhaseCompleted, t0, "schedule-1"),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(time.Second), "schedule-1"),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(-time.Second), "schedule-1"),
			},
			want: map[string]time.Time{
				"schedule-1": t0.Add(time.Second),
			},
		},
		{
			name: "when the most recent backup for a schedule is Failed, the timestamp of the most recent Completed one is returned",
			backups: []*velerov1api.Backup{
				newBackup(velerov1api.BackupPhaseCompleted, t0, "schedule-1"),
				newBackup(velerov1api.BackupPhaseFailed, t0.Add(time.Second), "schedule-1"),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(-time.Second), "schedule-1"),
			},
			want: map[string]time.Time{
				"schedule-1": t0,
			},
		},
		{
			name: "when there are no Completed backups for a schedule, it's not returned",
			backups: []*velerov1api.Backup{
				newBackup(velerov1api.BackupPhaseInProgress, t0, "schedule-1"),
				newBackup(velerov1api.BackupPhaseFailed, t0.Add(time.Second), "schedule-1"),
				newBackup(velerov1api.BackupPhasePartiallyFailed, t0.Add(-time.Second), "schedule-1"),
			},
			want: map[string]time.Time{},
		},
		{
			name: "when backups exist without a schedule, the most recent Completed one is returned",
			backups: []*velerov1api.Backup{
				newBackup(velerov1api.BackupPhaseCompleted, t0, ""),
				newBackup(velerov1api.BackupPhaseFailed, t0.Add(time.Second), ""),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(-time.Second), ""),
			},
			want: map[string]time.Time{
				"": t0,
			},
		},
		{
			name: "when backups exist for multiple schedules, the most recent Completed timestamp for each schedule is returned",
			backups: []*velerov1api.Backup{
				// no schedule (ad-hoc backups)
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(30*time.Minute), ""),
				newBackup(velerov1api.BackupPhaseFailed, t0.Add(time.Hour), ""),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(-time.Second), ""),

				// schedule-1: the newest backup failed, so the older Completed one wins
				newBackup(velerov1api.BackupPhaseCompleted, t0, "schedule-1"),
				newBackup(velerov1api.BackupPhaseFailed, t0.Add(time.Second), "schedule-1"),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(-time.Second), "schedule-1"),

				// schedule-2: all Completed
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(24*time.Hour), "schedule-2"),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(48*time.Hour), "schedule-2"),
				newBackup(velerov1api.BackupPhaseCompleted, t0.Add(72*time.Hour), "schedule-2"),

				// schedule-3: nothing Completed, so it must not appear in the result
				newBackup(velerov1api.BackupPhaseNew, t0, "schedule-3"),
				newBackup(velerov1api.BackupPhaseInProgress, t0.Add(time.Minute), "schedule-3"),
				newBackup(velerov1api.BackupPhasePartiallyFailed, t0.Add(2*time.Minute), "schedule-3"),
			},
			want: map[string]time.Time{
				"":           t0.Add(30 * time.Minute),
				"schedule-1": t0,
				"schedule-2": t0.Add(72 * time.Hour),
			},
		},
	}

	for i := range cases {
		tt := cases[i]
		t.Run(tt.name, func(t *testing.T) {
			got := getLastSuccessBySchedule(tt.backups)
			assert.Equal(t, tt.want, got)
		})
	}
}

View File

@ -297,9 +297,9 @@ func (m *ServerMetrics) SetBackupTarballSizeBytesGauge(backupSchedule string, si
}
// SetBackupLastSuccessfulTimestamp records the last time a backup ran successfully, Unix timestamp in seconds
func (m *ServerMetrics) SetBackupLastSuccessfulTimestamp(backupSchedule string) {
func (m *ServerMetrics) SetBackupLastSuccessfulTimestamp(backupSchedule string, time time.Time) {
if g, ok := m.metrics[backupLastSuccessfulTimestamp].(*prometheus.GaugeVec); ok {
g.WithLabelValues(backupSchedule).Set(float64(time.Now().Unix()))
g.WithLabelValues(backupSchedule).Set(float64(time.Unix()))
}
}
@ -322,7 +322,7 @@ func (m *ServerMetrics) RegisterBackupSuccess(backupSchedule string) {
if c, ok := m.metrics[backupSuccessTotal].(*prometheus.CounterVec); ok {
c.WithLabelValues(backupSchedule).Inc()
}
m.SetBackupLastSuccessfulTimestamp(backupSchedule)
m.SetBackupLastSuccessfulTimestamp(backupSchedule, time.Now())
}
// RegisterBackupPartialFailure records a partially failed backup.