diff --git a/changelogs/unreleased/1353-fabito b/changelogs/unreleased/1353-fabito
new file mode 100644
index 000000000..9249ce9cc
--- /dev/null
+++ b/changelogs/unreleased/1353-fabito
@@ -0,0 +1 @@
+Add gauge metrics for number of existing backups and restores
\ No newline at end of file
diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go
index d3cb2d664..589f41fc8 100644
--- a/pkg/controller/backup_controller.go
+++ b/pkg/controller/backup_controller.go
@@ -111,6 +111,8 @@ func NewBackupController(
 		backupLocationInformer.Informer().HasSynced,
 		volumeSnapshotLocationInformer.Informer().HasSynced,
 	)
+	c.resyncFunc = c.resync
+	c.resyncPeriod = time.Minute
 
 	backupInformer.Informer().AddEventHandler(
 		cache.ResourceEventHandlerFuncs{
@@ -141,6 +143,17 @@ func NewBackupController(
 	return c
 }
 
+// resync recomputes the backup_total gauge from the number of backups the lister
+// currently returns; a listing failure is logged and the gauge is left unchanged.
+func (c *backupController) resync() {
+	backups, err := c.lister.List(labels.Everything())
+	if err != nil {
+		c.logger.WithError(err).Error("Error computing backup_total metric")
+		return
+	}
+	c.metrics.SetBackupTotal(int64(len(backups)))
+}
+
 func (c *backupController) processBackup(key string) error {
 	log := c.logger.WithField("key", key)
 
diff --git a/pkg/controller/restore_controller.go b/pkg/controller/restore_controller.go
index 2dabf5b94..a0eb3a2c3 100644
--- a/pkg/controller/restore_controller.go
+++ b/pkg/controller/restore_controller.go
@@ -25,6 +25,7 @@ import (
 	"io/ioutil"
 	"os"
 	"sort"
+	"time"
 
 	jsonpatch "github.com/evanphx/json-patch"
 	"github.com/pkg/errors"
@@ -133,6 +134,8 @@ func NewRestoreController(
 		backupLocationInformer.Informer().HasSynced,
 		snapshotLocationInformer.Informer().HasSynced,
 	)
+	c.resyncFunc = c.resync
+	c.resyncPeriod = time.Minute
 
 	restoreInformer.Informer().AddEventHandler(
 		cache.ResourceEventHandlerFuncs{
@@ -163,6 +166,17 @@ func NewRestoreController(
 	return c
 }
 
+// resync recomputes the restore_total gauge from the number of restores the lister
+// currently returns; a listing failure is logged and the gauge is left unchanged.
+func (c *restoreController) resync() {
+	restores, err := c.restoreLister.List(labels.Everything())
+	if err != nil {
+		c.logger.WithError(err).Error("Error computing restore_total metric")
+		return
+	}
+	c.metrics.SetRestoreTotal(int64(len(restores)))
+}
+
 func (c *restoreController) processQueueItem(key string) error {
 	log := c.logger.WithField("key", key)
 
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index b1ee18f2f..c3e6173b6 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -30,6 +30,7 @@ type ServerMetrics struct {
 const (
 	metricNamespace              = "velero"
 	backupTarballSizeBytesGauge  = "backup_tarball_size_bytes"
+	backupTotal                  = "backup_total"
 	backupAttemptTotal           = "backup_attempt_total"
 	backupSuccessTotal           = "backup_success_total"
 	backupFailureTotal           = "backup_failure_total"
@@ -37,6 +38,7 @@ const (
 	backupDeletionAttemptTotal   = "backup_deletion_attempt_total"
 	backupDeletionSuccessTotal   = "backup_deletion_success_total"
 	backupDeletionFailureTotal   = "backup_deletion_failure_total"
+	restoreTotal                 = "restore_total"
 	restoreAttemptTotal          = "restore_attempt_total"
 	restoreValidationFailedTotal = "restore_validation_failed_total"
 	restoreSuccessTotal          = "restore_success_total"
@@ -63,6 +65,13 @@ func NewServerMetrics() *ServerMetrics {
 				},
 				[]string{scheduleLabel},
 			),
+			backupTotal: prometheus.NewGauge(
+				prometheus.GaugeOpts{
+					Namespace: metricNamespace,
+					Name:      backupTotal,
+					Help:      "Current number of existent backups",
+				},
+			),
 			backupAttemptTotal: prometheus.NewCounterVec(
 				prometheus.CounterOpts{
 					Namespace: metricNamespace,
@@ -130,6 +139,13 @@ func NewServerMetrics() *ServerMetrics {
 				},
 				[]string{scheduleLabel},
 			),
+			restoreTotal: prometheus.NewGauge(
+				prometheus.GaugeOpts{
+					Namespace: metricNamespace,
+					Name:      restoreTotal,
+					Help:      "Current number of existent restores",
+				},
+			),
 			restoreAttemptTotal: prometheus.NewCounterVec(
 				prometheus.CounterOpts{
 					Namespace: metricNamespace,
@@ -247,6 +263,13 @@ func (m *ServerMetrics) SetBackupTarballSizeBytesGauge(backupSchedule string, si
 	}
 }
 
+// SetBackupTotal records the current number of existent backups.
+func (m *ServerMetrics) SetBackupTotal(numberOfBackups int64) {
+	if g, ok := m.metrics[backupTotal].(prometheus.Gauge); ok {
+		g.Set(float64(numberOfBackups))
+	}
+}
+
 // RegisterBackupAttempt records an backup attempt.
 func (m *ServerMetrics) RegisterBackupAttempt(backupSchedule string) {
 	if c, ok := m.metrics[backupAttemptTotal].(*prometheus.CounterVec); ok {
@@ -302,6 +325,13 @@ func toSeconds(d time.Duration) float64 {
 	return float64(d / time.Second)
 }
 
+// SetRestoreTotal records the current number of existent restores.
+func (m *ServerMetrics) SetRestoreTotal(numberOfRestores int64) {
+	if g, ok := m.metrics[restoreTotal].(prometheus.Gauge); ok {
+		g.Set(float64(numberOfRestores))
+	}
+}
+
 // RegisterRestoreAttempt records an attempt to restore a backup.
 func (m *ServerMetrics) RegisterRestoreAttempt(backupSchedule string) {
 	if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {