Merge pull request #1353 from fabito/gauge-for-number-of-backups-and-restores
Add gauge metrics for number of existing backups and restores
pull/1388/head
commit
8870281afc
changelogs/unreleased
pkg
metrics
|
@ -0,0 +1 @@
|
||||||
|
Add gauge metrics for number of existing backups and restores
|
|
@ -111,6 +111,8 @@ func NewBackupController(
|
||||||
backupLocationInformer.Informer().HasSynced,
|
backupLocationInformer.Informer().HasSynced,
|
||||||
volumeSnapshotLocationInformer.Informer().HasSynced,
|
volumeSnapshotLocationInformer.Informer().HasSynced,
|
||||||
)
|
)
|
||||||
|
c.resyncFunc = c.resync
|
||||||
|
c.resyncPeriod = time.Minute
|
||||||
|
|
||||||
backupInformer.Informer().AddEventHandler(
|
backupInformer.Informer().AddEventHandler(
|
||||||
cache.ResourceEventHandlerFuncs{
|
cache.ResourceEventHandlerFuncs{
|
||||||
|
@ -141,6 +143,15 @@ func NewBackupController(
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *backupController) resync() {
|
||||||
|
backups, err := c.lister.List(labels.Everything())
|
||||||
|
if err != nil {
|
||||||
|
c.logger.Error(err, "Error computing backup_total metric")
|
||||||
|
} else {
|
||||||
|
c.metrics.SetBackupTotal(int64(len(backups)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *backupController) processBackup(key string) error {
|
func (c *backupController) processBackup(key string) error {
|
||||||
log := c.logger.WithField("key", key)
|
log := c.logger.WithField("key", key)
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
|
"time"
|
||||||
|
|
||||||
jsonpatch "github.com/evanphx/json-patch"
|
jsonpatch "github.com/evanphx/json-patch"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
@ -133,6 +134,8 @@ func NewRestoreController(
|
||||||
backupLocationInformer.Informer().HasSynced,
|
backupLocationInformer.Informer().HasSynced,
|
||||||
snapshotLocationInformer.Informer().HasSynced,
|
snapshotLocationInformer.Informer().HasSynced,
|
||||||
)
|
)
|
||||||
|
c.resyncFunc = c.resync
|
||||||
|
c.resyncPeriod = time.Minute
|
||||||
|
|
||||||
restoreInformer.Informer().AddEventHandler(
|
restoreInformer.Informer().AddEventHandler(
|
||||||
cache.ResourceEventHandlerFuncs{
|
cache.ResourceEventHandlerFuncs{
|
||||||
|
@ -163,6 +166,15 @@ func NewRestoreController(
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *restoreController) resync() {
|
||||||
|
restores, err := c.restoreLister.List(labels.Everything())
|
||||||
|
if err != nil {
|
||||||
|
c.logger.Error(err, "Error computing restore_total metric")
|
||||||
|
} else {
|
||||||
|
c.metrics.SetRestoreTotal(int64(len(restores)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *restoreController) processQueueItem(key string) error {
|
func (c *restoreController) processQueueItem(key string) error {
|
||||||
log := c.logger.WithField("key", key)
|
log := c.logger.WithField("key", key)
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ type ServerMetrics struct {
|
||||||
const (
|
const (
|
||||||
metricNamespace = "velero"
|
metricNamespace = "velero"
|
||||||
backupTarballSizeBytesGauge = "backup_tarball_size_bytes"
|
backupTarballSizeBytesGauge = "backup_tarball_size_bytes"
|
||||||
|
backupTotal = "backup_total"
|
||||||
backupAttemptTotal = "backup_attempt_total"
|
backupAttemptTotal = "backup_attempt_total"
|
||||||
backupSuccessTotal = "backup_success_total"
|
backupSuccessTotal = "backup_success_total"
|
||||||
backupFailureTotal = "backup_failure_total"
|
backupFailureTotal = "backup_failure_total"
|
||||||
|
@ -37,6 +38,7 @@ const (
|
||||||
backupDeletionAttemptTotal = "backup_deletion_attempt_total"
|
backupDeletionAttemptTotal = "backup_deletion_attempt_total"
|
||||||
backupDeletionSuccessTotal = "backup_deletion_success_total"
|
backupDeletionSuccessTotal = "backup_deletion_success_total"
|
||||||
backupDeletionFailureTotal = "backup_deletion_failure_total"
|
backupDeletionFailureTotal = "backup_deletion_failure_total"
|
||||||
|
restoreTotal = "restore_total"
|
||||||
restoreAttemptTotal = "restore_attempt_total"
|
restoreAttemptTotal = "restore_attempt_total"
|
||||||
restoreValidationFailedTotal = "restore_validation_failed_total"
|
restoreValidationFailedTotal = "restore_validation_failed_total"
|
||||||
restoreSuccessTotal = "restore_success_total"
|
restoreSuccessTotal = "restore_success_total"
|
||||||
|
@ -63,6 +65,13 @@ func NewServerMetrics() *ServerMetrics {
|
||||||
},
|
},
|
||||||
[]string{scheduleLabel},
|
[]string{scheduleLabel},
|
||||||
),
|
),
|
||||||
|
backupTotal: prometheus.NewGauge(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Namespace: metricNamespace,
|
||||||
|
Name: backupTotal,
|
||||||
|
Help: "Current number of existent backups",
|
||||||
|
},
|
||||||
|
),
|
||||||
backupAttemptTotal: prometheus.NewCounterVec(
|
backupAttemptTotal: prometheus.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
prometheus.CounterOpts{
|
||||||
Namespace: metricNamespace,
|
Namespace: metricNamespace,
|
||||||
|
@ -130,6 +139,13 @@ func NewServerMetrics() *ServerMetrics {
|
||||||
},
|
},
|
||||||
[]string{scheduleLabel},
|
[]string{scheduleLabel},
|
||||||
),
|
),
|
||||||
|
restoreTotal: prometheus.NewGauge(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Namespace: metricNamespace,
|
||||||
|
Name: restoreTotal,
|
||||||
|
Help: "Current number of existent restores",
|
||||||
|
},
|
||||||
|
),
|
||||||
restoreAttemptTotal: prometheus.NewCounterVec(
|
restoreAttemptTotal: prometheus.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
prometheus.CounterOpts{
|
||||||
Namespace: metricNamespace,
|
Namespace: metricNamespace,
|
||||||
|
@ -247,6 +263,13 @@ func (m *ServerMetrics) SetBackupTarballSizeBytesGauge(backupSchedule string, si
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetBackupTotal records the current number of existent backups.
|
||||||
|
func (m *ServerMetrics) SetBackupTotal(numberOfBackups int64) {
|
||||||
|
if g, ok := m.metrics[backupTotal].(prometheus.Gauge); ok {
|
||||||
|
g.Set(float64(numberOfBackups))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterBackupAttempt records a backup attempt.
|
// RegisterBackupAttempt records a backup attempt.
|
||||||
func (m *ServerMetrics) RegisterBackupAttempt(backupSchedule string) {
|
func (m *ServerMetrics) RegisterBackupAttempt(backupSchedule string) {
|
||||||
if c, ok := m.metrics[backupAttemptTotal].(*prometheus.CounterVec); ok {
|
if c, ok := m.metrics[backupAttemptTotal].(*prometheus.CounterVec); ok {
|
||||||
|
@ -302,6 +325,13 @@ func toSeconds(d time.Duration) float64 {
|
||||||
return float64(d / time.Second)
|
return float64(d / time.Second)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetRestoreTotal records the current number of existent restores.
|
||||||
|
func (m *ServerMetrics) SetRestoreTotal(numberOfRestores int64) {
|
||||||
|
if g, ok := m.metrics[restoreTotal].(prometheus.Gauge); ok {
|
||||||
|
g.Set(float64(numberOfRestores))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterRestoreAttempt records an attempt to restore a backup.
|
// RegisterRestoreAttempt records an attempt to restore a backup.
|
||||||
func (m *ServerMetrics) RegisterRestoreAttempt(backupSchedule string) {
|
func (m *ServerMetrics) RegisterRestoreAttempt(backupSchedule string) {
|
||||||
if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
|
if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
|
||||||
|
|
Loading…
Reference in New Issue