Merge pull request #4295 from mercedes-benz/tobiasgiese/items-metrics

Add metrics backup_items_total and backup_items_errors
pull/4778/head
Scott Seago 2022-03-25 16:40:43 -04:00 committed by GitHub
commit 8e4f88db68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 3 deletions

View File

@ -0,0 +1 @@
Add metrics backup_items_total and backup_items_errors

View File

@ -636,15 +636,15 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error {
}
}
backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
recordBackupMetrics(backupLog, backup.Backup, backupFile, c.metrics)
if err := gzippedLogFile.Close(); err != nil {
c.logger.WithField(Backup, kubeutil.NamespaceAndName(backup)).WithError(err).Error("error closing gzippedLogFile")
}
backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
// Assign finalize phase as close to end as possible so that any errors
// logged to backupLog are captured. This is done before uploading the
// artifacts to object storage so that the JSON representation of the
@ -694,6 +694,10 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac
serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted)
serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted)
serverMetrics.RegisterVolumeSnapshotFailures(backupScheduleName, backup.Status.VolumeSnapshotsAttempted-backup.Status.VolumeSnapshotsCompleted)
if backup.Status.Progress != nil {
serverMetrics.RegisterBackupItemsTotalGauge(backupScheduleName, backup.Status.Progress.TotalItems)
}
serverMetrics.RegisterBackupItemsErrorsGauge(backupScheduleName, backup.Status.Errors)
}
func persistBackup(backup *pkgbackup.Request,

View File

@ -43,6 +43,8 @@ const (
backupDeletionSuccessTotal = "backup_deletion_success_total"
backupDeletionFailureTotal = "backup_deletion_failure_total"
backupLastSuccessfulTimestamp = "backup_last_successful_timestamp"
backupItemsTotalGauge = "backup_items_total"
backupItemsErrorsGauge = "backup_items_errors"
restoreTotal = "restore_total"
restoreAttemptTotal = "restore_attempt_total"
restoreValidationFailedTotal = "restore_validation_failed_total"
@ -179,6 +181,22 @@ func NewServerMetrics() *ServerMetrics {
},
[]string{scheduleLabel},
),
backupItemsTotalGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: backupItemsTotalGauge,
Help: "Total number of items backed up",
},
[]string{scheduleLabel},
),
backupItemsErrorsGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: backupItemsErrorsGauge,
Help: "Total number of errors encountered during backup",
},
[]string{scheduleLabel},
),
restoreTotal: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
@ -337,6 +355,12 @@ func (m *ServerMetrics) InitSchedule(scheduleName string) {
if c, ok := m.metrics[backupDeletionFailureTotal].(*prometheus.CounterVec); ok {
c.WithLabelValues(scheduleName).Add(0)
}
if c, ok := m.metrics[backupItemsTotalGauge].(*prometheus.GaugeVec); ok {
c.WithLabelValues(scheduleName).Add(0)
}
if c, ok := m.metrics[backupItemsErrorsGauge].(*prometheus.GaugeVec); ok {
c.WithLabelValues(scheduleName).Add(0)
}
if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
c.WithLabelValues(scheduleName).Add(0)
}
@ -486,6 +510,21 @@ func (m *ServerMetrics) RegisterBackupDeletionSuccess(backupSchedule string) {
}
}
// RegisterBackupItemsTotalGauge sets the backup_items_total gauge for the
// given schedule label to the supplied item count. The call is a no-op when
// the metric is not present in m.metrics as a *prometheus.GaugeVec.
func (m *ServerMetrics) RegisterBackupItemsTotalGauge(backupSchedule string, items int) {
	gaugeVec, found := m.metrics[backupItemsTotalGauge].(*prometheus.GaugeVec)
	if !found {
		return
	}
	gaugeVec.WithLabelValues(backupSchedule).Set(float64(items))
}
// RegisterBackupItemsErrorsGauge sets the backup_items_errors gauge for the
// given schedule label. Despite the parameter name, items carries the number
// of error messages produced while the backup ran. The call is a no-op when
// the metric is not present in m.metrics as a *prometheus.GaugeVec.
func (m *ServerMetrics) RegisterBackupItemsErrorsGauge(backupSchedule string, items int) {
	gaugeVec, found := m.metrics[backupItemsErrorsGauge].(*prometheus.GaugeVec)
	if !found {
		return
	}
	gaugeVec.WithLabelValues(backupSchedule).Set(float64(items))
}
// toSeconds translates a time.Duration value into a float64
// representing the number of seconds in that duration.
func toSeconds(d time.Duration) float64 {