Merge pull request #5779 from allenxu404/issue-matrics
Add Prometheus metrics to record the warning total and last status of backups (pull/5812/head)
commit
51568525cb
|
@ -0,0 +1,3 @@
|
|||
Add file system backup related metrics to Grafana dashboard
|
||||
Add metric backup_warning_total to record the total number of backups that finished with warnings
|
||||
Add metric backup_last_status to record the last status of the backup (1 = success, 0 = failure)
|
|
@ -303,12 +303,16 @@ func (c *backupController) processBackup(key string) error {
|
|||
switch request.Status.Phase {
|
||||
case velerov1api.BackupPhaseCompleted:
|
||||
c.metrics.RegisterBackupSuccess(backupScheduleName)
|
||||
c.metrics.RegisterBackupLastStatus(backupScheduleName, metrics.BackupLastStatusSucc)
|
||||
case velerov1api.BackupPhasePartiallyFailed:
|
||||
c.metrics.RegisterBackupPartialFailure(backupScheduleName)
|
||||
c.metrics.RegisterBackupLastStatus(backupScheduleName, metrics.BackupLastStatusFailure)
|
||||
case velerov1api.BackupPhaseFailed:
|
||||
c.metrics.RegisterBackupFailed(backupScheduleName)
|
||||
c.metrics.RegisterBackupLastStatus(backupScheduleName, metrics.BackupLastStatusFailure)
|
||||
case velerov1api.BackupPhaseFailedValidation:
|
||||
c.metrics.RegisterBackupValidationFailure(backupScheduleName)
|
||||
c.metrics.RegisterBackupLastStatus(backupScheduleName, metrics.BackupLastStatusFailure)
|
||||
}
|
||||
|
||||
log.Debug("Updating backup's final status")
|
||||
|
@ -789,6 +793,10 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac
|
|||
serverMetrics.RegisterBackupItemsTotalGauge(backupScheduleName, backup.Status.Progress.TotalItems)
|
||||
}
|
||||
serverMetrics.RegisterBackupItemsErrorsGauge(backupScheduleName, backup.Status.Errors)
|
||||
|
||||
if backup.Status.Warnings > 0 {
|
||||
serverMetrics.RegisterBackupWarning(backupScheduleName)
|
||||
}
|
||||
}
|
||||
|
||||
func persistBackup(backup *pkgbackup.Request,
|
||||
|
|
|
@ -45,6 +45,8 @@ const (
|
|||
backupLastSuccessfulTimestamp = "backup_last_successful_timestamp"
|
||||
backupItemsTotalGauge = "backup_items_total"
|
||||
backupItemsErrorsGauge = "backup_items_errors"
|
||||
backupWarningTotal = "backup_warning_total"
|
||||
backupLastStatus = "backup_last_status"
|
||||
restoreTotal = "restore_total"
|
||||
restoreAttemptTotal = "restore_attempt_total"
|
||||
restoreValidationFailedTotal = "restore_validation_failed_total"
|
||||
|
@ -70,6 +72,10 @@ const (
|
|||
pvbNameLabel = "pod_volume_backup"
|
||||
scheduleLabel = "schedule"
|
||||
backupNameLabel = "backupName"
|
||||
|
||||
// metrics values
|
||||
BackupLastStatusSucc int64 = 1
|
||||
BackupLastStatusFailure int64 = 0
|
||||
)
|
||||
|
||||
// NewServerMetrics returns new ServerMetrics
|
||||
|
@ -198,6 +204,22 @@ func NewServerMetrics() *ServerMetrics {
|
|||
},
|
||||
[]string{scheduleLabel},
|
||||
),
|
||||
backupWarningTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: backupWarningTotal,
|
||||
Help: "Total number of warned backups",
|
||||
},
|
||||
[]string{scheduleLabel},
|
||||
),
|
||||
backupLastStatus: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: backupLastStatus,
|
||||
Help: "Last status of the backup. A value of 1 is success, 0 is failure",
|
||||
},
|
||||
[]string{scheduleLabel},
|
||||
),
|
||||
restoreTotal: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: metricNamespace,
|
||||
|
@ -386,6 +408,12 @@ func (m *ServerMetrics) InitSchedule(scheduleName string) {
|
|||
if c, ok := m.metrics[backupItemsErrorsGauge].(*prometheus.GaugeVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[backupWarningTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[backupLastStatus].(*prometheus.GaugeVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(scheduleName).Add(0)
|
||||
}
|
||||
|
@ -559,6 +587,20 @@ func (m *ServerMetrics) RegisterBackupItemsErrorsGauge(backupSchedule string, it
|
|||
}
|
||||
}
|
||||
|
||||
// RegisterBackupWarning records a warned backup.
|
||||
func (m *ServerMetrics) RegisterBackupWarning(backupSchedule string) {
|
||||
if c, ok := m.metrics[backupWarningTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule).Inc()
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterBackupLastStatus records the last status of the backup.
|
||||
func (m *ServerMetrics) RegisterBackupLastStatus(backupSchedule string, lastStatus int64) {
|
||||
if g, ok := m.metrics[backupLastStatus].(*prometheus.GaugeVec); ok {
|
||||
g.WithLabelValues(backupSchedule).Set(float64(lastStatus))
|
||||
}
|
||||
}
|
||||
|
||||
// toSeconds translates a time.Duration value into a float64
|
||||
// representing the number of seconds in that duration.
|
||||
func toSeconds(d time.Duration) float64 {
|
||||
|
|
Loading…
Reference in New Issue