Merge pull request #4818 from blackpiglet/2499-add-metric-for-csi-snapshot
Add CSI VolumeSnapshot related metrics. (pull/4838/head)
commit
9373c8a383
|
@ -0,0 +1 @@
|
|||
Add CSI VolumeSnapshot related metrics.
|
|
@ -354,6 +354,14 @@ spec:
|
|||
format: date-time
|
||||
nullable: true
|
||||
type: string
|
||||
csiVolumeSnapshotsAttempted:
|
||||
description: CSIVolumeSnapshotsAttempted is the total number of attempted
|
||||
CSI VolumeSnapshots for this backup.
|
||||
type: integer
|
||||
csiVolumeSnapshotsCompleted:
|
||||
description: CSIVolumeSnapshotsCompleted is the total number of successfully
|
||||
completed CSI VolumeSnapshots for this backup.
|
||||
type: integer
|
||||
errors:
|
||||
description: Errors is a count of all error messages that were generated
|
||||
during execution of the backup. The actual errors are in the backup's
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -310,6 +310,16 @@ type BackupStatus struct {
|
|||
// +optional
|
||||
// +nullable
|
||||
Progress *BackupProgress `json:"progress,omitempty"`
|
||||
|
||||
// CSIVolumeSnapshotsAttempted is the total number of attempted
|
||||
// CSI VolumeSnapshots for this backup.
|
||||
// +optional
|
||||
CSIVolumeSnapshotsAttempted int `json:"csiVolumeSnapshotsAttempted,omitempty"`
|
||||
|
||||
// CSIVolumeSnapshotsCompleted is the total number of successfully
|
||||
// completed CSI VolumeSnapshots for this backup.
|
||||
// +optional
|
||||
CSIVolumeSnapshotsCompleted int `json:"csiVolumeSnapshotsCompleted,omitempty"`
|
||||
}
|
||||
|
||||
// BackupProgress stores information about the progress of a Backup's execution.
|
||||
|
|
|
@ -20,6 +20,8 @@ import (
|
|||
"fmt"
|
||||
"sort"
|
||||
|
||||
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
|
||||
|
||||
"github.com/vmware-tanzu/velero/internal/hook"
|
||||
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
|
||||
"github.com/vmware-tanzu/velero/pkg/plugin/framework"
|
||||
|
@ -48,6 +50,7 @@ type Request struct {
|
|||
VolumeSnapshots []*volume.Snapshot
|
||||
PodVolumeBackups []*velerov1api.PodVolumeBackup
|
||||
BackedUpItems map[itemKey]struct{}
|
||||
CSISnapshots []*snapshotv1api.VolumeSnapshot
|
||||
}
|
||||
|
||||
// BackupResourceList returns the list of backed up resources grouped by the API
|
||||
|
|
|
@ -636,6 +636,13 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error {
|
|||
}
|
||||
}
|
||||
|
||||
// Every CSI VolumeSnapshot collected for this backup counts as an attempt.
backup.Status.CSIVolumeSnapshotsAttempted = len(backup.CSISnapshots)
for _, vs := range backup.CSISnapshots {
	// Status and ReadyToUse are both pointers and may be unset when the
	// snapshot controller has not reported on the snapshot yet. Treat such
	// snapshots as not completed instead of dereferencing nil and panicking.
	if vs == nil || vs.Status == nil || vs.Status.ReadyToUse == nil {
		continue
	}
	if *vs.Status.ReadyToUse {
		backup.Status.CSIVolumeSnapshotsCompleted++
	}
}
|
||||
|
||||
backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
|
||||
backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
|
||||
|
||||
|
@ -694,6 +701,13 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac
|
|||
serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted)
|
||||
serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted)
|
||||
serverMetrics.RegisterVolumeSnapshotFailures(backupScheduleName, backup.Status.VolumeSnapshotsAttempted-backup.Status.VolumeSnapshotsCompleted)
|
||||
|
||||
if features.IsEnabled(velerov1api.CSIFeatureFlag) {
|
||||
serverMetrics.RegisterCSISnapshotAttempts(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted)
|
||||
serverMetrics.RegisterCSISnapshotSuccesses(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsCompleted)
|
||||
serverMetrics.RegisterCSISnapshotFailures(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted-backup.Status.CSIVolumeSnapshotsCompleted)
|
||||
}
|
||||
|
||||
if backup.Status.Progress != nil {
|
||||
serverMetrics.RegisterBackupItemsTotalGauge(backupScheduleName, backup.Status.Progress.TotalItems)
|
||||
}
|
||||
|
|
|
@ -54,6 +54,9 @@ const (
|
|||
volumeSnapshotAttemptTotal = "volume_snapshot_attempt_total"
|
||||
volumeSnapshotSuccessTotal = "volume_snapshot_success_total"
|
||||
volumeSnapshotFailureTotal = "volume_snapshot_failure_total"
|
||||
csiSnapshotAttemptTotal = "csi_snapshot_attempt_total"
|
||||
csiSnapshotSuccessTotal = "csi_snapshot_success_total"
|
||||
csiSnapshotFailureTotal = "csi_snapshot_failure_total"
|
||||
|
||||
// Restic metrics
|
||||
podVolumeBackupEnqueueTotal = "pod_volume_backup_enqueue_count"
|
||||
|
@ -67,8 +70,6 @@ const (
|
|||
pvbNameLabel = "pod_volume_backup"
|
||||
scheduleLabel = "schedule"
|
||||
backupNameLabel = "backupName"
|
||||
|
||||
secondsInMinute = 60.0
|
||||
)
|
||||
|
||||
// NewServerMetrics returns new ServerMetrics
|
||||
|
@ -268,6 +269,30 @@ func NewServerMetrics() *ServerMetrics {
|
|||
},
|
||||
[]string{scheduleLabel},
|
||||
),
|
||||
csiSnapshotAttemptTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotAttemptTotal,
|
||||
Help: "Total number of CSI attempted volume snapshots",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
csiSnapshotSuccessTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotSuccessTotal,
|
||||
Help: "Total number of CSI successful volume snapshots",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
csiSnapshotFailureTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: metricNamespace,
|
||||
Name: csiSnapshotFailureTotal,
|
||||
Help: "Total number of CSI failed volume snapshots",
|
||||
},
|
||||
[]string{scheduleLabel, backupNameLabel},
|
||||
),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -593,3 +618,24 @@ func (m *ServerMetrics) RegisterVolumeSnapshotFailures(backupSchedule string, vo
|
|||
c.WithLabelValues(backupSchedule).Add(float64(volumeSnapshotsFailed))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCSISnapshotAttempts records an attempt to snapshot a volume by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCSISnapshotAttempts(backupSchedule, backupName string, csiSnapshotsAttempted int) {
|
||||
if c, ok := m.metrics[csiSnapshotAttemptTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsAttempted))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCSISnapshotSuccesses records a completed volume snapshot by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCSISnapshotSuccesses(backupSchedule, backupName string, csiSnapshotCompleted int) {
|
||||
if c, ok := m.metrics[csiSnapshotSuccessTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotCompleted))
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterCSISnapshotFailures records a failed volume snapshot by CSI plugin.
|
||||
func (m *ServerMetrics) RegisterCSISnapshotFailures(backupSchedule, backupName string, csiSnapshotsFailed int) {
|
||||
if c, ok := m.metrics[csiSnapshotFailureTotal].(*prometheus.CounterVec); ok {
|
||||
c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsFailed))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue