Merge pull request #4818 from blackpiglet/2499-add-metric-for-csi-snapshot

Add CSI VolumeSnapshot related metrics.
pull/4838/head
qiuming 2022-04-15 16:55:37 +08:00 committed by GitHub
commit 9373c8a383
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 85 additions and 3 deletions

View File

@ -0,0 +1 @@
Add CSI VolumeSnapshot related metrics.

View File

@ -354,6 +354,14 @@ spec:
format: date-time
nullable: true
type: string
csiVolumeSnapshotsAttempted:
description: CSIVolumeSnapshotsAttempted is the total number of attempted
CSI VolumeSnapshots for this backup.
type: integer
csiVolumeSnapshotsCompleted:
description: CSIVolumeSnapshotsCompleted is the total number of successfully
completed CSI VolumeSnapshots for this backup.
type: integer
errors:
description: Errors is a count of all error messages that were generated
during execution of the backup. The actual errors are in the backup's

File diff suppressed because one or more lines are too long

View File

@ -310,6 +310,16 @@ type BackupStatus struct {
// +optional
// +nullable
Progress *BackupProgress `json:"progress,omitempty"`
// CSIVolumeSnapshotsAttempted is the total number of attempted
// CSI VolumeSnapshots for this backup.
// +optional
CSIVolumeSnapshotsAttempted int `json:"csiVolumeSnapshotsAttempted,omitempty"`
// CSIVolumeSnapshotsCompleted is the total number of successfully
// completed CSI VolumeSnapshots for this backup.
// +optional
CSIVolumeSnapshotsCompleted int `json:"csiVolumeSnapshotsCompleted,omitempty"`
}
// BackupProgress stores information about the progress of a Backup's execution.

View File

@ -20,6 +20,8 @@ import (
"fmt"
"sort"
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
"github.com/vmware-tanzu/velero/internal/hook"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/plugin/framework"
@ -48,6 +50,7 @@ type Request struct {
VolumeSnapshots []*volume.Snapshot
PodVolumeBackups []*velerov1api.PodVolumeBackup
BackedUpItems map[itemKey]struct{}
CSISnapshots []*snapshotv1api.VolumeSnapshot
}
// BackupResourceList returns the list of backed up resources grouped by the API

View File

@ -636,6 +636,13 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error {
}
}
// Every CSI VolumeSnapshot collected for this backup counts as an attempt.
backup.Status.CSIVolumeSnapshotsAttempted = len(backup.CSISnapshots)
for _, vs := range backup.CSISnapshots {
	// Status and Status.ReadyToUse are pointers and may still be nil when the
	// snapshot has not reported its state; such snapshots are counted as not
	// completed instead of dereferencing nil and panicking the controller.
	if vs == nil || vs.Status == nil || vs.Status.ReadyToUse == nil {
		continue
	}
	if *vs.Status.ReadyToUse {
		backup.Status.CSIVolumeSnapshotsCompleted++
	}
}
backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)
@ -694,6 +701,13 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac
serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted)
serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted)
serverMetrics.RegisterVolumeSnapshotFailures(backupScheduleName, backup.Status.VolumeSnapshotsAttempted-backup.Status.VolumeSnapshotsCompleted)
if features.IsEnabled(velerov1api.CSIFeatureFlag) {
serverMetrics.RegisterCSISnapshotAttempts(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted)
serverMetrics.RegisterCSISnapshotSuccesses(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsCompleted)
serverMetrics.RegisterCSISnapshotFailures(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted-backup.Status.CSIVolumeSnapshotsCompleted)
}
if backup.Status.Progress != nil {
serverMetrics.RegisterBackupItemsTotalGauge(backupScheduleName, backup.Status.Progress.TotalItems)
}

View File

@ -54,6 +54,9 @@ const (
volumeSnapshotAttemptTotal = "volume_snapshot_attempt_total"
volumeSnapshotSuccessTotal = "volume_snapshot_success_total"
volumeSnapshotFailureTotal = "volume_snapshot_failure_total"
csiSnapshotAttemptTotal = "csi_snapshot_attempt_total"
csiSnapshotSuccessTotal = "csi_snapshot_success_total"
csiSnapshotFailureTotal = "csi_snapshot_failure_total"
// Restic metrics
podVolumeBackupEnqueueTotal = "pod_volume_backup_enqueue_count"
@ -67,8 +70,6 @@ const (
pvbNameLabel = "pod_volume_backup"
scheduleLabel = "schedule"
backupNameLabel = "backupName"
secondsInMinute = 60.0
)
// NewServerMetrics returns new ServerMetrics
@ -268,6 +269,30 @@ func NewServerMetrics() *ServerMetrics {
},
[]string{scheduleLabel},
),
csiSnapshotAttemptTotal: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: csiSnapshotAttemptTotal,
Help: "Total number of CSI attempted volume snapshots",
},
[]string{scheduleLabel, backupNameLabel},
),
csiSnapshotSuccessTotal: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: csiSnapshotSuccessTotal,
Help: "Total number of CSI successful volume snapshots",
},
[]string{scheduleLabel, backupNameLabel},
),
csiSnapshotFailureTotal: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: csiSnapshotFailureTotal,
Help: "Total number of CSI failed volume snapshots",
},
[]string{scheduleLabel, backupNameLabel},
),
},
}
}
@ -593,3 +618,24 @@ func (m *ServerMetrics) RegisterVolumeSnapshotFailures(backupSchedule string, vo
c.WithLabelValues(backupSchedule).Add(float64(volumeSnapshotsFailed))
}
}
// RegisterCSISnapshotAttempts adds csiSnapshotsAttempted to the CSI snapshot
// attempt counter for the given schedule and backup name. It does nothing if
// that metric is not present as a *prometheus.CounterVec.
func (m *ServerMetrics) RegisterCSISnapshotAttempts(backupSchedule, backupName string, csiSnapshotsAttempted int) {
	attempts, isCounterVec := m.metrics[csiSnapshotAttemptTotal].(*prometheus.CounterVec)
	if !isCounterVec {
		return
	}

	delta := float64(csiSnapshotsAttempted)
	attempts.WithLabelValues(backupSchedule, backupName).Add(delta)
}
// RegisterCSISnapshotSuccesses adds csiSnapshotCompleted to the CSI snapshot
// success counter for the given schedule and backup name. It does nothing if
// that metric is not present as a *prometheus.CounterVec.
func (m *ServerMetrics) RegisterCSISnapshotSuccesses(backupSchedule, backupName string, csiSnapshotCompleted int) {
	successes, isCounterVec := m.metrics[csiSnapshotSuccessTotal].(*prometheus.CounterVec)
	if !isCounterVec {
		return
	}

	delta := float64(csiSnapshotCompleted)
	successes.WithLabelValues(backupSchedule, backupName).Add(delta)
}
// RegisterCSISnapshotFailures adds csiSnapshotsFailed to the CSI snapshot
// failure counter for the given schedule and backup name. It does nothing if
// that metric is not present as a *prometheus.CounterVec.
func (m *ServerMetrics) RegisterCSISnapshotFailures(backupSchedule, backupName string, csiSnapshotsFailed int) {
	failures, isCounterVec := m.metrics[csiSnapshotFailureTotal].(*prometheus.CounterVec)
	if !isCounterVec {
		return
	}

	delta := float64(csiSnapshotsFailed)
	failures.WithLabelValues(backupSchedule, backupName).Add(delta)
}