Merge pull request #5104 from blackpiglet/5048-CSI-snapshot-timeout-configurable
Make CSI snapshot creation timeout configurable for backup and schedule.
pull/5203/head
commit
6951875053
|
@ -0,0 +1 @@
|
|||
Make CSI snapshot creation timeout configurable.
|
|
@ -37,6 +37,11 @@ spec:
|
|||
spec:
|
||||
description: BackupSpec defines the specification for a Velero backup.
|
||||
properties:
|
||||
csiSnapshotTimeout:
|
||||
description: CSISnapshotTimeout specifies the time used to wait for
|
||||
CSI VolumeSnapshot status turns to ReadyToUse during creation, before
|
||||
returning error as timeout. The default value is 10 minutes.
|
||||
type: string
|
||||
defaultVolumesToRestic:
|
||||
description: DefaultVolumesToRestic specifies whether restic should
|
||||
be used to take a backup of all pod volumes by default.
|
||||
|
|
|
@ -61,6 +61,11 @@ spec:
|
|||
description: Template is the definition of the Backup to be run on
|
||||
the provided schedule
|
||||
properties:
|
||||
csiSnapshotTimeout:
|
||||
description: CSISnapshotTimeout specifies the time used to wait
|
||||
for CSI VolumeSnapshot status turns to ReadyToUse during creation,
|
||||
before returning error as timeout. The default value is 10 minutes.
|
||||
type: string
|
||||
defaultVolumesToRestic:
|
||||
description: DefaultVolumesToRestic specifies whether restic should
|
||||
be used to take a backup of all pod volumes by default.
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -110,6 +110,12 @@ type BackupSpec struct {
|
|||
// +optional
|
||||
// +nullable
|
||||
OrderedResources map[string]string `json:"orderedResources,omitempty"`
|
||||
|
||||
// CSISnapshotTimeout specifies how long to wait for the CSI VolumeSnapshot status to turn to
|
||||
// ReadyToUse during creation before returning a timeout error.
|
||||
// The default value is 10 minutes.
|
||||
// +optional
|
||||
CSISnapshotTimeout metav1.Duration `json:"csiSnapshotTimeout,omitempty"`
|
||||
}
|
||||
|
||||
// BackupHooks contains custom behaviors that should be executed at different phases of the backup.
|
||||
|
|
|
@ -344,6 +344,7 @@ func (in *BackupSpec) DeepCopyInto(out *BackupSpec) {
|
|||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
out.CSISnapshotTimeout = in.CSISnapshotTimeout
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackupSpec.
|
||||
|
|
|
@ -233,3 +233,9 @@ func (b *BackupBuilder) OrderedResources(orders map[string]string) *BackupBuilde
|
|||
b.object.Spec.OrderedResources = orders
|
||||
return b
|
||||
}
|
||||
|
||||
// CSISnapshotTimeout sets the Backup's Spec.CSISnapshotTimeout to the given duration and returns the builder so calls can be chained.
|
||||
func (b *BackupBuilder) CSISnapshotTimeout(timeout time.Duration) *BackupBuilder {
|
||||
b.object.Spec.CSISnapshotTimeout.Duration = timeout
|
||||
return b
|
||||
}
|
||||
|
|
|
@ -98,6 +98,7 @@ type CreateOptions struct {
|
|||
SnapshotLocations []string
|
||||
FromSchedule string
|
||||
OrderedResources string
|
||||
CSISnapshotTimeout time.Duration
|
||||
|
||||
client veleroclient.Interface
|
||||
}
|
||||
|
@ -122,6 +123,7 @@ func (o *CreateOptions) BindFlags(flags *pflag.FlagSet) {
|
|||
flags.StringSliceVar(&o.SnapshotLocations, "volume-snapshot-locations", o.SnapshotLocations, "List of locations (at most one per provider) where volume snapshots should be stored.")
|
||||
flags.VarP(&o.Selector, "selector", "l", "Only back up resources matching this label selector.")
|
||||
flags.StringVar(&o.OrderedResources, "ordered-resources", "", "Mapping Kinds to an ordered list of specific resources of that Kind. Resource names are separated by commas and their names are in format 'namespace/resourcename'. For cluster scope resource, simply use resource name. Key-value pairs in the mapping are separated by semi-colon. Example: 'pods=ns1/pod1,ns1/pod2;persistentvolumeclaims=ns1/pvc4,ns1/pvc8'. Optional.")
|
||||
flags.DurationVar(&o.CSISnapshotTimeout, "csi-snapshot-timeout", o.CSISnapshotTimeout, "How long to wait for CSI snapshot creation before timeout.")
|
||||
f := flags.VarPF(&o.SnapshotVolumes, "snapshot-volumes", "", "Take snapshots of PersistentVolumes as part of the backup.")
|
||||
// this allows the user to just specify "--snapshot-volumes" as shorthand for "--snapshot-volumes=true"
|
||||
// like a normal bool flag
|
||||
|
@ -332,7 +334,8 @@ func (o *CreateOptions) BuildBackup(namespace string) (*velerov1api.Backup, erro
|
|||
LabelSelector(o.Selector.LabelSelector).
|
||||
TTL(o.TTL).
|
||||
StorageLocation(o.StorageLocation).
|
||||
VolumeSnapshotLocations(o.SnapshotLocations...)
|
||||
VolumeSnapshotLocations(o.SnapshotLocations...).
|
||||
CSISnapshotTimeout(o.CSISnapshotTimeout)
|
||||
if len(o.OrderedResources) > 0 {
|
||||
orders, err := ParseOrderedResources(o.OrderedResources)
|
||||
if err != nil {
|
||||
|
|
|
@ -19,6 +19,7 @@ package backup
|
|||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
@ -35,6 +36,7 @@ func TestCreateOptions_BuildBackup(t *testing.T) {
|
|||
o.Labels.Set("velero.io/test=true")
|
||||
o.OrderedResources = "pods=p1,p2;persistentvolumeclaims=pvc1,pvc2"
|
||||
orders, err := ParseOrderedResources(o.OrderedResources)
|
||||
o.CSISnapshotTimeout = 20 * time.Minute
|
||||
assert.NoError(t, err)
|
||||
|
||||
backup, err := o.BuildBackup(testNamespace)
|
||||
|
@ -46,6 +48,7 @@ func TestCreateOptions_BuildBackup(t *testing.T) {
|
|||
SnapshotVolumes: o.SnapshotVolumes.Value,
|
||||
IncludeClusterResources: o.IncludeClusterResources.Value,
|
||||
OrderedResources: orders,
|
||||
CSISnapshotTimeout: metav1.Duration{Duration: o.CSISnapshotTimeout},
|
||||
}, backup.Spec)
|
||||
|
||||
assert.Equal(t, map[string]string{
|
||||
|
|
|
@ -145,6 +145,7 @@ func (o *CreateOptions) Run(c *cobra.Command, f client.Factory) error {
|
|||
VolumeSnapshotLocations: o.BackupOptions.SnapshotLocations,
|
||||
DefaultVolumesToRestic: o.BackupOptions.DefaultVolumesToRestic.Value,
|
||||
OrderedResources: orders,
|
||||
CSISnapshotTimeout: metav1.Duration{Duration: o.BackupOptions.CSISnapshotTimeout},
|
||||
},
|
||||
Schedule: o.Schedule,
|
||||
UseOwnerReferencesInBackup: &o.UseOwnerReferencesInBackup,
|
||||
|
|
|
@ -103,6 +103,8 @@ const (
|
|||
// the default TTL for a backup
|
||||
defaultBackupTTL = 30 * 24 * time.Hour
|
||||
|
||||
defaultCSISnapshotTimeout = 10 * time.Minute
|
||||
|
||||
// defaultCredentialsDirectory is the path on disk where credential
|
||||
// files will be written to
|
||||
defaultCredentialsDirectory = "/tmp/credentials"
|
||||
|
@ -112,7 +114,7 @@ type serverConfig struct {
|
|||
// TODO(2.0) Deprecate defaultBackupLocation
|
||||
pluginDir, metricsAddress, defaultBackupLocation string
|
||||
backupSyncPeriod, podVolumeOperationTimeout, resourceTerminatingTimeout time.Duration
|
||||
defaultBackupTTL, storeValidationFrequency time.Duration
|
||||
defaultBackupTTL, storeValidationFrequency, defaultCSISnapshotTimeout time.Duration
|
||||
restoreResourcePriorities []string
|
||||
defaultVolumeSnapshotLocations map[string]string
|
||||
restoreOnly bool
|
||||
|
@ -143,6 +145,7 @@ func NewCommand(f client.Factory) *cobra.Command {
|
|||
defaultVolumeSnapshotLocations: make(map[string]string),
|
||||
backupSyncPeriod: defaultBackupSyncPeriod,
|
||||
defaultBackupTTL: defaultBackupTTL,
|
||||
defaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
|
||||
storeValidationFrequency: defaultStoreValidationFrequency,
|
||||
podVolumeOperationTimeout: defaultPodVolumeOperationTimeout,
|
||||
restoreResourcePriorities: defaultRestorePriorities,
|
||||
|
@ -651,6 +654,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
|
|||
s.config.defaultBackupLocation,
|
||||
s.config.defaultVolumesToRestic,
|
||||
s.config.defaultBackupTTL,
|
||||
s.config.defaultCSISnapshotTimeout,
|
||||
s.sharedInformerFactory.Velero().V1().VolumeSnapshotLocations().Lister(),
|
||||
defaultVolumeSnapshotLocations,
|
||||
s.metrics,
|
||||
|
|
|
@ -88,6 +88,7 @@ type backupController struct {
|
|||
defaultBackupLocation string
|
||||
defaultVolumesToRestic bool
|
||||
defaultBackupTTL time.Duration
|
||||
defaultCSISnapshotTimeout time.Duration
|
||||
snapshotLocationLister velerov1listers.VolumeSnapshotLocationLister
|
||||
defaultSnapshotLocations map[string]string
|
||||
metrics *metrics.ServerMetrics
|
||||
|
@ -112,6 +113,7 @@ func NewBackupController(
|
|||
defaultBackupLocation string,
|
||||
defaultVolumesToRestic bool,
|
||||
defaultBackupTTL time.Duration,
|
||||
defaultCSISnapshotTimeout time.Duration,
|
||||
volumeSnapshotLocationLister velerov1listers.VolumeSnapshotLocationLister,
|
||||
defaultSnapshotLocations map[string]string,
|
||||
metrics *metrics.ServerMetrics,
|
||||
|
@ -136,6 +138,7 @@ func NewBackupController(
|
|||
defaultBackupLocation: defaultBackupLocation,
|
||||
defaultVolumesToRestic: defaultVolumesToRestic,
|
||||
defaultBackupTTL: defaultBackupTTL,
|
||||
defaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
|
||||
snapshotLocationLister: volumeSnapshotLocationLister,
|
||||
defaultSnapshotLocations: defaultSnapshotLocations,
|
||||
metrics: metrics,
|
||||
|
@ -359,6 +362,11 @@ func (c *backupController) prepareBackupRequest(backup *velerov1api.Backup) *pkg
|
|||
request.Spec.TTL.Duration = c.defaultBackupTTL
|
||||
}
|
||||
|
||||
if request.Spec.CSISnapshotTimeout.Duration == 0 {
|
||||
// set default CSI VolumeSnapshot timeout
|
||||
request.Spec.CSISnapshotTimeout.Duration = c.defaultCSISnapshotTimeout
|
||||
}
|
||||
|
||||
// calculate expiration
|
||||
request.Status.Expiration = &metav1.Time{Time: c.clock.Now().Add(request.Spec.TTL.Duration)}
|
||||
|
||||
|
@ -649,7 +657,7 @@ func (c *backupController) runBackup(backup *pkgbackup.Request) error {
|
|||
backupLog.Error(err)
|
||||
}
|
||||
|
||||
err = c.checkVolumeSnapshotReadyToUse(context.Background(), volumeSnapshots)
|
||||
err = c.checkVolumeSnapshotReadyToUse(context.Background(), volumeSnapshots, backup.Spec.CSISnapshotTimeout.Duration)
|
||||
if err != nil {
|
||||
backupLog.Errorf("fail to wait VolumeSnapshot change to Ready: %s", err.Error())
|
||||
}
|
||||
|
@ -890,9 +898,10 @@ func encodeToJSONGzip(data interface{}, desc string) (*bytes.Buffer, []error) {
|
|||
// using goroutine here instead of waiting in CSI plugin, because it's not easy to make BackupItemAction
|
||||
// parallel by now. After BackupItemAction parallel is implemented, this logic should be moved to CSI plugin
|
||||
// as https://github.com/vmware-tanzu/velero-plugin-for-csi/pull/100
|
||||
func (c *backupController) checkVolumeSnapshotReadyToUse(ctx context.Context, volumesnapshots []*snapshotv1api.VolumeSnapshot) error {
|
||||
func (c *backupController) checkVolumeSnapshotReadyToUse(ctx context.Context, volumesnapshots []*snapshotv1api.VolumeSnapshot,
|
||||
csiSnapshotTimeout time.Duration) error {
|
||||
eg, _ := errgroup.WithContext(ctx)
|
||||
timeout := 10 * time.Minute
|
||||
timeout := csiSnapshotTimeout
|
||||
interval := 5 * time.Second
|
||||
|
||||
for _, vs := range volumesnapshots {
|
||||
|
|
|
@ -29,6 +29,10 @@ metadata:
|
|||
namespace: velero
|
||||
# Parameters about the backup. Required.
|
||||
spec:
|
||||
# CSISnapshotTimeout specifies the time used to wait for
|
||||
# CSI VolumeSnapshot status turns to ReadyToUse during creation, before
|
||||
# returning error as timeout. The default value is 10 minutes.
|
||||
csiSnapshotTimeout: 10m
|
||||
# Array of namespaces to include in the backup. If unspecified, all namespaces are included.
|
||||
# Optional.
|
||||
includedNamespaces:
|
||||
|
|
|
@ -34,6 +34,10 @@ spec:
|
|||
schedule: 0 7 * * *
|
||||
# Template is the spec that should be used for each backup triggered by this schedule.
|
||||
template:
|
||||
# CSISnapshotTimeout specifies the time used to wait for
|
||||
# CSI VolumeSnapshot status turns to ReadyToUse during creation, before
|
||||
# returning error as timeout. The default value is 10 minutes.
|
||||
csiSnapshotTimeout: 10m
|
||||
# Array of namespaces to include in the scheduled backup. If unspecified, all namespaces are included.
|
||||
# Optional.
|
||||
includedNamespaces:
|
||||
|
|
|
@ -47,10 +47,10 @@ cd velero
|
|||
kubectl apply -f examples/nginx-app/with-pv.yaml
|
||||
```
|
||||
|
||||
1. Create a backup with PV snapshotting:
|
||||
1. Create a backup with PV snapshotting. `--csi-snapshot-timeout` sets how long to wait for CSI snapshot creation before timing out. The default value is 10 minutes:
|
||||
|
||||
```bash
|
||||
velero backup create nginx-backup --include-namespaces nginx-example
|
||||
velero backup create nginx-backup --include-namespaces nginx-example --csi-snapshot-timeout=20m
|
||||
```
|
||||
|
||||
1. Simulate a disaster:
|
||||
|
|
|
@ -29,6 +29,11 @@ metadata:
|
|||
namespace: velero
|
||||
# Parameters about the backup. Required.
|
||||
spec:
|
||||
# Available since v1.9.1.
|
||||
# CSISnapshotTimeout specifies the time used to wait for
|
||||
# CSI VolumeSnapshot status turns to ReadyToUse during creation, before
|
||||
# returning error as timeout. The default value is 10 minutes.
|
||||
csiSnapshotTimeout: 10m
|
||||
# Array of namespaces to include in the backup. If unspecified, all namespaces are included.
|
||||
# Optional.
|
||||
includedNamespaces:
|
||||
|
|
|
@ -34,6 +34,11 @@ spec:
|
|||
schedule: 0 7 * * *
|
||||
# Template is the spec that should be used for each backup triggered by this schedule.
|
||||
template:
|
||||
# Available since v1.9.1.
|
||||
# CSISnapshotTimeout specifies the time used to wait for
|
||||
# CSI VolumeSnapshot status turns to ReadyToUse during creation, before
|
||||
# returning error as timeout. The default value is 10 minutes.
|
||||
csiSnapshotTimeout: 10m
|
||||
# Array of namespaces to include in the scheduled backup. If unspecified, all namespaces are included.
|
||||
# Optional.
|
||||
includedNamespaces:
|
||||
|
|
Loading…
Reference in New Issue