add server setting for default timeouts

Signed-off-by: Emily McMullan <emcmulla@redhat.com>
pull/5926/head
Emily McMullan 2023-02-27 18:13:31 -05:00
parent 54042c3b01
commit ec4a7072b3
5 changed files with 29 additions and 13 deletions

View File

@ -0,0 +1 @@
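Add a configurable server setting (`--resource-timeout`) that replaces the hard-coded default timeouts for resource operations not covered by other timeout parameters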

View File

@ -103,6 +103,8 @@ const (
defaultCSISnapshotTimeout = 10 * time.Minute
defaultItemOperationTimeout = 60 * time.Minute
resourceTimeout = 10 * time.Minute
// defaultCredentialsDirectory is the path on disk where credential
// files will be written to
defaultCredentialsDirectory = "/tmp/credentials"
@ -113,7 +115,7 @@ type serverConfig struct {
pluginDir, metricsAddress, defaultBackupLocation string
backupSyncPeriod, podVolumeOperationTimeout, resourceTerminatingTimeout time.Duration
defaultBackupTTL, storeValidationFrequency, defaultCSISnapshotTimeout time.Duration
defaultItemOperationTimeout time.Duration
defaultItemOperationTimeout, resourceTimeout time.Duration
restoreResourcePriorities restore.Priorities
defaultVolumeSnapshotLocations map[string]string
restoreOnly bool
@ -148,6 +150,7 @@ func NewCommand(f client.Factory) *cobra.Command {
defaultBackupTTL: defaultBackupTTL,
defaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
defaultItemOperationTimeout: defaultItemOperationTimeout,
resourceTimeout: resourceTimeout,
storeValidationFrequency: defaultStoreValidationFrequency,
podVolumeOperationTimeout: defaultPodVolumeOperationTimeout,
restoreResourcePriorities: defaultRestorePriorities,
@ -227,6 +230,7 @@ func NewCommand(f client.Factory) *cobra.Command {
command.Flags().BoolVar(&config.defaultVolumesToFsBackup, "default-volumes-to-fs-backup", config.defaultVolumesToFsBackup, "Backup all volumes with pod volume file system backup by default.")
command.Flags().StringVar(&config.uploaderType, "uploader-type", config.uploaderType, "Type of uploader to handle the transfer of data of pod volumes")
command.Flags().DurationVar(&config.defaultItemOperationTimeout, "default-item-operation-timeout", config.defaultItemOperationTimeout, "How long to wait on asynchronous BackupItemActions and RestoreItemActions to complete before timing out.")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
return command
}
@ -568,7 +572,7 @@ func (s *server) initRepoManager() error {
}
s.repoLocker = repository.NewRepoLocker()
s.repoEnsurer = repository.NewRepositoryEnsurer(s.mgr.GetClient(), s.logger)
s.repoEnsurer = repository.NewRepositoryEnsurer(s.mgr.GetClient(), s.logger, s.config.resourceTimeout)
s.repoManager = repository.NewManager(s.namespace, s.mgr.GetClient(), s.repoLocker, s.repoEnsurer, s.credentialFileStore, s.credentialSecretStore, s.logger)
@ -736,6 +740,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.config.defaultVolumesToFsBackup,
s.config.defaultBackupTTL,
s.config.defaultCSISnapshotTimeout,
s.config.resourceTimeout,
s.config.defaultItemOperationTimeout,
defaultVolumeSnapshotLocations,
s.metrics,
@ -863,6 +868,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
),
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.config.resourceTimeout,
s.logger,
podexec.NewPodCommandExecutor(s.kubeClientConfig, s.kubeClient.CoreV1().RESTClient()),
s.kubeClient.CoreV1().RESTClient(),

View File

@ -84,6 +84,7 @@ type backupReconciler struct {
defaultVolumesToFsBackup bool
defaultBackupTTL time.Duration
defaultCSISnapshotTimeout time.Duration
resourceTimeout time.Duration
defaultItemOperationTimeout time.Duration
defaultSnapshotLocations map[string]string
metrics *metrics.ServerMetrics
@ -107,6 +108,7 @@ func NewBackupReconciler(
defaultVolumesToFsBackup bool,
defaultBackupTTL time.Duration,
defaultCSISnapshotTimeout time.Duration,
resourceTimeout time.Duration,
defaultItemOperationTimeout time.Duration,
defaultSnapshotLocations map[string]string,
metrics *metrics.ServerMetrics,
@ -131,6 +133,7 @@ func NewBackupReconciler(
defaultVolumesToFsBackup: defaultVolumesToFsBackup,
defaultBackupTTL: defaultBackupTTL,
defaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
resourceTimeout: resourceTimeout,
defaultItemOperationTimeout: defaultItemOperationTimeout,
defaultSnapshotLocations: defaultSnapshotLocations,
metrics: metrics,
@ -1057,7 +1060,7 @@ func (b *backupReconciler) deleteVolumeSnapshot(volumeSnapshots []snapshotv1api.
// Setting the VolumeSnapshotRef's UID to nil lets the csi-controller find out that the
// related VS is gone, so that the VSC can then be deleted.
func (b *backupReconciler) recreateVolumeSnapshotContent(vsc snapshotv1api.VolumeSnapshotContent) error {
timeout := 1 * time.Minute
timeout := b.resourceTimeout
interval := 1 * time.Second
err := b.kbClient.Delete(context.TODO(), &vsc)

View File

@ -37,15 +37,17 @@ type RepositoryEnsurer struct {
// repoLocksMu synchronizes reads/writes to the repoLocks map itself
// since maps are not threadsafe.
repoLocksMu sync.Mutex
repoLocks map[BackupRepositoryKey]*sync.Mutex
repoLocksMu sync.Mutex
repoLocks map[BackupRepositoryKey]*sync.Mutex
resourceTimeout time.Duration
}
func NewRepositoryEnsurer(repoClient client.Client, log logrus.FieldLogger) *RepositoryEnsurer {
func NewRepositoryEnsurer(repoClient client.Client, log logrus.FieldLogger, resourceTimeout time.Duration) *RepositoryEnsurer {
return &RepositoryEnsurer{
log: log,
repoClient: repoClient,
repoLocks: make(map[BackupRepositoryKey]*sync.Mutex),
log: log,
repoClient: repoClient,
repoLocks: make(map[BackupRepositoryKey]*sync.Mutex),
resourceTimeout: resourceTimeout,
}
}
@ -124,7 +126,7 @@ func (r *RepositoryEnsurer) createBackupRepositoryAndWait(ctx context.Context, n
}
}
err := wait.PollWithContext(ctx, time.Millisecond*500, time.Minute*5, checkFunc)
err := wait.PollWithContext(ctx, time.Millisecond*500, r.resourceTimeout, checkFunc)
if err != nil {
return nil, errors.Wrap(err, "failed to wait BackupRepository")
} else {

View File

@ -96,6 +96,7 @@ type kubernetesRestorer struct {
podVolumeRestorerFactory podvolume.RestorerFactory
podVolumeTimeout time.Duration
resourceTerminatingTimeout time.Duration
resourceTimeout time.Duration
resourcePriorities Priorities
fileSystem filesystem.Interface
pvRenamer func(string) (string, error)
@ -115,6 +116,7 @@ func NewKubernetesRestorer(
podVolumeRestorerFactory podvolume.RestorerFactory,
podVolumeTimeout time.Duration,
resourceTerminatingTimeout time.Duration,
resourceTimeout time.Duration,
logger logrus.FieldLogger,
podCommandExecutor podexec.PodCommandExecutor,
podGetter cache.Getter,
@ -128,6 +130,7 @@ func NewKubernetesRestorer(
podVolumeRestorerFactory: podVolumeRestorerFactory,
podVolumeTimeout: podVolumeTimeout,
resourceTerminatingTimeout: resourceTerminatingTimeout,
resourceTimeout: resourceTimeout,
resourcePriorities: resourcePriorities,
logger: logger,
pvRenamer: func(string) (string, error) {
@ -296,6 +299,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
volumeSnapshots: req.VolumeSnapshots,
podVolumeBackups: req.PodVolumeBackups,
resourceTerminatingTimeout: kr.resourceTerminatingTimeout,
resourceTimeout: kr.resourceTimeout,
resourceClients: make(map[resourceClientKey]client.Dynamic),
restoredItems: req.RestoredItems,
renamedPVs: make(map[string]string),
@ -339,6 +343,7 @@ type restoreContext struct {
volumeSnapshots []*volume.Snapshot
podVolumeBackups []*velerov1api.PodVolumeBackup
resourceTerminatingTimeout time.Duration
resourceTimeout time.Duration
resourceClients map[resourceClientKey]client.Dynamic
restoredItems map[itemKey]string
renamedPVs map[string]string
@ -842,9 +847,8 @@ func (ctx *restoreContext) crdAvailable(name string, crdClient client.Dynamic) (
crdLogger := ctx.log.WithField("crdName", name)
var available bool
// Wait 1 minute rather than the standard resource timeout, since each CRD
// will transition fairly quickly.
err := wait.PollImmediate(time.Second, time.Minute*1, func() (bool, error) {
err := wait.PollImmediate(time.Second, ctx.resourceTimeout, func() (bool, error) {
unstructuredCRD, err := crdClient.Get(name, metav1.GetOptions{})
if err != nil {
return true, err