Added ability to disable controllers (#1326)

Signed-off-by: James King <james.king@emc.com>
pull/1362/head
Aman Wangde 2019-04-10 15:41:28 -04:00 committed by Nolan Brubaker
parent 23c0d3f612
commit 0e0f357cef
2 changed files with 198 additions and 112 deletions

View File

@ -0,0 +1 @@
Added ability to dynamically disable controllers

View File

@ -84,10 +84,35 @@ const (
defaultProfilerAddress = "localhost:6060"
// keys used to map out available controllers with disable-controllers flag
BackupControllerKey = "backup"
BackupSyncControllerKey = "backup-sync"
ScheduleControllerKey = "schedule"
GcControllerKey = "gc"
BackupDeletionControllerKey = "backup-deletion"
RestoreControllerKey = "restore"
DownloadRequestControllerKey = "download-request"
ResticRepoControllerKey = "restic-repo"
ServerStatusRequestControllerKey = "server-status-request"
defaultControllerWorkers = 1
// the default TTL for a backup
defaultBackupTTL = 30 * 24 * time.Hour
)
// list of available controllers for input validation
var disableControllerList = []string{
BackupControllerKey,
BackupSyncControllerKey,
ScheduleControllerKey,
GcControllerKey,
BackupDeletionControllerKey,
RestoreControllerKey,
DownloadRequestControllerKey,
ResticRepoControllerKey,
ServerStatusRequestControllerKey,
}
type serverConfig struct {
pluginDir, metricsAddress, defaultBackupLocation string
backupSyncPeriod, podVolumeOperationTimeout, resourceTerminatingTimeout time.Duration
@ -95,11 +120,17 @@ type serverConfig struct {
restoreResourcePriorities []string
defaultVolumeSnapshotLocations map[string]string
restoreOnly bool
disabledControllers []string
clientQPS float32
clientBurst int
profilerAddress string
}
type controllerRunInfo struct {
controller controller.Interface
numWorkers int
}
func NewCommand() *cobra.Command {
var (
volumeSnapshotLocations = flag.NewMap().WithKeyValueDelimiter(":")
@ -169,6 +200,7 @@ func NewCommand() *cobra.Command {
command.Flags().DurationVar(&config.backupSyncPeriod, "backup-sync-period", config.backupSyncPeriod, "how often to ensure all Velero backups in object storage exist as Backup API objects in the cluster")
command.Flags().DurationVar(&config.podVolumeOperationTimeout, "restic-timeout", config.podVolumeOperationTimeout, "how long backups/restores of pod volumes should be allowed to run before timing out")
command.Flags().BoolVar(&config.restoreOnly, "restore-only", config.restoreOnly, "run in a mode where only restores are allowed; backups, schedules, and garbage-collection are all disabled")
command.Flags().StringSliceVar(&config.disabledControllers, "disable-controllers", config.disabledControllers, fmt.Sprintf("list of controllers to disable on startup. Valid values are %s", strings.Join(disableControllerList, ",")))
command.Flags().StringSliceVar(&config.restoreResourcePriorities, "restore-resource-priorities", config.restoreResourcePriorities, "desired order of resource restores; any resource not in the list will be restored alphabetically after the prioritized resources")
command.Flags().StringVar(&config.defaultBackupLocation, "default-backup-storage-location", config.defaultBackupLocation, "name of the default backup storage location")
command.Flags().Var(&volumeSnapshotLocations, "default-volume-snapshot-locations", "list of unique volume providers and default volume snapshot location (provider1:location-01,provider2:location-02,...)")
@ -530,28 +562,28 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
return clientmgmt.NewManager(logger, s.logLevel, s.pluginRegistry)
}
backupSyncController := controller.NewBackupSyncController(
s.veleroClient.VeleroV1(),
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().Backups(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.config.backupSyncPeriod,
s.namespace,
s.config.defaultBackupLocation,
newPluginManager,
s.logger,
)
wg.Add(1)
go func() {
backupSyncController.Run(ctx, 1)
wg.Done()
}()
backupSyncControllerRunInfo := func() controllerRunInfo {
backupSyncContoller := controller.NewBackupSyncController(
s.veleroClient.VeleroV1(),
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().Backups(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.config.backupSyncPeriod,
s.namespace,
s.config.defaultBackupLocation,
newPluginManager,
s.logger,
)
if s.config.restoreOnly {
s.logger.Info("Restore only mode - not starting the backup, schedule, delete-backup, or GC controllers")
} else {
backupTracker := controller.NewBackupTracker()
return controllerRunInfo{
controller: backupSyncContoller,
numWorkers: defaultControllerWorkers,
}
}
backupTracker := controller.NewBackupTracker()
backupControllerRunInfo := func() controllerRunInfo {
backupper, err := backup.NewKubernetesBackupper(
s.discoveryHelper,
client.NewDynamicFactory(s.dynamicClient),
@ -576,12 +608,14 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
defaultVolumeSnapshotLocations,
s.metrics,
)
wg.Add(1)
go func() {
backupController.Run(ctx, 1)
wg.Done()
}()
return controllerRunInfo{
controller: backupController,
numWorkers: defaultControllerWorkers,
}
}
scheduleControllerRunInfo := func() controllerRunInfo {
scheduleController := controller.NewScheduleController(
s.namespace,
s.veleroClient.VeleroV1(),
@ -590,25 +624,29 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.logger,
s.metrics,
)
wg.Add(1)
go func() {
scheduleController.Run(ctx, 1)
wg.Done()
}()
return controllerRunInfo{
controller: scheduleController,
numWorkers: defaultControllerWorkers,
}
}
gcControllerRunInfo := func() controllerRunInfo {
gcController := controller.NewGCController(
s.logger,
s.sharedInformerFactory.Velero().V1().Backups(),
s.sharedInformerFactory.Velero().V1().DeleteBackupRequests(),
s.veleroClient.VeleroV1(),
)
wg.Add(1)
go func() {
gcController.Run(ctx, 1)
wg.Done()
}()
backupDeletionController := controller.NewBackupDeletionController(
return controllerRunInfo{
controller: gcController,
numWorkers: defaultControllerWorkers,
}
}
deletionControllerRunInfo := func() controllerRunInfo {
deletionController := controller.NewBackupDeletionController(
s.logger,
s.sharedInformerFactory.Velero().V1().DeleteBackupRequests(),
s.veleroClient.VeleroV1(), // deleteBackupRequestClient
@ -623,88 +661,135 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
newPluginManager,
s.metrics,
)
return controllerRunInfo{
controller: deletionController,
numWorkers: defaultControllerWorkers,
}
}
restoreControllerRunInfo := func() controllerRunInfo {
restorer, err := restore.NewKubernetesRestorer(
s.discoveryHelper,
client.NewDynamicFactory(s.dynamicClient),
s.config.restoreResourcePriorities,
s.kubeClient.CoreV1().Namespaces(),
s.resticManager,
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.logger,
)
cmd.CheckError(err)
restoreController := controller.NewRestoreController(
s.namespace,
s.sharedInformerFactory.Velero().V1().Restores(),
s.veleroClient.VeleroV1(),
s.veleroClient.VeleroV1(),
restorer,
s.sharedInformerFactory.Velero().V1().Backups(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.sharedInformerFactory.Velero().V1().VolumeSnapshotLocations(),
s.logger,
s.logLevel,
newPluginManager,
s.config.defaultBackupLocation,
s.metrics,
)
return controllerRunInfo{
controller: restoreController,
numWorkers: defaultControllerWorkers,
}
}
resticRepoControllerRunInfo := func() controllerRunInfo {
resticRepoController := controller.NewResticRepositoryController(
s.logger,
s.sharedInformerFactory.Velero().V1().ResticRepositories(),
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.resticManager,
)
return controllerRunInfo{
controller: resticRepoController,
numWorkers: defaultControllerWorkers,
}
}
downloadrequestControllerRunInfo := func() controllerRunInfo {
downloadRequestController := controller.NewDownloadRequestController(
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().DownloadRequests(),
s.sharedInformerFactory.Velero().V1().Restores(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.sharedInformerFactory.Velero().V1().Backups(),
newPluginManager,
s.logger,
)
return controllerRunInfo{
controller: downloadRequestController,
numWorkers: defaultControllerWorkers,
}
}
serverStatusRequestControllerRunInfo := func() controllerRunInfo {
serverStatusRequestController := controller.NewServerStatusRequestController(
s.logger,
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().ServerStatusRequests(),
)
return controllerRunInfo{
controller: serverStatusRequestController,
numWorkers: defaultControllerWorkers,
}
}
enabledControllers := map[string]func() controllerRunInfo{
BackupSyncControllerKey: backupSyncControllerRunInfo,
BackupControllerKey: backupControllerRunInfo,
ScheduleControllerKey: scheduleControllerRunInfo,
GcControllerKey: gcControllerRunInfo,
BackupDeletionControllerKey: deletionControllerRunInfo,
RestoreControllerKey: restoreControllerRunInfo,
ResticRepoControllerKey: resticRepoControllerRunInfo,
DownloadRequestControllerKey: downloadrequestControllerRunInfo,
ServerStatusRequestControllerKey: serverStatusRequestControllerRunInfo,
}
if s.config.restoreOnly {
s.logger.Info("Restore only mode - not starting the backup, schedule, delete-backup, or GC controllers")
s.config.disabledControllers = append(s.config.disabledControllers,
BackupControllerKey,
ScheduleControllerKey,
GcControllerKey,
BackupDeletionControllerKey,
)
}
// remove disabled controllers
for _, controllerName := range s.config.disabledControllers {
if _, ok := enabledControllers[controllerName]; ok {
s.logger.Infof("Disabling controller: %s", controllerName)
delete(enabledControllers, controllerName)
} else {
s.logger.Fatalf("Invalid value for --disable-controllers flag provided: %s. Valid values are: %s", controllerName, strings.Join(disableControllerList, ","))
}
}
for _, newController := range enabledControllers {
controllerRunInfo := newController()
wg.Add(1)
go func() {
backupDeletionController.Run(ctx, 1)
controllerRunInfo.controller.Run(ctx, controllerRunInfo.numWorkers)
wg.Done()
}()
}
restorer, err := restore.NewKubernetesRestorer(
s.discoveryHelper,
client.NewDynamicFactory(s.dynamicClient),
s.config.restoreResourcePriorities,
s.kubeClient.CoreV1().Namespaces(),
s.resticManager,
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.logger,
)
cmd.CheckError(err)
restoreController := controller.NewRestoreController(
s.namespace,
s.sharedInformerFactory.Velero().V1().Restores(),
s.veleroClient.VeleroV1(),
s.veleroClient.VeleroV1(),
restorer,
s.sharedInformerFactory.Velero().V1().Backups(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.sharedInformerFactory.Velero().V1().VolumeSnapshotLocations(),
s.logger,
s.logLevel,
newPluginManager,
s.config.defaultBackupLocation,
s.metrics,
)
wg.Add(1)
go func() {
restoreController.Run(ctx, 1)
wg.Done()
}()
downloadRequestController := controller.NewDownloadRequestController(
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().DownloadRequests(),
s.sharedInformerFactory.Velero().V1().Restores(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.sharedInformerFactory.Velero().V1().Backups(),
newPluginManager,
s.logger,
)
wg.Add(1)
go func() {
downloadRequestController.Run(ctx, 1)
wg.Done()
}()
resticRepoController := controller.NewResticRepositoryController(
s.logger,
s.sharedInformerFactory.Velero().V1().ResticRepositories(),
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().BackupStorageLocations(),
s.resticManager,
)
wg.Add(1)
go func() {
// TODO only having a single worker may be an issue since maintenance
// can take a long time.
resticRepoController.Run(ctx, 1)
wg.Done()
}()
serverStatusRequestController := controller.NewServerStatusRequestController(
s.logger,
s.veleroClient.VeleroV1(),
s.sharedInformerFactory.Velero().V1().ServerStatusRequests(),
)
wg.Add(1)
go func() {
serverStatusRequestController.Run(ctx, 1)
wg.Done()
}()
// SHARED INFORMERS HAVE TO BE STARTED AFTER ALL CONTROLLERS
go s.sharedInformerFactory.Start(ctx.Done())