/*
Copyright 2017 Heptio Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"bytes"
	"context"
	"fmt"
	"io/ioutil"
	"os"
	"sync"
	"time"

	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/clock"
	kuberrs "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"

	api "github.com/heptio/ark/pkg/apis/ark/v1"
	"github.com/heptio/ark/pkg/backup"
	"github.com/heptio/ark/pkg/cloudprovider"
	"github.com/heptio/ark/pkg/generated/clientset/versioned/scheme"
	arkv1client "github.com/heptio/ark/pkg/generated/clientset/versioned/typed/ark/v1"
	informers "github.com/heptio/ark/pkg/generated/informers/externalversions/ark/v1"
	listers "github.com/heptio/ark/pkg/generated/listers/ark/v1"
	"github.com/heptio/ark/pkg/util/collections"
	"github.com/heptio/ark/pkg/util/encode"
	kubeutil "github.com/heptio/ark/pkg/util/kube"
)
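
// backupVersion is the version number recorded in a Backup's status (and
// therefore in the backup JSON uploaded to object storage).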
const backupVersion = 1
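
// backupController watches for new Backup resources, runs the backup for each
// one, and uploads the results to object storage.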
type backupController struct {
	backupper        backup.Backupper
	backupService    cloudprovider.BackupService
	bucket           string
	pvProviderExists bool
	lister           listers.BackupLister
	listerSynced     cache.InformerSynced
	client           arkv1client.BackupsGetter
	syncHandler      func(backupName string) error
	queue            workqueue.RateLimitingInterface
	clock            clock.Clock
	logger           *logrus.Logger
}
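
// NewBackupController constructs a controller that enqueues new Backups from
// the shared informer and processes them with the given backupper and backup
// service.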
func NewBackupController(
	backupInformer informers.BackupInformer,
	client arkv1client.BackupsGetter,
	backupper backup.Backupper,
	backupService cloudprovider.BackupService,
	bucket string,
	pvProviderExists bool,
	logger *logrus.Logger,
) Interface {
	c := &backupController{
		backupper:        backupper,
		backupService:    backupService,
		bucket:           bucket,
		pvProviderExists: pvProviderExists,
		lister:           backupInformer.Lister(),
		listerSynced:     backupInformer.Informer().HasSynced,
		client:           client,
		queue:            workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "backup"),
		clock:            &clock.RealClock{},
		logger:           logger,
	}

	c.syncHandler = c.processBackup

	backupInformer.Informer().AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				backup := obj.(*api.Backup)

				switch backup.Status.Phase {
				case "", api.BackupPhaseNew:
					// only process new backups
				default:
					c.logger.WithFields(logrus.Fields{
						"backup": kubeutil.NamespaceAndName(backup),
						"phase":  backup.Status.Phase,
					}).Debug("Backup is not new, skipping")
					return
				}

				key, err := cache.MetaNamespaceKeyFunc(backup)
				if err != nil {
					c.logger.WithError(err).WithField("backup", backup).Error("Error creating queue key, item not added to queue")
					return
				}
				c.queue.Add(key)
			},
		},
	)

	return c
}

// Run is a blocking function that runs the specified number of worker goroutines
// to process items in the work queue. It will return when it receives on the
// ctx.Done() channel.
func (controller *backupController) Run(ctx context.Context, numWorkers int) error {
	var wg sync.WaitGroup

	defer func() {
		controller.logger.Info("Waiting for workers to finish their work")

		controller.queue.ShutDown()

		// We have to wait here in the deferred function instead of at the bottom of the function body
		// because we have to shut down the queue in order for the workers to shut down gracefully, and
		// we want to shut down the queue via defer and not at the end of the body.
		wg.Wait()

		controller.logger.Info("All workers have finished")
	}()

	controller.logger.Info("Starting BackupController")
	defer controller.logger.Info("Shutting down BackupController")

	controller.logger.Info("Waiting for caches to sync")
	if !cache.WaitForCacheSync(ctx.Done(), controller.listerSynced) {
		return errors.New("timed out waiting for caches to sync")
	}
	controller.logger.Info("Caches are synced")

	wg.Add(numWorkers)
	for i := 0; i < numWorkers; i++ {
		go func() {
			wait.Until(controller.runWorker, time.Second, ctx.Done())
			wg.Done()
		}()
	}

	<-ctx.Done()

	return nil
}
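
// runWorker is the loop executed by each worker goroutine: it keeps processing
// items from the work queue until the queue is shut down.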
func (controller *backupController) runWorker() {
	// continually take items off the queue (waits if it's
	// empty) until we get a shutdown signal from the queue
	for controller.processNextWorkItem() {
	}
}
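
// processNextWorkItem handles a single queue key with the sync handler. It
// returns false only when the queue has been shut down; failed keys are
// re-queued with rate limiting.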
func (controller *backupController) processNextWorkItem() bool {
	key, quit := controller.queue.Get()
	if quit {
		return false
	}
	// always call done on this item, since if it fails we'll add
	// it back with rate-limiting below
	defer controller.queue.Done(key)

	err := controller.syncHandler(key.(string))
	if err == nil {
		// If you had no error, tell the queue to stop tracking history for your key. This will reset
		// things like failure counts for per-item rate limiting.
		controller.queue.Forget(key)
		return true
	}

	controller.logger.WithError(err).WithField("key", key).Error("Error in syncHandler, re-adding item to queue")
	// we had an error processing the item so add it back
	// into the queue for re-processing with rate-limiting
	controller.queue.AddRateLimited(key)

	return true
}
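
// processBackup is the sync handler: it fetches the Backup for the given key,
// validates it, marks it InProgress (or FailedValidation), runs the backup,
// and records the final phase.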
func (controller *backupController) processBackup(key string) error {
	logContext := controller.logger.WithField("key", key)

	logContext.Debug("Running processBackup")
	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return errors.Wrap(err, "error splitting queue key")
	}

	logContext.Debug("Getting backup")
	backup, err := controller.lister.Backups(ns).Get(name)
	if err != nil {
		return errors.Wrap(err, "error getting backup")
	}

	// TODO I think this is now unnecessary. We only initially place
	// items with Phase = ("" | New) into the queue. Items will only get
	// re-queued if syncHandler returns an error, which will only
	// happen if there's an error updating Phase from its initial
	// state to something else. So any time it's re-queued it will
	// still have its initial state, which we've already confirmed
	// is ("" | New).
	switch backup.Status.Phase {
	case "", api.BackupPhaseNew:
		// only process new backups
	default:
		return nil
	}

	logContext.Debug("Cloning backup")
	// don't modify items in the cache
	backup, err = cloneBackup(backup)
	if err != nil {
		return err
	}

	// set backup version
	backup.Status.Version = backupVersion

	// calculate expiration
	if backup.Spec.TTL.Duration > 0 {
		backup.Status.Expiration = metav1.NewTime(controller.clock.Now().Add(backup.Spec.TTL.Duration))
	}

	// validation
	if backup.Status.ValidationErrors = controller.getValidationErrors(backup); len(backup.Status.ValidationErrors) > 0 {
		backup.Status.Phase = api.BackupPhaseFailedValidation
	} else {
		backup.Status.Phase = api.BackupPhaseInProgress
	}

	// update status
	updatedBackup, err := controller.client.Backups(ns).Update(backup)
	if err != nil {
		return errors.Wrapf(err, "error updating Backup status to %s", backup.Status.Phase)
	}
	backup = updatedBackup

	if backup.Status.Phase == api.BackupPhaseFailedValidation {
		return nil
	}

	logContext.Debug("Running backup")
	// execution & upload of backup
	if err := controller.runBackup(backup, controller.bucket); err != nil {
		logContext.WithError(err).Error("backup failed")
		backup.Status.Phase = api.BackupPhaseFailed
	}

	logContext.Debug("Updating backup's final status")
	if _, err = controller.client.Backups(ns).Update(backup); err != nil {
		logContext.WithError(err).Error("error updating backup's final status")
	}

	return nil
}
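
// cloneBackup deep-copies the given object and asserts the result to
// *api.Backup so callers never mutate the shared informer cache.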
func cloneBackup(in interface{}) (*api.Backup, error) {
	clone, err := scheme.Scheme.DeepCopy(in)
	if err != nil {
		return nil, errors.Wrap(err, "error deep-copying Backup")
	}

	out, ok := clone.(*api.Backup)
	if !ok {
		return nil, errors.Errorf("unexpected type: %T", clone)
	}

	return out, nil
}
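
// getValidationErrors returns human-readable problems with the Backup spec:
// invalid include/exclude lists, or a request for PV snapshots when no PV
// provider is configured.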
func (controller *backupController) getValidationErrors(itm *api.Backup) []string {
	var validationErrors []string

	for _, err := range collections.ValidateIncludesExcludes(itm.Spec.IncludedResources, itm.Spec.ExcludedResources) {
		validationErrors = append(validationErrors, fmt.Sprintf("Invalid included/excluded resource lists: %v", err))
	}

	for _, err := range collections.ValidateIncludesExcludes(itm.Spec.IncludedNamespaces, itm.Spec.ExcludedNamespaces) {
		validationErrors = append(validationErrors, fmt.Sprintf("Invalid included/excluded namespace lists: %v", err))
	}

	if !controller.pvProviderExists && itm.Spec.SnapshotVolumes != nil && *itm.Spec.SnapshotVolumes {
		validationErrors = append(validationErrors, "Server is not configured for PV snapshots")
	}

	return validationErrors
}
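
// runBackup writes the backup contents and log to temp files, invokes the
// backupper, and uploads the backup JSON, contents, and log to the bucket.
// Temp files are closed and removed in the deferred cleanup, which folds any
// cleanup errors into the returned error.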
func (controller *backupController) runBackup(backup *api.Backup, bucket string) (err error) {
	backupFile, err := ioutil.TempFile("", "")
	if err != nil {
		return errors.Wrap(err, "error creating temp file for Backup")
	}

	logFile, err := ioutil.TempFile("", "")
	if err != nil {
		return errors.Wrap(err, "error creating temp file for Backup log")
	}

	defer func() {
		var errs []error
		// TODO should this be wrapped?
		errs = append(errs, err)

		if err := backupFile.Close(); err != nil {
			errs = append(errs, errors.Wrap(err, "error closing Backup temp file"))
		}

		if err := os.Remove(backupFile.Name()); err != nil {
			errs = append(errs, errors.Wrap(err, "error removing Backup temp file"))
		}

		if err := logFile.Close(); err != nil {
			errs = append(errs, errors.Wrap(err, "error closing Backup log temp file"))
		}

		if err := os.Remove(logFile.Name()); err != nil {
			errs = append(errs, errors.Wrap(err, "error removing Backup log temp file"))
		}

		// assign to the named return value so cleanup errors aren't dropped
		err = kuberrs.NewAggregate(errs)
	}()

	if err := controller.backupper.Backup(backup, backupFile, logFile); err != nil {
		return err
	}
	controller.logger.WithField("backup", kubeutil.NamespaceAndName(backup)).Info("backup completed")

	// note: updating this here so the uploaded JSON shows "completed". If
	// the upload fails, we'll alter the phase in the calling func.
	backup.Status.Phase = api.BackupPhaseCompleted

	buf := new(bytes.Buffer)
	if err := encode.EncodeTo(backup, "json", buf); err != nil {
		return errors.Wrap(err, "error encoding Backup")
	}

	// re-set the files' offset to 0 for reading
	if _, err = backupFile.Seek(0, 0); err != nil {
		return errors.Wrap(err, "error resetting Backup file offset")
	}
	if _, err = logFile.Seek(0, 0); err != nil {
		return errors.Wrap(err, "error resetting Backup log file offset")
	}

	return controller.backupService.UploadBackup(bucket, backup.Name, bytes.NewReader(buf.Bytes()), backupFile, logFile)
}