211 lines
8.0 KiB
Go
211 lines
8.0 KiB
Go
/*
|
|
Copyright 2017 Heptio Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package controller
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/apimachinery/pkg/util/clock"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
"k8s.io/client-go/tools/cache"
|
|
|
|
api "github.com/heptio/ark/pkg/apis/ark/v1"
|
|
"github.com/heptio/ark/pkg/cloudprovider"
|
|
arkv1client "github.com/heptio/ark/pkg/generated/clientset/typed/ark/v1"
|
|
informers "github.com/heptio/ark/pkg/generated/informers/externalversions/ark/v1"
|
|
listers "github.com/heptio/ark/pkg/generated/listers/ark/v1"
|
|
"github.com/heptio/ark/pkg/util/kube"
|
|
)
|
|
|
|
// gcController removes expired backup content from object storage.
|
|
type gcController struct {
|
|
backupService cloudprovider.BackupService
|
|
snapshotService cloudprovider.SnapshotService
|
|
bucket string
|
|
syncPeriod time.Duration
|
|
clock clock.Clock
|
|
backupLister listers.BackupLister
|
|
backupListerSynced cache.InformerSynced
|
|
backupClient arkv1client.BackupsGetter
|
|
restoreLister listers.RestoreLister
|
|
restoreListerSynced cache.InformerSynced
|
|
restoreClient arkv1client.RestoresGetter
|
|
}
|
|
|
|
// NewGCController constructs a new gcController.
|
|
func NewGCController(
|
|
backupService cloudprovider.BackupService,
|
|
snapshotService cloudprovider.SnapshotService,
|
|
bucket string,
|
|
syncPeriod time.Duration,
|
|
backupInformer informers.BackupInformer,
|
|
backupClient arkv1client.BackupsGetter,
|
|
restoreInformer informers.RestoreInformer,
|
|
restoreClient arkv1client.RestoresGetter,
|
|
) Interface {
|
|
if syncPeriod < time.Minute {
|
|
glog.Infof("GC sync period %v is too short. Setting to 1 minute", syncPeriod)
|
|
syncPeriod = time.Minute
|
|
}
|
|
|
|
return &gcController{
|
|
backupService: backupService,
|
|
snapshotService: snapshotService,
|
|
bucket: bucket,
|
|
syncPeriod: syncPeriod,
|
|
clock: clock.RealClock{},
|
|
backupLister: backupInformer.Lister(),
|
|
backupListerSynced: backupInformer.Informer().HasSynced,
|
|
backupClient: backupClient,
|
|
restoreLister: restoreInformer.Lister(),
|
|
restoreListerSynced: restoreInformer.Informer().HasSynced,
|
|
restoreClient: restoreClient,
|
|
}
|
|
}
|
|
|
|
var _ Interface = &gcController{}
|
|
|
|
// Run is a blocking function that runs a single worker to garbage-collect backups
|
|
// from object/block storage and the Ark API. It will return when it receives on the
|
|
// ctx.Done() channel.
|
|
func (c *gcController) Run(ctx context.Context, workers int) error {
|
|
glog.Info("Waiting for caches to sync")
|
|
if !cache.WaitForCacheSync(ctx.Done(), c.backupListerSynced, c.restoreListerSynced) {
|
|
return errors.New("timed out waiting for caches to sync")
|
|
}
|
|
glog.Info("Caches are synced")
|
|
|
|
wait.Until(c.run, c.syncPeriod, ctx.Done())
|
|
return nil
|
|
}
|
|
|
|
func (c *gcController) run() {
|
|
c.processBackups()
|
|
}
|
|
|
|
// garbageCollectBackup removes an expired backup by deleting any associated backup files (if
|
|
// deleteBackupFile = true), volume snapshots, restore API objects, and the backup API object
|
|
// itself.
|
|
func (c *gcController) garbageCollectBackup(backup *api.Backup, deleteBackupFile bool) {
|
|
// if the backup includes snapshots but we don't currently have a PVProvider, we don't
|
|
// want to orphan the snapshots so skip garbage-collection entirely.
|
|
if c.snapshotService == nil && len(backup.Status.VolumeBackups) > 0 {
|
|
glog.Warningf("Cannot garbage-collect backup %s because backup includes snapshots and server is not configured with PersistentVolumeProvider",
|
|
kube.NamespaceAndName(backup))
|
|
return
|
|
}
|
|
|
|
// The GC process is primarily intended to delete expired cloud resources (file in object
|
|
// storage, snapshots). If we fail to delete any of these, we don't delete the Backup API
|
|
// object or metadata file in object storage so that we don't orphan the cloud resources.
|
|
deletionFailure := false
|
|
|
|
for _, volumeBackup := range backup.Status.VolumeBackups {
|
|
glog.Infof("Removing snapshot %s associated with backup %s", volumeBackup.SnapshotID, kube.NamespaceAndName(backup))
|
|
if err := c.snapshotService.DeleteSnapshot(volumeBackup.SnapshotID); err != nil {
|
|
glog.Errorf("error deleting snapshot %v: %v", volumeBackup.SnapshotID, err)
|
|
deletionFailure = true
|
|
}
|
|
}
|
|
|
|
// If applicable, delete backup & metadata file from object storage *before* deleting the API object
|
|
// because otherwise the backup sync controller could re-sync the backup from object storage.
|
|
if deleteBackupFile {
|
|
glog.Infof("Removing backup %s", kube.NamespaceAndName(backup))
|
|
if err := c.backupService.DeleteBackupFile(c.bucket, backup.Name); err != nil {
|
|
glog.Errorf("error deleting backup %s: %v", kube.NamespaceAndName(backup), err)
|
|
deletionFailure = true
|
|
}
|
|
|
|
if deletionFailure {
|
|
glog.Warningf("Backup %s will not be deleted due to errors deleting related object storage files(s) and/or volume snapshots", kube.NamespaceAndName(backup))
|
|
} else {
|
|
if err := c.backupService.DeleteBackupMetadataFile(c.bucket, backup.Name); err != nil {
|
|
glog.Errorf("error deleting backup metadata file for %s: %v", kube.NamespaceAndName(backup), err)
|
|
deletionFailure = true
|
|
}
|
|
}
|
|
}
|
|
|
|
glog.Infof("Getting restore API objects referencing backup %s", kube.NamespaceAndName(backup))
|
|
if restores, err := c.restoreLister.Restores(backup.Namespace).List(labels.Everything()); err != nil {
|
|
glog.Errorf("error getting restore API objects: %v", err)
|
|
} else {
|
|
for _, restore := range restores {
|
|
if restore.Spec.BackupName == backup.Name {
|
|
glog.Infof("Removing restore API object %s of backup %s", kube.NamespaceAndName(restore), kube.NamespaceAndName(backup))
|
|
if err := c.restoreClient.Restores(restore.Namespace).Delete(restore.Name, &metav1.DeleteOptions{}); err != nil {
|
|
glog.Errorf("error deleting restore API object %s: %v", kube.NamespaceAndName(restore), err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if deletionFailure {
|
|
glog.Warningf("Backup %s will not be deleted due to errors deleting related object storage files(s) and/or volume snapshots", kube.NamespaceAndName(backup))
|
|
return
|
|
}
|
|
|
|
glog.Infof("Removing backup API object %s", kube.NamespaceAndName(backup))
|
|
if err := c.backupClient.Backups(backup.Namespace).Delete(backup.Name, &metav1.DeleteOptions{}); err != nil {
|
|
glog.Errorf("error deleting backup API object %s: %v", kube.NamespaceAndName(backup), err)
|
|
}
|
|
}
|
|
|
|
// garbageCollectBackups checks backups for expiration and triggers garbage-collection for the expired
|
|
// ones.
|
|
func (c *gcController) garbageCollectBackups(backups []*api.Backup, expiration time.Time, deleteBackupFiles bool) {
|
|
for _, backup := range backups {
|
|
if backup.Status.Expiration.Time.After(expiration) {
|
|
glog.Infof("Backup %s has not expired yet, skipping", kube.NamespaceAndName(backup))
|
|
continue
|
|
}
|
|
|
|
c.garbageCollectBackup(backup, deleteBackupFiles)
|
|
}
|
|
}
|
|
|
|
// processBackups gets backups from object storage and the API and submits
|
|
// them for garbage-collection.
|
|
func (c *gcController) processBackups() {
|
|
now := c.clock.Now()
|
|
glog.Infof("garbage-collecting backups that have expired as of %v", now)
|
|
|
|
// GC backups in object storage. We do this in addition
|
|
// to GC'ing API objects to prevent orphan backup files.
|
|
backups, err := c.backupService.GetAllBackups(c.bucket)
|
|
if err != nil {
|
|
glog.Errorf("error getting all backups from object storage: %v", err)
|
|
return
|
|
}
|
|
c.garbageCollectBackups(backups, now, true)
|
|
|
|
// GC backups without files in object storage
|
|
apiBackups, err := c.backupLister.List(labels.Everything())
|
|
if err != nil {
|
|
glog.Errorf("error getting all backup API objects: %v", err)
|
|
return
|
|
}
|
|
c.garbageCollectBackups(apiBackups, now, false)
|
|
}
|