/*
Copyright 2017 the Heptio Ark contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
    "bytes"
    "compress/gzip"
    "encoding/json"
    "fmt"
    "io"
    "io/ioutil"
    "os"
    "time"

    jsonpatch "github.com/evanphx/json-patch"
    "github.com/pkg/errors"
    "github.com/sirupsen/logrus"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/clock"
    kerrors "k8s.io/apimachinery/pkg/util/errors"
    "k8s.io/client-go/tools/cache"

    api "github.com/heptio/ark/pkg/apis/ark/v1"
    "github.com/heptio/ark/pkg/backup"
    arkv1client "github.com/heptio/ark/pkg/generated/clientset/versioned/typed/ark/v1"
    informers "github.com/heptio/ark/pkg/generated/informers/externalversions/ark/v1"
    listers "github.com/heptio/ark/pkg/generated/listers/ark/v1"
    "github.com/heptio/ark/pkg/metrics"
    "github.com/heptio/ark/pkg/persistence"
    "github.com/heptio/ark/pkg/plugin"
    "github.com/heptio/ark/pkg/util/collections"
    "github.com/heptio/ark/pkg/util/encode"
    kubeutil "github.com/heptio/ark/pkg/util/kube"
    "github.com/heptio/ark/pkg/util/logging"
)

const backupVersion = 1
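
// backupController watches for Backup API objects and processes new ones:
// validating them, running the backup, and uploading the results to the
// configured backup storage location.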
type backupController struct {
    *genericController

    backupper                backup.Backupper
    pvProviderExists         bool
    lister                   listers.BackupLister
    client                   arkv1client.BackupsGetter
    clock                    clock.Clock
    backupLogLevel           logrus.Level
    newPluginManager         func(logrus.FieldLogger) plugin.Manager
    backupTracker            BackupTracker
    backupLocationLister     listers.BackupStorageLocationLister
    defaultBackupLocation    string
    snapshotLocationLister   listers.VolumeSnapshotLocationLister
    defaultSnapshotLocations map[string]string
    metrics                  *metrics.ServerMetrics
    newBackupStore           func(*api.BackupStorageLocation, persistence.ObjectStoreGetter, logrus.FieldLogger) (persistence.BackupStore, error)
}
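
// NewBackupController constructs a backup controller from the given informers,
// client, backupper, and plugin manager factory, and registers an informer
// event handler that enqueues only backups in the New (or empty) phase.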
func NewBackupController(
    backupInformer informers.BackupInformer,
    client arkv1client.BackupsGetter,
    backupper backup.Backupper,
    pvProviderExists bool,
    logger logrus.FieldLogger,
    backupLogLevel logrus.Level,
    newPluginManager func(logrus.FieldLogger) plugin.Manager,
    backupTracker BackupTracker,
    backupLocationInformer informers.BackupStorageLocationInformer,
    defaultBackupLocation string,
    volumeSnapshotLocationInformer informers.VolumeSnapshotLocationInformer,
    defaultSnapshotLocations map[string]string,
    metrics *metrics.ServerMetrics,
) Interface {
    c := &backupController{
        genericController:        newGenericController("backup", logger),
        backupper:                backupper,
        pvProviderExists:         pvProviderExists,
        lister:                   backupInformer.Lister(),
        client:                   client,
        clock:                    &clock.RealClock{},
        backupLogLevel:           backupLogLevel,
        newPluginManager:         newPluginManager,
        backupTracker:            backupTracker,
        backupLocationLister:     backupLocationInformer.Lister(),
        defaultBackupLocation:    defaultBackupLocation,
        snapshotLocationLister:   volumeSnapshotLocationInformer.Lister(),
        defaultSnapshotLocations: defaultSnapshotLocations,
        metrics:                  metrics,
        newBackupStore:           persistence.NewObjectBackupStore,
    }

    c.syncHandler = c.processBackup
    c.cacheSyncWaiters = append(c.cacheSyncWaiters,
        backupInformer.Informer().HasSynced,
        backupLocationInformer.Informer().HasSynced,
        volumeSnapshotLocationInformer.Informer().HasSynced,
    )

    backupInformer.Informer().AddEventHandler(
        cache.ResourceEventHandlerFuncs{
            AddFunc: func(obj interface{}) {
                backup := obj.(*api.Backup)

                switch backup.Status.Phase {
                case "", api.BackupPhaseNew:
                    // only process new backups
                default:
                    c.logger.WithFields(logrus.Fields{
                        "backup": kubeutil.NamespaceAndName(backup),
                        "phase":  backup.Status.Phase,
                    }).Debug("Backup is not new, skipping")
                    return
                }

                key, err := cache.MetaNamespaceKeyFunc(backup)
                if err != nil {
                    c.logger.WithError(err).WithField("backup", backup).Error("Error creating queue key, item not added to queue")
                    return
                }
                c.queue.Add(key)
            },
        },
    )

    return c
}
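
// processBackup is the sync handler for a single queue key: it re-checks the
// backup's phase, validates it, patches it to InProgress (or FailedValidation),
// runs the backup, and patches in the final phase.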
func (c *backupController) processBackup(key string) error {
    log := c.logger.WithField("key", key)

    log.Debug("Running processBackup")
    ns, name, err := cache.SplitMetaNamespaceKey(key)
    if err != nil {
        return errors.Wrap(err, "error splitting queue key")
    }

    log.Debug("Getting backup")
    backup, err := c.lister.Backups(ns).Get(name)
    if err != nil {
        return errors.Wrap(err, "error getting backup")
    }

    // Double-check we have the correct phase. In the unlikely event that multiple controller
    // instances are running, it's possible for controller A to succeed in changing the phase to
    // InProgress, while controller B's attempt to patch the phase fails. When controller B
    // reprocesses the same backup, it will either show up as New (informer hasn't seen the update
    // yet) or as InProgress. In the former case, the patch attempt will fail again, until the
    // informer sees the update. In the latter case, after the informer has seen the update to
    // InProgress, we still need this check so we can return nil to indicate we've finished processing
    // this key (even though it was a no-op).
    switch backup.Status.Phase {
    case "", api.BackupPhaseNew:
        // only process new backups
    default:
        return nil
    }

    log.Debug("Cloning backup")
    // store ref to original for creating patch
    original := backup
    // don't modify items in the cache
    backup = backup.DeepCopy()

    // set backup version
    backup.Status.Version = backupVersion

    // calculate expiration
    if backup.Spec.TTL.Duration > 0 {
        backup.Status.Expiration = metav1.NewTime(c.clock.Now().Add(backup.Spec.TTL.Duration))
    }

    // validate the backup's storage location and volume snapshot locations,
    // collecting any validation errors on the backup's status
    backupLocation, errs := c.getLocationAndValidate(backup, c.defaultBackupLocation)
    errs = append(errs, c.defaultAndValidateSnapshotLocations(backup, c.defaultSnapshotLocations)...)
    backup.Status.ValidationErrors = append(backup.Status.ValidationErrors, errs...)
    if len(backup.Status.ValidationErrors) > 0 {
        backup.Status.Phase = api.BackupPhaseFailedValidation
    } else {
        backup.Status.Phase = api.BackupPhaseInProgress
    }

    // update status
    updatedBackup, err := patchBackup(original, backup, c.client)
    if err != nil {
        return errors.Wrapf(err, "error updating Backup status to %s", backup.Status.Phase)
    }
    // store ref to just-updated item for creating patch
    original = updatedBackup
    backup = updatedBackup.DeepCopy()

    if backup.Status.Phase == api.BackupPhaseFailedValidation {
        return nil
    }

    c.backupTracker.Add(backup.Namespace, backup.Name)
    defer c.backupTracker.Delete(backup.Namespace, backup.Name)

    log.Debug("Running backup")
    // execution & upload of backup
    backupScheduleName := backup.GetLabels()["ark-schedule"]
    c.metrics.RegisterBackupAttempt(backupScheduleName)

    if err := c.runBackup(backup, backupLocation); err != nil {
        log.WithError(err).Error("backup failed")
        backup.Status.Phase = api.BackupPhaseFailed
        c.metrics.RegisterBackupFailed(backupScheduleName)
    } else {
        c.metrics.RegisterBackupSuccess(backupScheduleName)
    }

    log.Debug("Updating backup's final status")
    if _, err := patchBackup(original, backup, c.client); err != nil {
        log.WithError(err).Error("error updating backup's final status")
    }

    return nil
}
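
// patchBackup creates and submits a JSON merge patch from the difference
// between the original and updated backups, so only changed fields are sent
// to the API server.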
func patchBackup(original, updated *api.Backup, client arkv1client.BackupsGetter) (*api.Backup, error) {
    origBytes, err := json.Marshal(original)
    if err != nil {
        return nil, errors.Wrap(err, "error marshalling original backup")
    }

    updatedBytes, err := json.Marshal(updated)
    if err != nil {
        return nil, errors.Wrap(err, "error marshalling updated backup")
    }

    patchBytes, err := jsonpatch.CreateMergePatch(origBytes, updatedBytes)
    if err != nil {
        return nil, errors.Wrap(err, "error creating json merge patch for backup")
    }

    res, err := client.Backups(original.Namespace).Patch(original.Name, types.MergePatchType, patchBytes)
    if err != nil {
        return nil, errors.Wrap(err, "error patching backup")
    }

    return res, nil
}
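
// getLocationAndValidate validates the backup's include/exclude lists, defaults
// its storage location if unset, labels it with that location for easy
// filtering, and resolves the corresponding BackupStorageLocation object.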
func (c *backupController) getLocationAndValidate(itm *api.Backup, defaultBackupLocation string) (*api.BackupStorageLocation, []string) {
    var validationErrors []string

    for _, err := range collections.ValidateIncludesExcludes(itm.Spec.IncludedResources, itm.Spec.ExcludedResources) {
        validationErrors = append(validationErrors, fmt.Sprintf("Invalid included/excluded resource lists: %v", err))
    }

    for _, err := range collections.ValidateIncludesExcludes(itm.Spec.IncludedNamespaces, itm.Spec.ExcludedNamespaces) {
        validationErrors = append(validationErrors, fmt.Sprintf("Invalid included/excluded namespace lists: %v", err))
    }

    if itm.Spec.StorageLocation == "" {
        itm.Spec.StorageLocation = defaultBackupLocation
    }

    // add the storage location as a label for easy filtering later.
    if itm.Labels == nil {
        itm.Labels = make(map[string]string)
    }
    itm.Labels[api.StorageLocationLabel] = itm.Spec.StorageLocation

    backupLocation, err := c.backupLocationLister.BackupStorageLocations(itm.Namespace).Get(itm.Spec.StorageLocation)
    if err != nil {
        validationErrors = append(validationErrors, fmt.Sprintf("Error getting backup storage location: %v", err))
    }

    return backupLocation, validationErrors
}

// defaultAndValidateSnapshotLocations ensures:
// - each location name in Spec.VolumeSnapshotLocations exists as a VolumeSnapshotLocation
// - exactly one location is specified per provider
// - a provider's default location name is added to Spec.VolumeSnapshotLocations if no
//   location for that provider was explicitly specified
func (c *backupController) defaultAndValidateSnapshotLocations(itm *api.Backup, defaultLocations map[string]string) []string {
    var errors []string
    perProviderLocationName := make(map[string]string)
    var finalLocationNameList []string

    for _, locationName := range itm.Spec.VolumeSnapshotLocations {
        // validate each locationName exists as a VolumeSnapshotLocation
        location, err := c.snapshotLocationLister.VolumeSnapshotLocations(itm.Namespace).Get(locationName)
        if err != nil {
            errors = append(errors, fmt.Sprintf("error getting volume snapshot location named %s: %v", locationName, err))
            continue
        }

        // ensure we end up with exactly one locationName *per provider*
        providerLocationName := perProviderLocationName[location.Spec.Provider]
        if providerLocationName != "" {
            // more than one location name for the same provider, e.g. "aws-us-east-1"
            // and "aws-us-west-1", is invalid
            if providerLocationName != locationName {
                errors = append(errors, fmt.Sprintf("more than one VolumeSnapshotLocation name specified for provider %s: %s; unexpected name was %s", location.Spec.Provider, locationName, providerLocationName))
                continue
            }
        } else {
            // no duplicate exists: add locationName to the final list
            finalLocationNameList = append(finalLocationNameList, locationName)

            // keep track of all valid existing locations, per provider
            perProviderLocationName[location.Spec.Provider] = locationName
        }
    }

    if len(errors) > 0 {
        return errors
    }

    for provider, defaultLocationName := range defaultLocations {
        // if no location name for this provider was explicitly specified, add the provider's default
        if _, ok := perProviderLocationName[provider]; !ok {
            finalLocationNameList = append(finalLocationNameList, defaultLocationName)
        }
    }

    itm.Spec.VolumeSnapshotLocations = finalLocationNameList
    return nil
}
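
// runBackup executes the backup itself, then uploads the backup tarball,
// metadata JSON, and gzipped log file to the backup store, recording tarball
// size and duration metrics along the way.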
func (c *backupController) runBackup(backup *api.Backup, backupLocation *api.BackupStorageLocation) error {
    log := c.logger.WithField("backup", kubeutil.NamespaceAndName(backup))
    log.Info("Starting backup")
    backup.Status.StartTimestamp.Time = c.clock.Now()

    logFile, err := ioutil.TempFile("", "")
    if err != nil {
        return errors.Wrap(err, "error creating temp file for backup log")
    }
    gzippedLogFile := gzip.NewWriter(logFile)
    // Assuming we successfully uploaded the log file, this will have already been closed below. It is safe to call
    // close multiple times. If we get an error closing this, there's not really anything we can do about it.
    defer gzippedLogFile.Close()
    defer closeAndRemoveFile(logFile, c.logger)

    // Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
    // backup log failed for whatever reason.
    logger := logging.DefaultLogger(c.backupLogLevel)
    logger.Out = io.MultiWriter(os.Stdout, gzippedLogFile)
    log = logger.WithField("backup", kubeutil.NamespaceAndName(backup))

    log.Info("Starting backup")
    backupFile, err := ioutil.TempFile("", "")
    if err != nil {
        return errors.Wrap(err, "error creating temp file for backup")
    }
    defer closeAndRemoveFile(backupFile, log)

    pluginManager := c.newPluginManager(log)
    defer pluginManager.CleanupClients()

    actions, err := pluginManager.GetBackupItemActions()
    if err != nil {
        return err
    }

    backupStore, err := c.newBackupStore(backupLocation, pluginManager, log)
    if err != nil {
        return err
    }

    var errs []error

    var backupJSONToUpload, backupFileToUpload io.Reader

    // Do the actual backup
    if err := c.backupper.Backup(log, backup, backupFile, actions); err != nil {
        errs = append(errs, err)
        backup.Status.Phase = api.BackupPhaseFailed
    } else {
        backup.Status.Phase = api.BackupPhaseCompleted
    }

    // Mark completion timestamp before serializing and uploading.
    // Otherwise, the JSON file in object storage has a CompletionTimestamp of 'null'.
    backup.Status.CompletionTimestamp.Time = c.clock.Now()

    backupJSON := new(bytes.Buffer)
    if err := encode.EncodeTo(backup, "json", backupJSON); err != nil {
        errs = append(errs, errors.Wrap(err, "error encoding backup"))
    } else {
        // Only upload the json and backup tarball if encoding to json succeeded.
        backupJSONToUpload = backupJSON
        backupFileToUpload = backupFile
    }

    var backupSizeBytes int64
    if backupFileStat, err := backupFile.Stat(); err != nil {
        errs = append(errs, errors.Wrap(err, "error getting file info"))
    } else {
        backupSizeBytes = backupFileStat.Size()
    }

    if err := gzippedLogFile.Close(); err != nil {
        c.logger.WithError(err).Error("error closing gzippedLogFile")
    }

    if err := backupStore.PutBackup(backup.Name, backupJSONToUpload, backupFileToUpload, logFile); err != nil {
        errs = append(errs, err)
    }

    backupScheduleName := backup.GetLabels()["ark-schedule"]
    c.metrics.SetBackupTarballSizeBytesGauge(backupScheduleName, backupSizeBytes)

    backupDuration := backup.Status.CompletionTimestamp.Time.Sub(backup.Status.StartTimestamp.Time)
    backupDurationSeconds := float64(backupDuration / time.Second)
    c.metrics.RegisterBackupDuration(backupScheduleName, backupDurationSeconds)

    log.Info("Backup completed")

    return kerrors.NewAggregate(errs)
}
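
// closeAndRemoveFile closes and deletes the given temp file, logging (rather
// than returning) any errors, since cleanup failures are non-fatal.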
func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) {
    if err := file.Close(); err != nil {
        log.WithError(err).WithField("file", file.Name()).Error("error closing file")
    }
    if err := os.Remove(file.Name()); err != nil {
        log.WithError(err).WithField("file", file.Name()).Error("error removing file")
    }
}