/*
Copyright 2017 the Heptio Ark contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
2017-10-26 15:24:16 +00:00
"compress/gzip"
"encoding/json"
2017-08-02 17:27:17 +00:00
"fmt"
"io"
"io/ioutil"
"os"
2018-04-20 18:02:59 +00:00
"sort"
2017-08-02 17:27:17 +00:00
2018-05-14 21:34:24 +00:00
jsonpatch "github.com/evanphx/json-patch"
2017-09-14 21:27:31 +00:00
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
2017-08-02 17:27:17 +00:00
2017-08-25 22:02:00 +00:00
apierrors "k8s.io/apimachinery/pkg/api/errors"
2018-04-20 18:02:59 +00:00
"k8s.io/apimachinery/pkg/labels"
2017-12-11 22:10:52 +00:00
"k8s.io/apimachinery/pkg/types"
2017-09-19 03:44:33 +00:00
"k8s.io/apimachinery/pkg/util/sets"
2017-08-02 17:27:17 +00:00
"k8s.io/client-go/tools/cache"
api "github.com/heptio/ark/pkg/apis/ark/v1"
2017-10-25 16:42:03 +00:00
arkv1client "github.com/heptio/ark/pkg/generated/clientset/versioned/typed/ark/v1"
2017-08-02 17:27:17 +00:00
informers "github.com/heptio/ark/pkg/generated/informers/externalversions/ark/v1"
listers "github.com/heptio/ark/pkg/generated/listers/ark/v1"
2018-06-25 18:15:46 +00:00
"github.com/heptio/ark/pkg/metrics"
2018-08-20 18:47:16 +00:00
"github.com/heptio/ark/pkg/persistence"
2017-11-21 17:24:43 +00:00
"github.com/heptio/ark/pkg/plugin"
2017-08-02 17:27:17 +00:00
"github.com/heptio/ark/pkg/restore"
2018-04-20 18:02:59 +00:00
"github.com/heptio/ark/pkg/util/boolptr"
2017-08-30 02:14:21 +00:00
"github.com/heptio/ark/pkg/util/collections"
2017-09-14 21:27:31 +00:00
kubeutil "github.com/heptio/ark/pkg/util/kube"
2018-05-13 13:28:09 +00:00
"github.com/heptio/ark/pkg/util/logging"
2017-08-02 17:27:17 +00:00
)
2017-09-19 03:44:33 +00:00
// nonRestorableResources is a blacklist for the restoration process. Any resources
// included here are explicitly excluded from the restoration process.
var nonRestorableResources = []string{
	"nodes",
	"events",
	"events.events.k8s.io",

	// Don't ever restore backups - if appropriate, they'll be synced in from object storage.
	// https://github.com/heptio/ark/issues/622
	"backups.ark.heptio.com",

	// Restores are cluster-specific, and don't have value moving across clusters.
	// https://github.com/heptio/ark/issues/622
	"restores.ark.heptio.com",
}
2017-09-19 03:44:33 +00:00
2017-08-02 17:27:17 +00:00
// restoreController processes Restore resources: it validates them, resolves
// the backup they refer to, runs the restore, and records the outcome on the
// Restore's status.
type restoreController struct {
	*genericController

	namespace             string
	restoreClient         arkv1client.RestoresGetter
	backupClient          arkv1client.BackupsGetter
	restorer              restore.Restorer
	pvProviderExists      bool
	backupLister          listers.BackupLister
	restoreLister         listers.RestoreLister
	backupLocationLister  listers.BackupStorageLocationLister
	restoreLogLevel       logrus.Level
	defaultBackupLocation string
	metrics               *metrics.ServerMetrics

	// constructor funcs held as fields so tests can replace them with fakes
	// (see NewRestoreController).
	newPluginManager func(logger logrus.FieldLogger) plugin.Manager
	newBackupStore   func(*api.BackupStorageLocation, persistence.ObjectStoreGetter, logrus.FieldLogger) (persistence.BackupStore, error)
}
// NewRestoreController constructs a restoreController wired to the given
// clients, informers, and configuration, and registers an informer event
// handler that enqueues new (phase "" or New) Restore objects for processing.
func NewRestoreController(
	namespace string,
	restoreInformer informers.RestoreInformer,
	restoreClient arkv1client.RestoresGetter,
	backupClient arkv1client.BackupsGetter,
	restorer restore.Restorer,
	backupInformer informers.BackupInformer,
	backupLocationInformer informers.BackupStorageLocationInformer,
	pvProviderExists bool,
	logger logrus.FieldLogger,
	restoreLogLevel logrus.Level,
	newPluginManager func(logrus.FieldLogger) plugin.Manager,
	defaultBackupLocation string,
	metrics *metrics.ServerMetrics,
) Interface {
	c := &restoreController{
		genericController:     newGenericController("restore", logger),
		namespace:             namespace,
		restoreClient:         restoreClient,
		backupClient:          backupClient,
		restorer:              restorer,
		pvProviderExists:      pvProviderExists,
		backupLister:          backupInformer.Lister(),
		restoreLister:         restoreInformer.Lister(),
		backupLocationLister:  backupLocationInformer.Lister(),
		restoreLogLevel:       restoreLogLevel,
		defaultBackupLocation: defaultBackupLocation,
		metrics:               metrics,

		// use variables to refer to these functions so they can be
		// replaced with fakes for testing.
		newPluginManager: newPluginManager,
		newBackupStore:   persistence.NewObjectBackupStore,
	}

	c.syncHandler = c.processRestore
	c.cacheSyncWaiters = append(c.cacheSyncWaiters,
		backupInformer.Informer().HasSynced,
		restoreInformer.Informer().HasSynced,
		backupLocationInformer.Informer().HasSynced,
	)

	restoreInformer.Informer().AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				restore := obj.(*api.Restore)

				switch restore.Status.Phase {
				case "", api.RestorePhaseNew:
					// only process new restores
				default:
					c.logger.WithFields(logrus.Fields{
						"restore": kubeutil.NamespaceAndName(restore),
						"phase":   restore.Status.Phase,
					}).Debug("Restore is not new, skipping")
					return
				}

				key, err := cache.MetaNamespaceKeyFunc(restore)
				if err != nil {
					c.logger.WithError(errors.WithStack(err)).WithField("restore", restore).Error("Error creating queue key, item not added to queue")
					return
				}
				c.queue.Add(key)
			},
		},
	)

	return c
}
2018-07-09 18:02:41 +00:00
// processRestore is the syncHandler for the restore controller: given a queue
// key, it loads the Restore, validates it, runs the restore, and patches the
// Restore's status through its phase transitions (New -> InProgress ->
// Completed/Failed, or FailedValidation).
func (c *restoreController) processRestore(key string) error {
	log := c.logger.WithField("key", key)

	log.Debug("Running processRestore")
	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		log.WithError(err).Error("unable to process restore: error splitting queue key")
		// Return nil here so we don't try to process the key any more
		return nil
	}

	log.Debug("Getting Restore")
	restore, err := c.restoreLister.Restores(ns).Get(name)
	if err != nil {
		return errors.Wrap(err, "error getting Restore")
	}

	// TODO I think this is now unnecessary. We only initially place
	// item with Phase = ("" | New) into the queue. Items will only get
	// re-queued if syncHandler returns an error, which will only
	// happen if there's an error updating Phase from its initial
	// state to something else. So any time it's re-queued it will
	// still have its initial state, which we've already confirmed
	// is ("" | New)
	switch restore.Status.Phase {
	case "", api.RestorePhaseNew:
		// only process new restores
	default:
		return nil
	}

	log.Debug("Cloning Restore")
	// store ref to original for creating patch
	original := restore
	// don't modify items in the cache
	restore = restore.DeepCopy()

	pluginManager := c.newPluginManager(log)
	defer pluginManager.CleanupClients()

	actions, err := pluginManager.GetRestoreItemActions()
	if err != nil {
		return errors.Wrap(err, "error initializing restore item actions")
	}

	// validate the restore and fetch the backup
	info := c.validateAndComplete(restore, pluginManager)
	backupScheduleName := restore.Spec.ScheduleName
	// Register attempts after validation so we don't have to fetch the backup multiple times
	c.metrics.RegisterRestoreAttempt(backupScheduleName)

	if len(restore.Status.ValidationErrors) > 0 {
		restore.Status.Phase = api.RestorePhaseFailedValidation
		c.metrics.RegisterRestoreValidationFailed(backupScheduleName)
	} else {
		restore.Status.Phase = api.RestorePhaseInProgress
	}

	// patch to update status and persist to API
	updatedRestore, err := patchRestore(original, restore, c.restoreClient)
	if err != nil {
		return errors.Wrapf(err, "error updating Restore phase to %s", restore.Status.Phase)
	}
	// store ref to just-updated item for creating patch
	original = updatedRestore
	restore = updatedRestore.DeepCopy()

	if restore.Status.Phase == api.RestorePhaseFailedValidation {
		return nil
	}

	log.Debug("Running restore")

	// execution & upload of restore
	restoreWarnings, restoreErrors, restoreFailure := c.runRestore(
		restore,
		actions,
		info,
	)

	// tally warnings and errors across the Ark, cluster-wide, and
	// per-namespace buckets into the status counters
	restore.Status.Warnings = len(restoreWarnings.Ark) + len(restoreWarnings.Cluster)
	for _, w := range restoreWarnings.Namespaces {
		restore.Status.Warnings += len(w)
	}

	restore.Status.Errors = len(restoreErrors.Ark) + len(restoreErrors.Cluster)
	for _, e := range restoreErrors.Namespaces {
		restore.Status.Errors += len(e)
	}

	if restoreFailure != nil {
		log.Debug("restore failed")
		restore.Status.Phase = api.RestorePhaseFailed
		restore.Status.FailureReason = restoreFailure.Error()
		c.metrics.RegisterRestoreFailed(backupScheduleName)
	} else {
		log.Debug("restore completed")
		// We got through the restore process without failing validation or restore execution
		restore.Status.Phase = api.RestorePhaseCompleted
		c.metrics.RegisterRestoreSuccess(backupScheduleName)
	}

	log.Debug("Updating Restore final status")
	if _, err = patchRestore(original, restore, c.restoreClient); err != nil {
		log.WithError(errors.WithStack(err)).Info("Error updating Restore final status")
	}

	return nil
}
2018-08-14 14:28:11 +00:00
// backupInfo pairs a Backup with a store for the storage location
// the backup resides in.
type backupInfo struct {
	backup      *api.Backup
	backupStore persistence.BackupStore
}
func ( c * restoreController ) validateAndComplete ( restore * api . Restore , pluginManager plugin . Manager ) backupInfo {
2018-04-20 18:02:59 +00:00
// add non-restorable resources to restore's excluded resources
excludedResources := sets . NewString ( restore . Spec . ExcludedResources ... )
for _ , nonrestorable := range nonRestorableResources {
if ! excludedResources . Has ( nonrestorable ) {
restore . Spec . ExcludedResources = append ( restore . Spec . ExcludedResources , nonrestorable )
}
2017-08-02 17:27:17 +00:00
}
2018-04-20 18:02:59 +00:00
// validate that included resources don't contain any non-restorable resources
includedResources := sets . NewString ( restore . Spec . IncludedResources ... )
2017-09-19 03:44:33 +00:00
for _ , nonRestorableResource := range nonRestorableResources {
if includedResources . Has ( nonRestorableResource ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "%v are non-restorable resources" , nonRestorableResource ) )
2017-09-19 03:44:33 +00:00
}
}
2018-04-20 18:02:59 +00:00
// validate included/excluded resources
for _ , err := range collections . ValidateIncludesExcludes ( restore . Spec . IncludedResources , restore . Spec . ExcludedResources ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "Invalid included/excluded resource lists: %v" , err ) )
2017-08-27 16:42:10 +00:00
}
2018-04-20 18:02:59 +00:00
// validate included/excluded namespaces
for _ , err := range collections . ValidateIncludesExcludes ( restore . Spec . IncludedNamespaces , restore . Spec . ExcludedNamespaces ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "Invalid included/excluded namespace lists: %v" , err ) )
2017-09-01 21:39:30 +00:00
}
2018-04-20 18:02:59 +00:00
// validate that PV provider exists if we're restoring PVs
2018-07-09 18:02:41 +00:00
if boolptr . IsSetToTrue ( restore . Spec . RestorePVs ) && ! c . pvProviderExists {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "Server is not configured for PV snapshot restores" )
2017-08-09 22:52:27 +00:00
}
2018-04-20 18:02:59 +00:00
// validate that exactly one of BackupName and ScheduleName have been specified
if ! backupXorScheduleProvided ( restore ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "Either a backup or schedule must be specified as a source for the restore, but not both" )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
// if ScheduleName is specified, fill in BackupName with the most recent successful backup from
// the schedule
if restore . Spec . ScheduleName != "" {
selector := labels . SelectorFromSet ( labels . Set ( map [ string ] string {
"ark-schedule" : restore . Spec . ScheduleName ,
} ) )
2018-07-09 18:02:41 +00:00
backups , err := c . backupLister . Backups ( c . namespace ) . List ( selector )
2018-04-20 18:02:59 +00:00
if err != nil {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "Unable to list backups for schedule" )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
if len ( backups ) == 0 {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "No backups found for schedule" )
2018-04-20 18:02:59 +00:00
}
if backup := mostRecentCompletedBackup ( backups ) ; backup != nil {
restore . Spec . BackupName = backup . Name
} else {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "No completed backups found for schedule" )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
}
2018-08-14 14:28:11 +00:00
info , err := c . fetchBackupInfo ( restore . Spec . BackupName , pluginManager )
if err != nil {
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "Error retrieving backup: %v" , err ) )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
2018-07-05 20:49:47 +00:00
// Fill in the ScheduleName so it's easier to consume for metrics.
if restore . Spec . ScheduleName == "" {
2018-08-14 14:28:11 +00:00
restore . Spec . ScheduleName = info . backup . GetLabels ( ) [ "ark-schedule" ]
2018-07-05 20:49:47 +00:00
}
2018-08-14 14:28:11 +00:00
return info
2017-08-02 17:27:17 +00:00
}
2018-04-20 18:02:59 +00:00
// backupXorScheduleProvided returns true if exactly one of BackupName and
// ScheduleName are non-empty for the restore, or false otherwise.
func backupXorScheduleProvided(restore *api.Restore) bool {
	backupProvided := restore.Spec.BackupName != ""
	scheduleProvided := restore.Spec.ScheduleName != ""

	// exactly one of the two sources must be set (boolean XOR)
	return backupProvided != scheduleProvided
}
// mostRecentCompletedBackup returns the most recent backup that's
2018-07-11 16:56:19 +00:00
// completed from a list of backups.
2018-04-20 18:02:59 +00:00
func mostRecentCompletedBackup ( backups [ ] * api . Backup ) * api . Backup {
sort . Slice ( backups , func ( i , j int ) bool {
2018-07-11 16:56:19 +00:00
// Use .After() because we want descending sort.
return backups [ i ] . Status . StartTimestamp . After ( backups [ j ] . Status . StartTimestamp . Time )
2018-04-20 18:02:59 +00:00
} )
for _ , backup := range backups {
if backup . Status . Phase == api . BackupPhaseCompleted {
return backup
}
}
return nil
}
2018-08-14 14:28:11 +00:00
// fetchBackupInfo checks the backup lister for a backup that matches the given name. If it doesn't
// find it, it tries to retrieve it from one of the backup storage locations.
func (c *restoreController) fetchBackupInfo(backupName string, pluginManager plugin.Manager) (backupInfo, error) {
	backup, err := c.backupLister.Backups(c.namespace).Get(backupName)
	if err != nil {
		if !apierrors.IsNotFound(err) {
			return backupInfo{}, errors.WithStack(err)
		}

		// Not found in the cluster: fall back to searching object storage directly.
		log := c.logger.WithField("backupName", backupName)
		log.Debug("Backup not found in backupLister, checking each backup location directly, starting with default...")
		return c.fetchFromBackupStorage(backupName, pluginManager)
	}

	// The backup exists in-cluster; look up its storage location so we can
	// build a store for its contents and restore artifacts.
	location, err := c.backupLocationLister.BackupStorageLocations(c.namespace).Get(backup.Spec.StorageLocation)
	if err != nil {
		return backupInfo{}, errors.WithStack(err)
	}

	backupStore, err := c.newBackupStore(location, pluginManager, c.logger)
	if err != nil {
		return backupInfo{}, err
	}

	return backupInfo{
		backup:      backup,
		backupStore: backupStore,
	}, nil
}
// fetchFromBackupStorage checks each backup storage location, starting with the default,
// looking for a backup that matches the given backup name.
func (c *restoreController) fetchFromBackupStorage(backupName string, pluginManager plugin.Manager) (backupInfo, error) {
	locations, err := c.backupLocationLister.BackupStorageLocations(c.namespace).List(labels.Everything())
	if err != nil {
		return backupInfo{}, errors.WithStack(err)
	}

	orderedLocations := orderedBackupLocations(locations, c.defaultBackupLocation)

	log := c.logger.WithField("backupName", backupName)
	for _, location := range orderedLocations {
		info, err := c.backupInfoForLocation(location, backupName, pluginManager)
		if err != nil {
			// Log the failure and keep trying the remaining locations.
			log.WithField("locationName", location.Name).WithError(err).Error("Unable to fetch backup from object storage location")
			continue
		}
		return info, nil
	}

	return backupInfo{}, errors.New("not able to fetch from backup storage")
}
2018-08-24 18:07:01 +00:00
// orderedBackupLocations returns a new slice with the default backup location first (if it exists),
// followed by the rest of the locations in no particular order.
2018-08-14 14:28:11 +00:00
func orderedBackupLocations ( locations [ ] * api . BackupStorageLocation , defaultLocationName string ) [ ] * api . BackupStorageLocation {
var result [ ] * api . BackupStorageLocation
for i := range locations {
if locations [ i ] . Name == defaultLocationName {
// put the default location first
result = append ( result , locations [ i ] )
// append everything before the default
result = append ( result , locations [ : i ] ... )
// append everything after the default
result = append ( result , locations [ i + 1 : ] ... )
return result
}
}
return locations
}
func ( c * restoreController ) backupInfoForLocation ( location * api . BackupStorageLocation , backupName string , pluginManager plugin . Manager ) ( backupInfo , error ) {
2018-08-20 23:29:54 +00:00
backupStore , err := persistence . NewObjectBackupStore ( location , pluginManager , c . logger )
2018-08-14 14:28:11 +00:00
if err != nil {
return backupInfo { } , err
}
2018-08-20 23:29:54 +00:00
backup , err := backupStore . GetBackupMetadata ( backupName )
2018-08-14 14:28:11 +00:00
if err != nil {
return backupInfo { } , err
2017-08-25 22:02:00 +00:00
}
// ResourceVersion needs to be cleared in order to create the object in the API
backup . ResourceVersion = ""
2018-08-14 14:28:11 +00:00
// Clear out the namespace, in case the backup was made in a different cluster, with a different namespace
2017-12-22 14:43:44 +00:00
backup . Namespace = ""
2017-08-25 22:02:00 +00:00
2018-08-14 14:28:11 +00:00
backupCreated , err := c . backupClient . Backups ( c . namespace ) . Create ( backup )
if err != nil {
return backupInfo { } , errors . WithStack ( err )
2017-08-25 22:02:00 +00:00
}
2018-08-14 14:28:11 +00:00
return backupInfo {
backup : backupCreated ,
2018-08-20 23:29:54 +00:00
backupStore : backupStore ,
2018-08-14 14:28:11 +00:00
} , nil
2017-08-25 22:02:00 +00:00
}
2018-05-13 13:28:09 +00:00
// runRestore executes the restore described by the Restore object and uploads
// the restore log and results files to backup storage. restoreWarnings and
// restoreErrors accumulate non-fatal issues; a non-nil restoreFailure marks
// the restore as failed overall.
func (c *restoreController) runRestore(
	restore *api.Restore,
	actions []restore.ItemAction,
	info backupInfo,
) (restoreWarnings, restoreErrors api.RestoreResult, restoreFailure error) {
	logFile, err := ioutil.TempFile("", "")
	if err != nil {
		c.logger.
			WithFields(
				logrus.Fields{
					"restore": kubeutil.NamespaceAndName(restore),
					"backup":  restore.Spec.BackupName,
				},
			).
			WithError(errors.WithStack(err)).
			Error("Error creating log temp file")
		restoreErrors.Ark = append(restoreErrors.Ark, err.Error())
		return
	}
	gzippedLogFile := gzip.NewWriter(logFile)
	// Assuming we successfully uploaded the log file, this will have already been closed below. It is safe to call
	// close multiple times. If we get an error closing this, there's not really anything we can do about it.
	defer gzippedLogFile.Close()
	defer closeAndRemoveFile(logFile, c.logger)

	// Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
	// backup log failed for whatever reason.
	logger := logging.DefaultLogger(c.restoreLogLevel)
	logger.Out = io.MultiWriter(os.Stdout, gzippedLogFile)
	log := logger.WithFields(
		logrus.Fields{
			"restore": kubeutil.NamespaceAndName(restore),
			"backup":  restore.Spec.BackupName,
		})

	backupFile, err := downloadToTempFile(restore.Spec.BackupName, info.backupStore, c.logger)
	if err != nil {
		log.WithError(err).Error("Error downloading backup")
		restoreErrors.Ark = append(restoreErrors.Ark, err.Error())
		restoreFailure = err
		return
	}
	defer closeAndRemoveFile(backupFile, c.logger)

	resultsFile, err := ioutil.TempFile("", "")
	if err != nil {
		log.WithError(errors.WithStack(err)).Error("Error creating results temp file")
		restoreErrors.Ark = append(restoreErrors.Ark, err.Error())
		restoreFailure = err
		return
	}
	defer closeAndRemoveFile(resultsFile, c.logger)

	// Any return statement above this line means a total restore failure
	// Some failures after this line *may* be a total restore failure
	log.Info("starting restore")
	restoreWarnings, restoreErrors = c.restorer.Restore(log, restore, info.backup, backupFile, actions)
	log.Info("restore completed")

	// Try to upload the log file. This is best-effort. If we fail, we'll add to the ark errors.
	if err := gzippedLogFile.Close(); err != nil {
		c.logger.WithError(err).Error("error closing gzippedLogFile")
	}
	// Reset the offset to 0 for reading
	if _, err = logFile.Seek(0, 0); err != nil {
		restoreErrors.Ark = append(restoreErrors.Ark, fmt.Sprintf("error resetting log file offset to 0: %v", err))
		return
	}

	if err := info.backupStore.PutRestoreLog(restore.Spec.BackupName, restore.Name, logFile); err != nil {
		restoreErrors.Ark = append(restoreErrors.Ark, fmt.Sprintf("error uploading log file to backup storage: %v", err))
	}

	// Gzip-encode the warnings/errors as JSON and upload them as the results file.
	m := map[string]api.RestoreResult{
		"warnings": restoreWarnings,
		"errors":   restoreErrors,
	}

	gzippedResultsFile := gzip.NewWriter(resultsFile)

	if err := json.NewEncoder(gzippedResultsFile).Encode(m); err != nil {
		log.WithError(errors.WithStack(err)).Error("Error encoding restore results")
		return
	}
	gzippedResultsFile.Close()

	// Reset the offset to 0 for reading before the upload.
	if _, err = resultsFile.Seek(0, 0); err != nil {
		log.WithError(errors.WithStack(err)).Error("Error resetting results file offset to 0")
		return
	}
	if err := info.backupStore.PutRestoreResults(restore.Spec.BackupName, restore.Name, resultsFile); err != nil {
		log.WithError(errors.WithStack(err)).Error("Error uploading results file to backup storage")
	}

	return
}
2018-05-13 13:28:09 +00:00
func downloadToTempFile (
2018-08-20 23:29:54 +00:00
backupName string ,
backupStore persistence . BackupStore ,
2018-05-13 13:28:09 +00:00
logger logrus . FieldLogger ,
) ( * os . File , error ) {
2018-08-20 23:29:54 +00:00
readCloser , err := backupStore . GetBackupContents ( backupName )
2017-08-02 17:27:17 +00:00
if err != nil {
return nil , err
}
defer readCloser . Close ( )
file , err := ioutil . TempFile ( "" , backupName )
if err != nil {
2017-09-14 21:27:31 +00:00
return nil , errors . Wrap ( err , "error creating Backup temp file" )
2017-08-02 17:27:17 +00:00
}
n , err := io . Copy ( file , readCloser )
if err != nil {
2017-09-14 21:27:31 +00:00
return nil , errors . Wrap ( err , "error copying Backup to temp file" )
2017-08-02 17:27:17 +00:00
}
2017-09-14 21:27:31 +00:00
2018-08-29 19:52:09 +00:00
log := logger . WithField ( "backup" , backupName )
2017-09-14 21:27:31 +00:00
2018-08-29 19:52:09 +00:00
log . WithFields ( logrus . Fields {
2017-09-14 21:27:31 +00:00
"fileName" : file . Name ( ) ,
"bytes" : n ,
} ) . Debug ( "Copied Backup to file" )
2017-08-02 17:27:17 +00:00
if _ , err := file . Seek ( 0 , 0 ) ; err != nil {
2017-09-14 21:27:31 +00:00
return nil , errors . Wrap ( err , "error resetting Backup file offset" )
2017-08-02 17:27:17 +00:00
}
return file , nil
}
2017-12-11 22:10:52 +00:00
func patchRestore ( original , updated * api . Restore , client arkv1client . RestoresGetter ) ( * api . Restore , error ) {
origBytes , err := json . Marshal ( original )
if err != nil {
return nil , errors . Wrap ( err , "error marshalling original restore" )
}
updatedBytes , err := json . Marshal ( updated )
if err != nil {
return nil , errors . Wrap ( err , "error marshalling updated restore" )
}
2018-05-14 21:34:24 +00:00
patchBytes , err := jsonpatch . CreateMergePatch ( origBytes , updatedBytes )
2017-12-11 22:10:52 +00:00
if err != nil {
2018-05-14 21:34:24 +00:00
return nil , errors . Wrap ( err , "error creating json merge patch for restore" )
2017-12-11 22:10:52 +00:00
}
2017-12-22 14:43:44 +00:00
res , err := client . Restores ( original . Namespace ) . Patch ( original . Name , types . MergePatchType , patchBytes )
2017-12-11 22:10:52 +00:00
if err != nil {
return nil , errors . Wrap ( err , "error patching restore" )
}
return res , nil
}