2017-08-02 17:27:17 +00:00
/*
Copyright 2020 the Velero contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
2019-04-09 21:17:28 +00:00
"bytes"
2017-10-26 15:24:16 +00:00
"compress/gzip"
2020-06-24 16:55:18 +00:00
"context"
2017-10-26 15:24:16 +00:00
"encoding/json"
2017-08-02 17:27:17 +00:00
"fmt"
"io"
"io/ioutil"
"os"
2018-04-20 18:02:59 +00:00
"sort"
2019-04-05 02:11:53 +00:00
"time"
2017-08-02 17:27:17 +00:00
2018-05-14 21:34:24 +00:00
jsonpatch "github.com/evanphx/json-patch"
2018-10-23 14:36:11 +00:00
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
k8s 1.18 import (#2651)
* k8s 1.18 import wip
backup, cmd, controller, generated, restic, restore, serverstatusrequest, test and util
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go mod tidy
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* add changelog file
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go fmt
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* update code-generator and controller-gen in CI
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* checkout proper code-generator version, regen
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix remaining calls
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* regenerate CRDs with ./hack/update-generated-crd-code.sh
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use existing context in restic and server
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix test cases by resetting resource version
also use main library go context, not golang.org/x/net/context, in pkg/restore/restore.go
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* clarify changelog message
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use github.com/kubernetes-csi/external-snapshotter/v2@v2.2.0-rc1
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* run 'go mod tidy' to remove old external-snapshotter version
Signed-off-by: Andrew Lavery <laverya@umich.edu>
2020-07-16 16:21:37 +00:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2018-10-23 14:36:11 +00:00
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
2020-07-22 18:40:39 +00:00
"k8s.io/apimachinery/pkg/util/clock"
2018-10-23 14:36:11 +00:00
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/cache"
2019-09-30 21:26:56 +00:00
api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov1client "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned/typed/velero/v1"
2020-02-25 20:01:24 +00:00
velerov1informers "github.com/vmware-tanzu/velero/pkg/generated/informers/externalversions/velero/v1"
velerov1listers "github.com/vmware-tanzu/velero/pkg/generated/listers/velero/v1"
2020-05-07 18:56:13 +00:00
"github.com/vmware-tanzu/velero/pkg/label"
2019-09-30 21:26:56 +00:00
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/persistence"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
pkgrestore "github.com/vmware-tanzu/velero/pkg/restore"
"github.com/vmware-tanzu/velero/pkg/util/collections"
kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
2020-06-24 16:55:18 +00:00
"sigs.k8s.io/controller-runtime/pkg/client"
2017-08-02 17:27:17 +00:00
)
2020-07-17 21:59:51 +00:00
// nonRestorableResources lists the resource types that are never restored.
// Each entry is added to a restore's excluded resources during validation,
// and a restore that explicitly includes one of them fails validation.
var nonRestorableResources = []string{
	"nodes",
	"events",
	"events.events.k8s.io",

	// Backups are never restored; when appropriate they are synced in from
	// object storage instead.
	// https://github.com/vmware-tanzu/velero/issues/622
	"backups.velero.io",

	// Restores are specific to a cluster and carry no value when moved to
	// another cluster.
	// https://github.com/vmware-tanzu/velero/issues/622
	"restores.velero.io",

	// Restic repositories are managed by Velero itself, which creates them
	// on demand whenever they do not exist.
	// https://github.com/vmware-tanzu/velero/issues/1113
	"resticrepositories.velero.io",
}
2017-09-19 03:44:33 +00:00
2017-08-02 17:27:17 +00:00
type restoreController struct {
2018-08-29 19:52:09 +00:00
* genericController
2018-10-16 14:28:05 +00:00
namespace string
2019-01-25 03:33:07 +00:00
restoreClient velerov1client . RestoresGetter
2019-08-06 20:17:36 +00:00
podVolumeBackupClient velerov1client . PodVolumeBackupsGetter
2019-04-16 18:57:02 +00:00
restorer pkgrestore . Restorer
2020-02-25 20:01:24 +00:00
backupLister velerov1listers . BackupLister
restoreLister velerov1listers . RestoreLister
2020-06-24 16:55:18 +00:00
kbClient client . Client
2020-02-25 20:01:24 +00:00
snapshotLocationLister velerov1listers . VolumeSnapshotLocationLister
2018-10-16 14:28:05 +00:00
restoreLogLevel logrus . Level
metrics * metrics . ServerMetrics
2019-07-30 23:29:34 +00:00
logFormat logging . Format
2020-07-22 18:40:39 +00:00
clock clock . Clock
2018-05-13 13:28:09 +00:00
2021-02-08 18:04:08 +00:00
newPluginManager func ( logger logrus . FieldLogger ) clientmgmt . Manager
backupStoreGetter persistence . ObjectBackupStoreGetter
2017-08-02 17:27:17 +00:00
}
func NewRestoreController (
2017-12-22 14:43:44 +00:00
namespace string ,
2020-02-25 20:01:24 +00:00
restoreInformer velerov1informers . RestoreInformer ,
2019-01-25 03:33:07 +00:00
restoreClient velerov1client . RestoresGetter ,
2019-08-06 20:17:36 +00:00
podVolumeBackupClient velerov1client . PodVolumeBackupsGetter ,
2019-04-16 18:57:02 +00:00
restorer pkgrestore . Restorer ,
2020-02-25 20:01:24 +00:00
backupLister velerov1listers . BackupLister ,
2020-06-24 16:55:18 +00:00
kbClient client . Client ,
2020-02-25 20:01:24 +00:00
snapshotLocationLister velerov1listers . VolumeSnapshotLocationLister ,
2017-11-21 17:24:43 +00:00
logger logrus . FieldLogger ,
2018-08-29 19:52:09 +00:00
restoreLogLevel logrus . Level ,
2019-03-15 18:32:11 +00:00
newPluginManager func ( logrus . FieldLogger ) clientmgmt . Manager ,
2021-02-08 18:04:08 +00:00
backupStoreGetter persistence . ObjectBackupStoreGetter ,
2018-06-25 18:15:46 +00:00
metrics * metrics . ServerMetrics ,
2019-07-30 23:29:34 +00:00
logFormat logging . Format ,
2017-08-02 17:27:17 +00:00
) Interface {
c := & restoreController {
2020-10-06 17:58:56 +00:00
genericController : newGenericController ( Restore , logger ) ,
2018-10-16 14:28:05 +00:00
namespace : namespace ,
restoreClient : restoreClient ,
2019-08-06 20:17:36 +00:00
podVolumeBackupClient : podVolumeBackupClient ,
2018-10-16 14:28:05 +00:00
restorer : restorer ,
2020-02-25 20:01:24 +00:00
backupLister : backupLister ,
2018-10-16 14:28:05 +00:00
restoreLister : restoreInformer . Lister ( ) ,
2020-06-24 16:55:18 +00:00
kbClient : kbClient ,
2020-02-25 20:01:24 +00:00
snapshotLocationLister : snapshotLocationLister ,
2018-10-16 14:28:05 +00:00
restoreLogLevel : restoreLogLevel ,
metrics : metrics ,
2019-07-30 23:29:34 +00:00
logFormat : logFormat ,
2020-07-22 18:40:39 +00:00
clock : & clock . RealClock { } ,
2018-05-13 13:28:09 +00:00
2018-08-25 19:53:56 +00:00
// use variables to refer to these functions so they can be
// replaced with fakes for testing.
2021-02-08 18:04:08 +00:00
newPluginManager : newPluginManager ,
backupStoreGetter : backupStoreGetter ,
2017-08-02 17:27:17 +00:00
}
2019-04-09 21:17:28 +00:00
c . syncHandler = c . processQueueItem
2019-04-05 02:11:53 +00:00
c . resyncFunc = c . resync
c . resyncPeriod = time . Minute
2017-08-02 17:27:17 +00:00
restoreInformer . Informer ( ) . AddEventHandler (
cache . ResourceEventHandlerFuncs {
AddFunc : func ( obj interface { } ) {
restore := obj . ( * api . Restore )
switch restore . Status . Phase {
case "" , api . RestorePhaseNew :
// only process new restores
default :
2017-09-14 21:27:31 +00:00
c . logger . WithFields ( logrus . Fields {
"restore" : kubeutil . NamespaceAndName ( restore ) ,
"phase" : restore . Status . Phase ,
} ) . Debug ( "Restore is not new, skipping" )
2017-08-02 17:27:17 +00:00
return
}
key , err := cache . MetaNamespaceKeyFunc ( restore )
if err != nil {
2017-09-14 21:27:31 +00:00
c . logger . WithError ( errors . WithStack ( err ) ) . WithField ( "restore" , restore ) . Error ( "Error creating queue key, item not added to queue" )
2017-08-02 17:27:17 +00:00
return
}
c . queue . Add ( key )
} ,
} ,
)
return c
}
2019-04-05 02:11:53 +00:00
// resync refreshes the restore_total metric with the number of restores
// currently returned by the restore lister. If listing fails the error is
// logged and the metric is left unchanged.
func (c *restoreController) resync() {
	restores, err := c.restoreLister.List(labels.Everything())
	if err != nil {
		// Attach the error as a structured field instead of passing it as a
		// positional argument, which logrus would concatenate into the
		// message text.
		c.logger.WithError(err).Error("Error computing restore_total metric")
		return
	}

	c.metrics.SetRestoreTotal(int64(len(restores)))
}
2019-04-09 21:17:28 +00:00
func ( c * restoreController ) processQueueItem ( key string ) error {
2018-08-29 19:52:09 +00:00
log := c . logger . WithField ( "key" , key )
2017-09-14 21:27:31 +00:00
2019-04-09 21:17:28 +00:00
log . Debug ( "Running processQueueItem" )
2017-08-02 17:27:17 +00:00
ns , name , err := cache . SplitMetaNamespaceKey ( key )
if err != nil {
2019-04-09 21:17:28 +00:00
log . WithError ( err ) . Error ( "unable to process queue item: error splitting queue key" )
2018-05-13 13:28:09 +00:00
// Return nil here so we don't try to process the key any more
return nil
2017-08-02 17:27:17 +00:00
}
2018-08-29 19:52:09 +00:00
log . Debug ( "Getting Restore" )
2018-07-09 18:02:41 +00:00
restore , err := c . restoreLister . Restores ( ns ) . Get ( name )
2017-08-02 17:27:17 +00:00
if err != nil {
2017-09-14 21:27:31 +00:00
return errors . Wrap ( err , "error getting Restore" )
2017-08-02 17:27:17 +00:00
}
// TODO I think this is now unnecessary. We only initially place
// item with Phase = ("" | New) into the queue. Items will only get
// re-queued if syncHandler returns an error, which will only
// happen if there's an error updating Phase from its initial
// state to something else. So any time it's re-queued it will
// still have its initial state, which we've already confirmed
// is ("" | New)
switch restore . Status . Phase {
case "" , api . RestorePhaseNew :
// only process new restores
default :
return nil
}
2019-04-09 21:17:28 +00:00
// Deep-copy the restore so the copy from the lister is not modified.
// Any errors returned by processRestore will be bubbled up, meaning
// the key will be re-enqueued by the controller.
return c . processRestore ( restore . DeepCopy ( ) )
}
2017-08-02 17:27:17 +00:00
2019-04-09 21:17:28 +00:00
func ( c * restoreController ) processRestore ( restore * api . Restore ) error {
// Developer note: any error returned by this method will
// cause the restore to be re-enqueued and re-processed by
// the controller.
2018-05-13 13:28:09 +00:00
2019-04-09 21:17:28 +00:00
// store a copy of the original restore for creating patch
original := restore . DeepCopy ( )
2018-05-13 13:28:09 +00:00
2019-04-09 21:17:28 +00:00
// Validate the restore and fetch the backup. Note that the plugin
// manager used here is not the same one used by c.runValidatedRestore,
// since within that function we want the plugin manager to log to
// our per-restore log (which is instantiated within c.runValidatedRestore).
pluginManager := c . newPluginManager ( c . logger )
2019-11-05 16:58:08 +00:00
defer pluginManager . CleanupClients ( )
2018-08-14 14:28:11 +00:00
info := c . validateAndComplete ( restore , pluginManager )
2019-04-09 21:17:28 +00:00
2018-08-14 14:28:11 +00:00
// Register attempts after validation so we don't have to fetch the backup multiple times
2019-04-09 21:17:28 +00:00
backupScheduleName := restore . Spec . ScheduleName
2018-08-14 14:28:11 +00:00
c . metrics . RegisterRestoreAttempt ( backupScheduleName )
if len ( restore . Status . ValidationErrors ) > 0 {
2017-08-02 17:27:17 +00:00
restore . Status . Phase = api . RestorePhaseFailedValidation
2018-08-14 14:28:11 +00:00
c . metrics . RegisterRestoreValidationFailed ( backupScheduleName )
2017-08-02 17:27:17 +00:00
} else {
2020-07-22 18:40:39 +00:00
restore . Status . StartTimestamp = & metav1 . Time { Time : c . clock . Now ( ) }
2017-08-02 17:27:17 +00:00
restore . Status . Phase = api . RestorePhaseInProgress
}
2018-08-14 14:28:11 +00:00
// patch to update status and persist to API
2018-07-09 18:02:41 +00:00
updatedRestore , err := patchRestore ( original , restore , c . restoreClient )
2017-08-02 17:27:17 +00:00
if err != nil {
2019-04-09 21:17:28 +00:00
// return the error so the restore can be re-processed; it's currently
// still in phase = New.
2017-09-14 21:27:31 +00:00
return errors . Wrapf ( err , "error updating Restore phase to %s" , restore . Status . Phase )
2017-08-02 17:27:17 +00:00
}
2017-12-11 22:10:52 +00:00
// store ref to just-updated item for creating patch
original = updatedRestore
restore = updatedRestore . DeepCopy ( )
2017-08-02 17:27:17 +00:00
if restore . Status . Phase == api . RestorePhaseFailedValidation {
return nil
}
2018-08-14 14:28:11 +00:00
2019-04-09 21:17:28 +00:00
if err := c . runValidatedRestore ( restore , info ) ; err != nil {
c . logger . WithError ( err ) . Debug ( "Restore failed" )
2018-07-05 20:49:47 +00:00
restore . Status . Phase = api . RestorePhaseFailed
2019-04-09 21:17:28 +00:00
restore . Status . FailureReason = err . Error ( )
2018-07-05 20:49:47 +00:00
c . metrics . RegisterRestoreFailed ( backupScheduleName )
2019-04-23 22:26:16 +00:00
} else if restore . Status . Errors > 0 {
c . logger . Debug ( "Restore partially failed" )
restore . Status . Phase = api . RestorePhasePartiallyFailed
c . metrics . RegisterRestorePartialFailure ( backupScheduleName )
2018-06-25 18:15:46 +00:00
} else {
2019-04-09 21:17:28 +00:00
c . logger . Debug ( "Restore completed" )
2018-07-05 20:49:47 +00:00
restore . Status . Phase = api . RestorePhaseCompleted
2018-06-25 18:15:46 +00:00
c . metrics . RegisterRestoreSuccess ( backupScheduleName )
}
2017-08-02 17:27:17 +00:00
2020-07-22 18:40:39 +00:00
restore . Status . CompletionTimestamp = & metav1 . Time { Time : c . clock . Now ( ) }
2019-04-09 21:17:28 +00:00
c . logger . Debug ( "Updating restore's final status" )
2018-07-09 18:02:41 +00:00
if _ , err = patchRestore ( original , restore , c . restoreClient ) ; err != nil {
2019-04-09 21:17:28 +00:00
c . logger . WithError ( errors . WithStack ( err ) ) . Info ( "Error updating restore's final status" )
2017-08-02 17:27:17 +00:00
}
return nil
}
2018-08-14 14:28:11 +00:00
type backupInfo struct {
backup * api . Backup
2020-06-24 16:55:18 +00:00
location * velerov1api . BackupStorageLocation
2018-08-20 23:29:54 +00:00
backupStore persistence . BackupStore
2018-08-14 14:28:11 +00:00
}
2019-03-15 18:32:11 +00:00
func ( c * restoreController ) validateAndComplete ( restore * api . Restore , pluginManager clientmgmt . Manager ) backupInfo {
2018-04-20 18:02:59 +00:00
// add non-restorable resources to restore's excluded resources
excludedResources := sets . NewString ( restore . Spec . ExcludedResources ... )
for _ , nonrestorable := range nonRestorableResources {
if ! excludedResources . Has ( nonrestorable ) {
restore . Spec . ExcludedResources = append ( restore . Spec . ExcludedResources , nonrestorable )
}
2017-08-02 17:27:17 +00:00
}
2018-04-20 18:02:59 +00:00
// validate that included resources don't contain any non-restorable resources
includedResources := sets . NewString ( restore . Spec . IncludedResources ... )
2017-09-19 03:44:33 +00:00
for _ , nonRestorableResource := range nonRestorableResources {
if includedResources . Has ( nonRestorableResource ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "%v are non-restorable resources" , nonRestorableResource ) )
2017-09-19 03:44:33 +00:00
}
}
2018-04-20 18:02:59 +00:00
// validate included/excluded resources
for _ , err := range collections . ValidateIncludesExcludes ( restore . Spec . IncludedResources , restore . Spec . ExcludedResources ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "Invalid included/excluded resource lists: %v" , err ) )
2017-08-27 16:42:10 +00:00
}
2018-04-20 18:02:59 +00:00
// validate included/excluded namespaces
for _ , err := range collections . ValidateIncludesExcludes ( restore . Spec . IncludedNamespaces , restore . Spec . ExcludedNamespaces ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "Invalid included/excluded namespace lists: %v" , err ) )
2017-09-01 21:39:30 +00:00
}
2018-04-20 18:02:59 +00:00
// validate that exactly one of BackupName and ScheduleName have been specified
if ! backupXorScheduleProvided ( restore ) {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "Either a backup or schedule must be specified as a source for the restore, but not both" )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
// if ScheduleName is specified, fill in BackupName with the most recent successful backup from
// the schedule
if restore . Spec . ScheduleName != "" {
selector := labels . SelectorFromSet ( labels . Set ( map [ string ] string {
2019-01-25 03:33:07 +00:00
velerov1api . ScheduleNameLabel : restore . Spec . ScheduleName ,
2018-04-20 18:02:59 +00:00
} ) )
2018-07-09 18:02:41 +00:00
backups , err := c . backupLister . Backups ( c . namespace ) . List ( selector )
2018-04-20 18:02:59 +00:00
if err != nil {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "Unable to list backups for schedule" )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
if len ( backups ) == 0 {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "No backups found for schedule" )
2018-04-20 18:02:59 +00:00
}
if backup := mostRecentCompletedBackup ( backups ) ; backup != nil {
restore . Spec . BackupName = backup . Name
} else {
2018-08-14 14:28:11 +00:00
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , "No completed backups found for schedule" )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
}
2018-08-14 14:28:11 +00:00
info , err := c . fetchBackupInfo ( restore . Spec . BackupName , pluginManager )
if err != nil {
restore . Status . ValidationErrors = append ( restore . Status . ValidationErrors , fmt . Sprintf ( "Error retrieving backup: %v" , err ) )
return backupInfo { }
2018-04-20 18:02:59 +00:00
}
2018-07-05 20:49:47 +00:00
// Fill in the ScheduleName so it's easier to consume for metrics.
if restore . Spec . ScheduleName == "" {
2019-01-25 03:33:07 +00:00
restore . Spec . ScheduleName = info . backup . GetLabels ( ) [ velerov1api . ScheduleNameLabel ]
2018-07-05 20:49:47 +00:00
}
2018-08-14 14:28:11 +00:00
return info
2017-08-02 17:27:17 +00:00
}
2018-04-20 18:02:59 +00:00
// backupXorScheduleProvided reports whether exactly one of the restore's
// BackupName and ScheduleName is non-empty. It returns false when both or
// neither are set.
func backupXorScheduleProvided(restore *api.Restore) bool {
	hasBackup := restore.Spec.BackupName != ""
	hasSchedule := restore.Spec.ScheduleName != ""

	// Inequality of two booleans is exclusive-or.
	return hasBackup != hasSchedule
}
// mostRecentCompletedBackup returns the most recent backup that's
2018-07-11 16:56:19 +00:00
// completed from a list of backups.
2018-04-20 18:02:59 +00:00
func mostRecentCompletedBackup ( backups [ ] * api . Backup ) * api . Backup {
sort . Slice ( backups , func ( i , j int ) bool {
2018-07-11 16:56:19 +00:00
// Use .After() because we want descending sort.
2019-10-14 16:20:28 +00:00
var iStartTime , jStartTime time . Time
if backups [ i ] . Status . StartTimestamp != nil {
iStartTime = backups [ i ] . Status . StartTimestamp . Time
}
if backups [ j ] . Status . StartTimestamp != nil {
jStartTime = backups [ j ] . Status . StartTimestamp . Time
}
return iStartTime . After ( jStartTime )
2018-04-20 18:02:59 +00:00
} )
for _ , backup := range backups {
if backup . Status . Phase == api . BackupPhaseCompleted {
return backup
}
}
return nil
}
2018-08-14 14:28:11 +00:00
// fetchBackupInfo checks the backup lister for a backup that matches the given name. If it doesn't
2018-10-30 19:14:40 +00:00
// find it, it returns an error.
2019-03-15 18:32:11 +00:00
func ( c * restoreController ) fetchBackupInfo ( backupName string , pluginManager clientmgmt . Manager ) ( backupInfo , error ) {
2018-08-20 23:29:54 +00:00
backup , err := c . backupLister . Backups ( c . namespace ) . Get ( backupName )
2018-08-14 14:28:11 +00:00
if err != nil {
2018-10-30 19:14:40 +00:00
return backupInfo { } , err
2017-08-25 22:02:00 +00:00
}
2020-06-24 16:55:18 +00:00
location := & velerov1api . BackupStorageLocation { }
if err := c . kbClient . Get ( context . Background ( ) , client . ObjectKey {
Namespace : c . namespace ,
Name : backup . Spec . StorageLocation ,
} , location ) ; err != nil {
2018-08-14 14:28:11 +00:00
return backupInfo { } , errors . WithStack ( err )
2017-08-25 22:02:00 +00:00
}
2021-02-08 18:04:08 +00:00
backupStore , err := c . backupStoreGetter . Get ( location , pluginManager , c . logger )
2018-08-14 14:28:11 +00:00
if err != nil {
2018-08-20 23:29:54 +00:00
return backupInfo { } , err
2018-08-14 14:28:11 +00:00
}
2017-09-14 21:27:31 +00:00
2018-08-20 23:29:54 +00:00
return backupInfo {
backup : backup ,
2020-05-27 23:03:52 +00:00
location : location ,
2018-08-20 23:29:54 +00:00
backupStore : backupStore ,
} , nil
2018-08-14 14:28:11 +00:00
}
2019-04-09 21:17:28 +00:00
// runValidatedRestore takes a validated restore API object and executes the restore process.
// The log and results files are uploaded to backup storage. Any error returned from this function
// means that the restore failed. This function updates the restore API object with warning and error
// counts, but *does not* update its phase or patch it via the API.
func ( c * restoreController ) runValidatedRestore ( restore * api . Restore , info backupInfo ) error {
// instantiate the per-restore logger that will output both to a temp file
// (for upload to object storage) and to stdout.
2019-07-30 23:29:34 +00:00
restoreLog , err := newRestoreLogger ( restore , c . logger , c . restoreLogLevel , c . logFormat )
2018-05-13 13:28:09 +00:00
if err != nil {
2019-04-09 21:17:28 +00:00
return err
}
defer restoreLog . closeAndRemove ( c . logger )
pluginManager := c . newPluginManager ( restoreLog )
defer pluginManager . CleanupClients ( )
actions , err := pluginManager . GetRestoreItemActions ( )
2017-08-02 17:27:17 +00:00
if err != nil {
2019-04-09 21:17:28 +00:00
return errors . Wrap ( err , "error getting restore item actions" )
2017-09-12 19:54:08 +00:00
}
2017-08-02 17:27:17 +00:00
2019-04-09 21:17:28 +00:00
backupFile , err := downloadToTempFile ( restore . Spec . BackupName , info . backupStore , restoreLog )
2017-10-26 15:24:16 +00:00
if err != nil {
2019-04-09 21:17:28 +00:00
return errors . Wrap ( err , "error downloading backup" )
2017-10-26 15:24:16 +00:00
}
2019-04-09 21:17:28 +00:00
defer closeAndRemoveFile ( backupFile , c . logger )
2017-11-21 17:24:43 +00:00
2020-05-07 18:56:13 +00:00
opts := label . NewListOptionsForBackup ( restore . Spec . BackupName )
2020-02-25 20:01:24 +00:00
k8s 1.18 import (#2651)
* k8s 1.18 import wip
backup, cmd, controller, generated, restic, restore, serverstatusrequest, test and util
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go mod tidy
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* add changelog file
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go fmt
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* update code-generator and controller-gen in CI
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* checkout proper code-generator version, regen
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix remaining calls
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* regenerate CRDs with ./hack/update-generated-crd-code.sh
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use existing context in restic and server
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix test cases by resetting resource version
also use main library go context, not golang.org/x/net/context, in pkg/restore/restore.go
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* clarify changelog message
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use github.com/kubernetes-csi/external-snapshotter/v2@v2.2.0-rc1
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* run 'go mod tidy' to remove old external-snapshotter version
Signed-off-by: Andrew Lavery <laverya@umich.edu>
2020-07-16 16:21:37 +00:00
podVolumeBackupList , err := c . podVolumeBackupClient . PodVolumeBackups ( c . namespace ) . List ( context . TODO ( ) , opts )
2019-08-06 20:17:36 +00:00
if err != nil {
return errors . WithStack ( err )
}
2018-10-16 14:28:05 +00:00
volumeSnapshots , err := info . backupStore . GetBackupVolumeSnapshots ( restore . Spec . BackupName )
if err != nil {
2019-04-09 21:17:28 +00:00
return errors . Wrap ( err , "error fetching volume snapshots metadata" )
2018-10-16 14:28:05 +00:00
}
2019-04-09 21:17:28 +00:00
restoreLog . Info ( "starting restore" )
2019-08-06 20:17:36 +00:00
var podVolumeBackups [ ] * velerov1api . PodVolumeBackup
for i := range podVolumeBackupList . Items {
podVolumeBackups = append ( podVolumeBackups , & podVolumeBackupList . Items [ i ] )
}
restoreReq := pkgrestore . Request {
Log : restoreLog ,
Restore : restore ,
Backup : info . backup ,
PodVolumeBackups : podVolumeBackups ,
VolumeSnapshots : volumeSnapshots ,
BackupReader : backupFile ,
}
restoreWarnings , restoreErrors := c . restorer . Restore ( restoreReq , actions , c . snapshotLocationLister , pluginManager )
2019-04-09 21:17:28 +00:00
restoreLog . Info ( "restore completed" )
2017-09-12 19:54:08 +00:00
2020-05-27 23:03:52 +00:00
// re-instantiate the backup store because credentials could have changed since the original
// instantiation, if this was a long-running restore
2021-02-08 18:04:08 +00:00
info . backupStore , err = c . backupStoreGetter . Get ( info . location , pluginManager , c . logger )
2020-05-27 23:03:52 +00:00
if err != nil {
return errors . Wrap ( err , "error setting up backup store to persist log and results files" )
}
2019-04-09 21:17:28 +00:00
if logReader , err := restoreLog . done ( c . logger ) ; err != nil {
restoreErrors . Velero = append ( restoreErrors . Velero , fmt . Sprintf ( "error getting restore log reader: %v" , err ) )
} else {
if err := info . backupStore . PutRestoreLog ( restore . Spec . BackupName , restore . Name , logReader ) ; err != nil {
restoreErrors . Velero = append ( restoreErrors . Velero , fmt . Sprintf ( "error uploading log file to backup storage: %v" , err ) )
}
2018-05-13 13:28:09 +00:00
}
2019-04-09 21:17:28 +00:00
// At this point, no further logs should be written to restoreLog since it's been uploaded
// to object storage.
2019-03-27 22:54:04 +00:00
restore . Status . Warnings = len ( restoreWarnings . Velero ) + len ( restoreWarnings . Cluster )
2019-04-09 21:17:28 +00:00
for _ , w := range restoreWarnings . Namespaces {
restore . Status . Warnings += len ( w )
2017-09-12 19:54:08 +00:00
}
2019-03-27 22:54:04 +00:00
restore . Status . Errors = len ( restoreErrors . Velero ) + len ( restoreErrors . Cluster )
2019-04-09 21:17:28 +00:00
for _ , e := range restoreErrors . Namespaces {
restore . Status . Errors += len ( e )
2017-09-12 19:54:08 +00:00
}
2019-04-16 18:57:02 +00:00
m := map [ string ] pkgrestore . Result {
2017-10-26 15:24:16 +00:00
"warnings" : restoreWarnings ,
"errors" : restoreErrors ,
}
2019-04-09 21:17:28 +00:00
if err := putResults ( restore , m , info . backupStore , c . logger ) ; err != nil {
c . logger . WithError ( err ) . Error ( "Error uploading restore results to backup storage" )
}
return nil
}
2019-04-16 18:57:02 +00:00
func putResults ( restore * api . Restore , results map [ string ] pkgrestore . Result , backupStore persistence . BackupStore , log logrus . FieldLogger ) error {
2019-04-09 21:17:28 +00:00
buf := new ( bytes . Buffer )
gzw := gzip . NewWriter ( buf )
defer gzw . Close ( )
2017-10-26 15:24:16 +00:00
2019-04-09 21:17:28 +00:00
if err := json . NewEncoder ( gzw ) . Encode ( results ) ; err != nil {
return errors . Wrap ( err , "error encoding restore results to JSON" )
2017-10-26 15:24:16 +00:00
}
2019-04-09 21:17:28 +00:00
if err := gzw . Close ( ) ; err != nil {
return errors . Wrap ( err , "error closing gzip writer" )
2017-11-14 19:39:42 +00:00
}
2019-04-09 21:17:28 +00:00
if err := backupStore . PutRestoreResults ( restore . Spec . BackupName , restore . Name , buf ) ; err != nil {
return err
2017-10-26 15:24:16 +00:00
}
2019-04-09 21:17:28 +00:00
return nil
2017-08-02 17:27:17 +00:00
}
2019-04-09 21:17:28 +00:00
func downloadToTempFile ( backupName string , backupStore persistence . BackupStore , logger logrus . FieldLogger ) ( * os . File , error ) {
2018-08-20 23:29:54 +00:00
readCloser , err := backupStore . GetBackupContents ( backupName )
2017-08-02 17:27:17 +00:00
if err != nil {
return nil , err
}
defer readCloser . Close ( )
file , err := ioutil . TempFile ( "" , backupName )
if err != nil {
2017-09-14 21:27:31 +00:00
return nil , errors . Wrap ( err , "error creating Backup temp file" )
2017-08-02 17:27:17 +00:00
}
n , err := io . Copy ( file , readCloser )
if err != nil {
2021-06-02 20:41:50 +00:00
//Temporary file has been created if we go here. And some problems occurs such as network interruption and
//so on. So we close and remove temporary file first to prevent residual file.
closeAndRemoveFile ( file , logger )
2017-09-14 21:27:31 +00:00
return nil , errors . Wrap ( err , "error copying Backup to temp file" )
2017-08-02 17:27:17 +00:00
}
2017-09-14 21:27:31 +00:00
2018-08-29 19:52:09 +00:00
log := logger . WithField ( "backup" , backupName )
2017-09-14 21:27:31 +00:00
2018-08-29 19:52:09 +00:00
log . WithFields ( logrus . Fields {
2017-09-14 21:27:31 +00:00
"fileName" : file . Name ( ) ,
"bytes" : n ,
} ) . Debug ( "Copied Backup to file" )
2017-08-02 17:27:17 +00:00
if _ , err := file . Seek ( 0 , 0 ) ; err != nil {
2021-06-02 20:41:50 +00:00
closeAndRemoveFile ( file , logger )
2017-09-14 21:27:31 +00:00
return nil , errors . Wrap ( err , "error resetting Backup file offset" )
2017-08-02 17:27:17 +00:00
}
return file , nil
}
2017-12-11 22:10:52 +00:00
2019-01-25 03:33:07 +00:00
func patchRestore ( original , updated * api . Restore , client velerov1client . RestoresGetter ) ( * api . Restore , error ) {
2017-12-11 22:10:52 +00:00
origBytes , err := json . Marshal ( original )
if err != nil {
return nil , errors . Wrap ( err , "error marshalling original restore" )
}
updatedBytes , err := json . Marshal ( updated )
if err != nil {
return nil , errors . Wrap ( err , "error marshalling updated restore" )
}
2018-05-14 21:34:24 +00:00
patchBytes , err := jsonpatch . CreateMergePatch ( origBytes , updatedBytes )
2017-12-11 22:10:52 +00:00
if err != nil {
2018-05-14 21:34:24 +00:00
return nil , errors . Wrap ( err , "error creating json merge patch for restore" )
2017-12-11 22:10:52 +00:00
}
k8s 1.18 import (#2651)
* k8s 1.18 import wip
backup, cmd, controller, generated, restic, restore, serverstatusrequest, test and util
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go mod tidy
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* add changelog file
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go fmt
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* update code-generator and controller-gen in CI
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* checkout proper code-generator version, regen
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix remaining calls
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* regenerate CRDs with ./hack/update-generated-crd-code.sh
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use existing context in restic and server
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix test cases by resetting resource version
also use main library go context, not golang.org/x/net/context, in pkg/restore/restore.go
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* clarify changelog message
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use github.com/kubernetes-csi/external-snapshotter/v2@v2.2.0-rc1
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* run 'go mod tidy' to remove old external-snapshotter version
Signed-off-by: Andrew Lavery <laverya@umich.edu>
2020-07-16 16:21:37 +00:00
res , err := client . Restores ( original . Namespace ) . Patch ( context . TODO ( ) , original . Name , types . MergePatchType , patchBytes , metav1 . PatchOptions { } )
2017-12-11 22:10:52 +00:00
if err != nil {
return nil , errors . Wrap ( err , "error patching restore" )
}
return res , nil
}
2019-04-09 21:17:28 +00:00
// restoreLogger is the per-restore logger. It embeds a FieldLogger whose
// output is gzip-compressed into a temporary file in addition to being
// written to stdout.
type restoreLogger struct {
	logrus.FieldLogger

	// file is the temporary file that receives the compressed log.
	file *os.File
	// w is the gzip writer wrapping file.
	w *gzip.Writer
}
2019-07-30 23:29:34 +00:00
func newRestoreLogger ( restore * api . Restore , baseLogger logrus . FieldLogger , logLevel logrus . Level , logFormat logging . Format ) ( * restoreLogger , error ) {
2019-04-09 21:17:28 +00:00
file , err := ioutil . TempFile ( "" , "" )
if err != nil {
return nil , errors . Wrap ( err , "error creating temp file" )
}
w := gzip . NewWriter ( file )
2019-07-30 23:29:34 +00:00
logger := logging . DefaultLogger ( logLevel , logFormat )
2019-04-09 21:17:28 +00:00
logger . Out = io . MultiWriter ( os . Stdout , w )
return & restoreLogger {
FieldLogger : logger . WithField ( "restore" , kubeutil . NamespaceAndName ( restore ) ) ,
file : file ,
w : w ,
} , nil
}
// done makes the restoreLogger unusable for further logging and returns an
// io.Reader positioned at the start of the logged content. Logging through
// the restoreLogger after done has been called will panic, because the
// embedded logger is cleared. A failure to close the gzip writer is reported
// on log but does not prevent the reader from being returned; a failure to
// rewind the file is returned as an error.
func (l *restoreLogger) done(log logrus.FieldLogger) (io.Reader, error) {
	l.FieldLogger = nil

	closeErr := l.w.Close()
	if closeErr != nil {
		log.WithError(errors.WithStack(closeErr)).Error("error closing gzip writer")
	}

	_, seekErr := l.file.Seek(0, 0)
	if seekErr != nil {
		return nil, errors.Wrap(seekErr, "error resetting log file offset to 0")
	}

	return l.file, nil
}
// closeAndRemove disposes of the temporary file backing the logger by passing
// it to closeAndRemoveFile. Call it once all logging to, and reading from,
// the logger has finished.
func (l *restoreLogger) closeAndRemove(log logrus.FieldLogger) {
	closeAndRemoveFile(l.file, log)
}