/*
Copyright 2017, 2019 the Velero contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"bytes"
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"time"

	jsonpatch "github.com/evanphx/json-patch"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/clock"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/client-go/tools/cache"

	snapshotv1beta1api "github.com/kubernetes-csi/external-snapshotter/v2/pkg/apis/volumesnapshot/v1beta1"
	snapshotv1beta1listers "github.com/kubernetes-csi/external-snapshotter/v2/pkg/client/listers/volumesnapshot/v1beta1"

	velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
	pkgbackup "github.com/vmware-tanzu/velero/pkg/backup"
	"github.com/vmware-tanzu/velero/pkg/discovery"
	"github.com/vmware-tanzu/velero/pkg/features"
	velerov1client "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned/typed/velero/v1"
	velerov1informers "github.com/vmware-tanzu/velero/pkg/generated/informers/externalversions/velero/v1"
	velerov1listers "github.com/vmware-tanzu/velero/pkg/generated/listers/velero/v1"
	"github.com/vmware-tanzu/velero/pkg/label"
	"github.com/vmware-tanzu/velero/pkg/metrics"
	"github.com/vmware-tanzu/velero/pkg/persistence"
	"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
	"github.com/vmware-tanzu/velero/pkg/util/boolptr"
	"github.com/vmware-tanzu/velero/pkg/util/collections"
	"github.com/vmware-tanzu/velero/pkg/util/encode"
	kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
	"github.com/vmware-tanzu/velero/pkg/util/logging"
	"github.com/vmware-tanzu/velero/pkg/volume"
)
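
// backupController is the controller responsible for processing Backup custom
// resources: it validates new backups, runs them, and uploads the results to
// the configured backup storage location.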
type backupController struct {
	*genericController

	discoveryHelper             discovery.Helper
	backupper                   pkgbackup.Backupper
	lister                      velerov1listers.BackupLister
	client                      velerov1client.BackupsGetter
	clock                       clock.Clock
	backupLogLevel              logrus.Level
	newPluginManager            func(logrus.FieldLogger) clientmgmt.Manager
	backupTracker               BackupTracker
	backupLocationLister        velerov1listers.BackupStorageLocationLister
	defaultBackupLocation       string
	defaultBackupTTL            time.Duration
	snapshotLocationLister      velerov1listers.VolumeSnapshotLocationLister
	defaultSnapshotLocations    map[string]string
	metrics                     *metrics.ServerMetrics
	newBackupStore              func(*velerov1api.BackupStorageLocation, persistence.ObjectStoreGetter, logrus.FieldLogger) (persistence.BackupStore, error)
	formatFlag                  logging.Format
	volumeSnapshotLister        snapshotv1beta1listers.VolumeSnapshotLister
	volumeSnapshotContentLister snapshotv1beta1listers.VolumeSnapshotContentLister
}
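
// NewBackupController constructs a backupController wired up with the given
// listers, clients, and plugin manager factory, and registers an informer
// handler that enqueues backups that are still in the New (or empty) phase.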
func NewBackupController(
	backupInformer velerov1informers.BackupInformer,
	client velerov1client.BackupsGetter,
	discoveryHelper discovery.Helper,
	backupper pkgbackup.Backupper,
	logger logrus.FieldLogger,
	backupLogLevel logrus.Level,
	newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
	backupTracker BackupTracker,
	backupLocationLister velerov1listers.BackupStorageLocationLister,
	defaultBackupLocation string,
	defaultBackupTTL time.Duration,
	volumeSnapshotLocationLister velerov1listers.VolumeSnapshotLocationLister,
	defaultSnapshotLocations map[string]string,
	metrics *metrics.ServerMetrics,
	formatFlag logging.Format,
	volumeSnapshotLister snapshotv1beta1listers.VolumeSnapshotLister,
	volumeSnapshotContentLister snapshotv1beta1listers.VolumeSnapshotContentLister,
) Interface {
	c := &backupController{
		genericController:           newGenericController("backup", logger),
		discoveryHelper:             discoveryHelper,
		backupper:                   backupper,
		lister:                      backupInformer.Lister(),
		client:                      client,
		clock:                       &clock.RealClock{},
		backupLogLevel:              backupLogLevel,
		newPluginManager:            newPluginManager,
		backupTracker:               backupTracker,
		backupLocationLister:        backupLocationLister,
		defaultBackupLocation:       defaultBackupLocation,
		defaultBackupTTL:            defaultBackupTTL,
		snapshotLocationLister:      volumeSnapshotLocationLister,
		defaultSnapshotLocations:    defaultSnapshotLocations,
		metrics:                     metrics,
		formatFlag:                  formatFlag,
		volumeSnapshotLister:        volumeSnapshotLister,
		volumeSnapshotContentLister: volumeSnapshotContentLister,
		newBackupStore:              persistence.NewObjectBackupStore,
	}

	c.syncHandler = c.processBackup
	c.resyncFunc = c.resync
	c.resyncPeriod = time.Minute

	backupInformer.Informer().AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				backup := obj.(*velerov1api.Backup)

				switch backup.Status.Phase {
				case "", velerov1api.BackupPhaseNew:
					// only process new backups
				default:
					c.logger.WithFields(logrus.Fields{
						"backup": kubeutil.NamespaceAndName(backup),
						"phase":  backup.Status.Phase,
					}).Debug("Backup is not new, skipping")
					return
				}

				key, err := cache.MetaNamespaceKeyFunc(backup)
				if err != nil {
					c.logger.WithError(err).WithField("backup", backup).Error("Error creating queue key, item not added to queue")
					return
				}

				c.queue.Add(key)
			},
		},
	)

	return c
}
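
// resync recomputes the backup_total and backup_last_successful_timestamp
// metrics from the lister's cache; it is wired up as the controller's
// resyncFunc and runs every resyncPeriod (one minute).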
func (c *backupController) resync() {
	// recompute backup_total metric
	backups, err := c.lister.List(labels.Everything())
	if err != nil {
		c.logger.Error(err, "Error computing backup_total metric")
	} else {
		c.metrics.SetBackupTotal(int64(len(backups)))
	}

	// recompute backup_last_successful_timestamp metric for each
	// schedule (including the empty schedule, i.e. ad-hoc backups)
	for schedule, timestamp := range getLastSuccessBySchedule(backups) {
		c.metrics.SetBackupLastSuccessfulTimestamp(schedule, timestamp)
	}
}

// getLastSuccessBySchedule finds the most recent completed backup for each schedule
// and returns a map of schedule name -> completion time of the most recent completed
// backup. This map includes an entry for ad-hoc/non-scheduled backups, where the key
// is the empty string.
func getLastSuccessBySchedule(backups []*velerov1api.Backup) map[string]time.Time {
	lastSuccessBySchedule := map[string]time.Time{}
	for _, backup := range backups {
		if backup.Status.Phase != velerov1api.BackupPhaseCompleted {
			continue
		}
		if backup.Status.CompletionTimestamp == nil {
			continue
		}

		schedule := backup.Labels[velerov1api.ScheduleNameLabel]
		timestamp := backup.Status.CompletionTimestamp.Time

		if timestamp.After(lastSuccessBySchedule[schedule]) {
			lastSuccessBySchedule[schedule] = timestamp
		}
	}

	return lastSuccessBySchedule
}
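
// processBackup is the controller's sync handler. It loads the Backup for the
// given queue key, skips anything that isn't new, prepares and validates the
// backup request, patches the phase to InProgress (or FailedValidation), runs
// the backup, registers metrics, and patches the final status back to the API
// server.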
func (c *backupController) processBackup(key string) error {
	log := c.logger.WithField("key", key)

	log.Debug("Running processBackup")
	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		log.WithError(err).Errorf("error splitting key")
		return nil
	}

	log.Debug("Getting backup")
	original, err := c.lister.Backups(ns).Get(name)
	if apierrors.IsNotFound(err) {
		log.Debugf("backup %s not found", name)
		return nil
	}
	if err != nil {
		return errors.Wrap(err, "error getting backup")
	}

	// Double-check we have the correct phase. In the unlikely event that multiple controller
	// instances are running, it's possible for controller A to succeed in changing the phase to
	// InProgress, while controller B's attempt to patch the phase fails. When controller B
	// reprocesses the same backup, it will either show up as New (informer hasn't seen the update
	// yet) or as InProgress. In the former case, the patch attempt will fail again, until the
	// informer sees the update. In the latter case, after the informer has seen the update to
	// InProgress, we still need this check so we can return nil to indicate we've finished processing
	// this key (even though it was a no-op).
	switch original.Status.Phase {
	case "", velerov1api.BackupPhaseNew:
		// only process new backups
	default:
		return nil
	}

	log.Debug("Preparing backup request")
	request := c.prepareBackupRequest(original)

	if len(request.Status.ValidationErrors) > 0 {
		request.Status.Phase = velerov1api.BackupPhaseFailedValidation
	} else {
		request.Status.Phase = velerov1api.BackupPhaseInProgress
		request.Status.StartTimestamp = &metav1.Time{Time: c.clock.Now()}
	}

	// update status
	updatedBackup, err := patchBackup(original, request.Backup, c.client)
	if err != nil {
		return errors.Wrapf(err, "error updating Backup status to %s", request.Status.Phase)
	}

	// store ref to just-updated item for creating patch
	original = updatedBackup
	request.Backup = updatedBackup.DeepCopy()

	if request.Status.Phase == velerov1api.BackupPhaseFailedValidation {
		return nil
	}

	c.backupTracker.Add(request.Namespace, request.Name)
	defer c.backupTracker.Delete(request.Namespace, request.Name)

	log.Debug("Running backup")

	backupScheduleName := request.GetLabels()[velerov1api.ScheduleNameLabel]
	c.metrics.RegisterBackupAttempt(backupScheduleName)

	// execution & upload of backup
	if err := c.runBackup(request); err != nil {
		// even though runBackup sets the backup's phase prior
		// to uploading artifacts to object storage, we have to
		// check for an error again here and update the phase if
		// one is found, because there could've been an error
		// while uploading artifacts to object storage, which would
		// result in the backup being Failed.
		log.WithError(err).Error("backup failed")
		request.Status.Phase = velerov1api.BackupPhaseFailed
	}

	switch request.Status.Phase {
	case velerov1api.BackupPhaseCompleted:
		c.metrics.RegisterBackupSuccess(backupScheduleName)
	case velerov1api.BackupPhasePartiallyFailed:
		c.metrics.RegisterBackupPartialFailure(backupScheduleName)
	case velerov1api.BackupPhaseFailed:
		c.metrics.RegisterBackupFailed(backupScheduleName)
	}

	log.Debug("Updating backup's final status")
	if _, err := patchBackup(original, request.Backup, c.client); err != nil {
		log.WithError(err).Error("error updating backup's final status")
	}

	return nil
}
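
// patchBackup computes a JSON merge patch between the original and updated
// Backup objects, submits it to the API server, and returns the patched Backup.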
func patchBackup(original, updated *velerov1api.Backup, client velerov1client.BackupsGetter) (*velerov1api.Backup, error) {
	origBytes, err := json.Marshal(original)
	if err != nil {
		return nil, errors.Wrap(err, "error marshalling original backup")
	}

	updatedBytes, err := json.Marshal(updated)
	if err != nil {
		return nil, errors.Wrap(err, "error marshalling updated backup")
	}

	patchBytes, err := jsonpatch.CreateMergePatch(origBytes, updatedBytes)
	if err != nil {
		return nil, errors.Wrap(err, "error creating json merge patch for backup")
	}

	res, err := client.Backups(original.Namespace).Patch(original.Name, types.MergePatchType, patchBytes)
	if err != nil {
		return nil, errors.Wrap(err, "error patching backup")
	}

	return res, nil
}
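
// prepareBackupRequest returns a backup Request based on a deep copy of the
// given Backup, with defaults applied (TTL, expiration, storage location,
// labels, and source-cluster version annotations) and with any validation
// errors accumulated in Status.ValidationErrors.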
func (c *backupController) prepareBackupRequest(backup *velerov1api.Backup) *pkgbackup.Request {
	request := &pkgbackup.Request{
		Backup: backup.DeepCopy(), // don't modify items in the cache
	}

	// set backup major version - deprecated, use Status.FormatVersion
	request.Status.Version = pkgbackup.BackupVersion

	// set backup major, minor, and patch version
	request.Status.FormatVersion = pkgbackup.BackupFormatVersion

	if request.Spec.TTL.Duration == 0 {
		// set default backup TTL
		request.Spec.TTL.Duration = c.defaultBackupTTL
	}

	// calculate expiration
	request.Status.Expiration = &metav1.Time{Time: c.clock.Now().Add(request.Spec.TTL.Duration)}

	// default storage location if not specified
	if request.Spec.StorageLocation == "" {
		request.Spec.StorageLocation = c.defaultBackupLocation
	}

	// add the storage location as a label for easy filtering later.
	if request.Labels == nil {
		request.Labels = make(map[string]string)
	}
	request.Labels[velerov1api.StorageLocationLabel] = label.GetValidName(request.Spec.StorageLocation)

	// record the source cluster's Kubernetes version - useful for future skip-level migration
	if request.Annotations == nil {
		request.Annotations = make(map[string]string)
	}
	request.Annotations[velerov1api.SourceClusterK8sGitVersionAnnotation] = c.discoveryHelper.ServerVersion().String()
	request.Annotations[velerov1api.SourceClusterK8sMajorVersionAnnotation] = c.discoveryHelper.ServerVersion().Major
	request.Annotations[velerov1api.SourceClusterK8sMinorVersionAnnotation] = c.discoveryHelper.ServerVersion().Minor

	// validate the included/excluded resources
	for _, err := range collections.ValidateIncludesExcludes(request.Spec.IncludedResources, request.Spec.ExcludedResources) {
		request.Status.ValidationErrors = append(request.Status.ValidationErrors, fmt.Sprintf("Invalid included/excluded resource lists: %v", err))
	}

	// validate the included/excluded namespaces
	for _, err := range collections.ValidateIncludesExcludes(request.Spec.IncludedNamespaces, request.Spec.ExcludedNamespaces) {
		request.Status.ValidationErrors = append(request.Status.ValidationErrors, fmt.Sprintf("Invalid included/excluded namespace lists: %v", err))
	}

	// validate the storage location, and store the BackupStorageLocation API obj on the request
	if storageLocation, err := c.backupLocationLister.BackupStorageLocations(request.Namespace).Get(request.Spec.StorageLocation); err != nil {
		if apierrors.IsNotFound(err) {
			request.Status.ValidationErrors = append(request.Status.ValidationErrors, fmt.Sprintf("a BackupStorageLocation CRD with the name specified in the backup spec needs to be created before this backup can be executed. Error: %v", err))
		} else {
			request.Status.ValidationErrors = append(request.Status.ValidationErrors, fmt.Sprintf("error getting backup storage location: %v", err))
		}
	} else {
		request.StorageLocation = storageLocation

		if request.StorageLocation.Spec.AccessMode == velerov1api.BackupStorageLocationAccessModeReadOnly {
			request.Status.ValidationErrors = append(request.Status.ValidationErrors,
				fmt.Sprintf("backup can't be created because backup storage location %s is currently in read-only mode", request.StorageLocation.Name))
		}
	}

	// validate and get the backup's VolumeSnapshotLocations, and store the
	// VolumeSnapshotLocation API objs on the request
	if locs, errs := c.validateAndGetSnapshotLocations(request.Backup); len(errs) > 0 {
		request.Status.ValidationErrors = append(request.Status.ValidationErrors, errs...)
	} else {
		request.Spec.VolumeSnapshotLocations = nil
		for _, loc := range locs {
			request.Spec.VolumeSnapshotLocations = append(request.Spec.VolumeSnapshotLocations, loc.Name)
			request.SnapshotLocations = append(request.SnapshotLocations, loc)
		}
	}

	return request
}

// validateAndGetSnapshotLocations gets a collection of VolumeSnapshotLocation objects that
// this backup will use (returned as a map of provider name -> VSL), and ensures:
// - each location name in .spec.volumeSnapshotLocations exists as a location
// - exactly 1 location per provider
// - a given provider's default location name is added to .spec.volumeSnapshotLocations if one
//   is not explicitly specified for the provider (if there's only one location for the provider,
//   it will automatically be used)
// If the backup has volume snapshots disabled (spec.snapshotVolumes is false), it returns no locations.
func (c *backupController) validateAndGetSnapshotLocations(backup *velerov1api.Backup) (map[string]*velerov1api.VolumeSnapshotLocation, []string) {
	errors := []string{}
	providerLocations := make(map[string]*velerov1api.VolumeSnapshotLocation)

	// if snapshotVolumes is set to false then we don't need to validate volume snapshot locations
	if boolptr.IsSetToFalse(backup.Spec.SnapshotVolumes) {
		return nil, nil
	}

	for _, locationName := range backup.Spec.VolumeSnapshotLocations {
		// validate each locationName exists as a VolumeSnapshotLocation
		location, err := c.snapshotLocationLister.VolumeSnapshotLocations(backup.Namespace).Get(locationName)
		if err != nil {
			if apierrors.IsNotFound(err) {
				errors = append(errors, fmt.Sprintf("a VolumeSnapshotLocation CRD for the location %s with the name specified in the backup spec needs to be created before this snapshot can be executed. Error: %v", locationName, err))
			} else {
				errors = append(errors, fmt.Sprintf("error getting volume snapshot location named %s: %v", locationName, err))
			}
			continue
		}

		// ensure we end up with exactly 1 location *per provider*
		if providerLocation, ok := providerLocations[location.Spec.Provider]; ok {
			// if > 1 location name per provider as in ["aws-us-east-1" | "aws-us-west-1"] (same provider, multiple names)
			if providerLocation.Name != locationName {
				errors = append(errors, fmt.Sprintf("more than one VolumeSnapshotLocation name specified for provider %s: %s; unexpected name was %s", location.Spec.Provider, locationName, providerLocation.Name))
				continue
			}
		} else {
			// keep track of all valid existing locations, per provider
			providerLocations[location.Spec.Provider] = location
		}
	}

	if len(errors) > 0 {
		return nil, errors
	}

	allLocations, err := c.snapshotLocationLister.VolumeSnapshotLocations(backup.Namespace).List(labels.Everything())
	if err != nil {
		errors = append(errors, fmt.Sprintf("error listing volume snapshot locations: %v", err))
		return nil, errors
	}

	// build a map of provider->list of all locations for the provider
	allProviderLocations := make(map[string][]*velerov1api.VolumeSnapshotLocation)
	for i := range allLocations {
		loc := allLocations[i]
		allProviderLocations[loc.Spec.Provider] = append(allProviderLocations[loc.Spec.Provider], loc)
	}

	// go through each provider and make sure we have/can get a VSL
	// for it
	for provider, locations := range allProviderLocations {
		if _, ok := providerLocations[provider]; ok {
			// backup's spec had a location named for this provider
			continue
		}

		if len(locations) > 1 {
			// more than one possible location for the provider: check
			// the defaults
			defaultLocation := c.defaultSnapshotLocations[provider]
			if defaultLocation == "" {
				errors = append(errors, fmt.Sprintf("provider %s has more than one possible volume snapshot location, and none were specified explicitly or as a default", provider))
				continue
			}

			location, err := c.snapshotLocationLister.VolumeSnapshotLocations(backup.Namespace).Get(defaultLocation)
			if err != nil {
				errors = append(errors, fmt.Sprintf("error getting volume snapshot location named %s: %v", defaultLocation, err))
				continue
			}

			providerLocations[provider] = location
			continue
		}

		// exactly one location for the provider: use it
		providerLocations[provider] = locations[0]
	}

	if len(errors) > 0 {
		return nil, errors
	}

	return providerLocations, nil
}

// runBackup runs and uploads a validated backup. Any error returned from this function
// causes the backup to be Failed; if no error is returned, the backup's status's Errors
// field is checked to see if the backup was a partial failure.
func (c *backupController) runBackup(backup *pkgbackup.Request) error {
	c.logger.WithField("backup", kubeutil.NamespaceAndName(backup)).Info("Setting up backup log")

	logFile, err := ioutil.TempFile("", "")
	if err != nil {
		return errors.Wrap(err, "error creating temp file for backup log")
	}
	gzippedLogFile := gzip.NewWriter(logFile)
	// Assuming we successfully uploaded the log file, this will have already been closed below. It is safe to call
	// close multiple times. If we get an error closing this, there's not really anything we can do about it.
	defer gzippedLogFile.Close()
	defer closeAndRemoveFile(logFile, c.logger)

	// Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
	// backup log failed for whatever reason.
	logger := logging.DefaultLogger(c.backupLogLevel, c.formatFlag)
	logger.Out = io.MultiWriter(os.Stdout, gzippedLogFile)

	logCounter := logging.NewLogCounterHook()
	logger.Hooks.Add(logCounter)

	backupLog := logger.WithField("backup", kubeutil.NamespaceAndName(backup))

	backupLog.Info("Setting up backup temp file")
	backupFile, err := ioutil.TempFile("", "")
	if err != nil {
		return errors.Wrap(err, "error creating temp file for backup")
	}
	defer closeAndRemoveFile(backupFile, backupLog)

	backupLog.Info("Setting up plugin manager")
	pluginManager := c.newPluginManager(backupLog)
	defer pluginManager.CleanupClients()

	backupLog.Info("Getting backup item actions")
	actions, err := pluginManager.GetBackupItemActions()
	if err != nil {
		return err
	}

	backupLog.Info("Setting up backup store")
	backupStore, err := c.newBackupStore(backup.StorageLocation, pluginManager, backupLog)
	if err != nil {
		return err
	}

	exists, err := backupStore.BackupExists(backup.StorageLocation.Spec.StorageType.ObjectStorage.Bucket, backup.Name)
	if exists || err != nil {
		backup.Status.Phase = velerov1api.BackupPhaseFailed
		backup.Status.CompletionTimestamp = &metav1.Time{Time: c.clock.Now()}
		if err != nil {
			return errors.Wrapf(err, "error checking if backup already exists in object storage")
		}
		return errors.Errorf("backup already exists in object storage")
	}

	var fatalErrs []error
	if err := c.backupper.Backup(backupLog, backup, backupFile, actions, pluginManager); err != nil {
		fatalErrs = append(fatalErrs, err)
	}

	// Empty slices here so that they can be passed in to the persistBackup call later, regardless of whether or not CSI's enabled.
	// This way, we only make the Lister call if the feature flag's on.
	var volumeSnapshots []*snapshotv1beta1api.VolumeSnapshot
	var volumeSnapshotContents []*snapshotv1beta1api.VolumeSnapshotContent
	if features.IsEnabled(velerov1api.CSIFeatureFlag) {
		selector := label.NewSelectorForBackup(backup.Name)

		// Listers are wrapped in a nil check out of caution, since they may not be populated based on the
		// EnableCSI feature flag. This is more to guard against programmer error, as they shouldn't be nil
		// when EnableCSI is on.
		if c.volumeSnapshotLister != nil {
			volumeSnapshots, err = c.volumeSnapshotLister.List(selector)
			if err != nil {
				backupLog.Error(err)
			}
		}

		if c.volumeSnapshotContentLister != nil {
			volumeSnapshotContents, err = c.volumeSnapshotContentLister.List(selector)
			if err != nil {
				backupLog.Error(err)
			}
		}
	}

	// Mark completion timestamp before serializing and uploading.
	// Otherwise, the JSON file in object storage has a CompletionTimestamp of 'null'.
	backup.Status.CompletionTimestamp = &metav1.Time{Time: c.clock.Now()}

	backup.Status.VolumeSnapshotsAttempted = len(backup.VolumeSnapshots)
	for _, snap := range backup.VolumeSnapshots {
		if snap.Status.Phase == volume.SnapshotPhaseCompleted {
			backup.Status.VolumeSnapshotsCompleted++
		}
	}

	recordBackupMetrics(backupLog, backup.Backup, backupFile, c.metrics)

	if err := gzippedLogFile.Close(); err != nil {
		c.logger.WithError(err).Error("error closing gzippedLogFile")
	}

	backup.Status.Warnings = logCounter.GetCount(logrus.WarnLevel)
	backup.Status.Errors = logCounter.GetCount(logrus.ErrorLevel)

	// Assign finalize phase as close to end as possible so that any errors
	// logged to backupLog are captured. This is done before uploading the
	// artifacts to object storage so that the JSON representation of the
	// backup in object storage has the terminal phase set.
	switch {
	case len(fatalErrs) > 0:
		backup.Status.Phase = velerov1api.BackupPhaseFailed
	case logCounter.GetCount(logrus.ErrorLevel) > 0:
		backup.Status.Phase = velerov1api.BackupPhasePartiallyFailed
	default:
		backup.Status.Phase = velerov1api.BackupPhaseCompleted
	}

	if errs := persistBackup(backup, backupFile, logFile, backupStore, c.logger, volumeSnapshots, volumeSnapshotContents); len(errs) > 0 {
		fatalErrs = append(fatalErrs, errs...)
	}

	c.logger.Info("Backup completed")

	// if we return a non-nil error, the calling function will update
	// the backup's phase to Failed.
	return kerrors.NewAggregate(fatalErrs)
}
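
// recordBackupMetrics records the backup tarball size, the backup duration, and
// the volume snapshot attempt/success/failure counts, keyed by the backup's
// schedule name.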
func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, backupFile *os.File, serverMetrics *metrics.ServerMetrics) {
	backupScheduleName := backup.GetLabels()[velerov1api.ScheduleNameLabel]

	var backupSizeBytes int64
	if backupFileStat, err := backupFile.Stat(); err != nil {
		log.WithError(errors.WithStack(err)).Error("Error getting backup file info")
	} else {
		backupSizeBytes = backupFileStat.Size()
	}
	serverMetrics.SetBackupTarballSizeBytesGauge(backupScheduleName, backupSizeBytes)

	backupDuration := backup.Status.CompletionTimestamp.Time.Sub(backup.Status.StartTimestamp.Time)
	backupDurationSeconds := float64(backupDuration / time.Second)
	serverMetrics.RegisterBackupDuration(backupScheduleName, backupDurationSeconds)

	serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted)
	serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted)
	serverMetrics.RegisterVolumeSnapshotFailures(backupScheduleName, backup.Status.VolumeSnapshotsAttempted-backup.Status.VolumeSnapshotsCompleted)
}
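
// persistBackup gathers the backup's metadata JSON, log, tarball, pod volume
// backups, volume snapshot lists, and resource list into a persistence.BackupInfo
// and uploads it via the backup store. If any encoding step fails, the JSON files
// and tarball are withheld from the upload, and all accumulated errors are returned.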
func persistBackup(backup *pkgbackup.Request,
	backupContents, backupLog *os.File,
	backupStore persistence.BackupStore,
	log logrus.FieldLogger,
	csiVolumeSnapshots []*snapshotv1beta1api.VolumeSnapshot,
	csiVolumeSnapshotContents []*snapshotv1beta1api.VolumeSnapshotContent,
) []error {
	persistErrs := []error{}
	backupJSON := new(bytes.Buffer)

	if err := encode.EncodeTo(backup.Backup, "json", backupJSON); err != nil {
		persistErrs = append(persistErrs, errors.Wrap(err, "error encoding backup"))
	}

	// Velero-native volume snapshots (as opposed to CSI ones)
	nativeVolumeSnapshots, errs := encodeToJSONGzip(backup.VolumeSnapshots, "native volumesnapshots list")
	if errs != nil {
		persistErrs = append(persistErrs, errs...)
	}

	podVolumeBackups, errs := encodeToJSONGzip(backup.PodVolumeBackups, "pod volume backups list")
	if errs != nil {
		persistErrs = append(persistErrs, errs...)
	}

	csiSnapshotJSON, errs := encodeToJSONGzip(csiVolumeSnapshots, "csi volume snapshots list")
	if errs != nil {
		persistErrs = append(persistErrs, errs...)
	}

	csiSnapshotContentsJSON, errs := encodeToJSONGzip(csiVolumeSnapshotContents, "csi volume snapshot contents list")
	if errs != nil {
		persistErrs = append(persistErrs, errs...)
	}

	backupResourceList, errs := encodeToJSONGzip(backup.BackupResourceList(), "backup resources list")
	if errs != nil {
		persistErrs = append(persistErrs, errs...)
	}

	if len(persistErrs) > 0 {
		// Don't upload the JSON files or backup tarball if encoding to json fails.
		backupJSON = nil
		backupContents = nil
		nativeVolumeSnapshots = nil
		backupResourceList = nil
		csiSnapshotJSON = nil
		csiSnapshotContentsJSON = nil
	}

	backupInfo := persistence.BackupInfo{
		Name:                      backup.Name,
		Metadata:                  backupJSON,
		Contents:                  backupContents,
		Log:                       backupLog,
		PodVolumeBackups:          podVolumeBackups,
		VolumeSnapshots:           nativeVolumeSnapshots,
		BackupResourceList:        backupResourceList,
		CSIVolumeSnapshots:        csiSnapshotJSON,
		CSIVolumeSnapshotContents: csiSnapshotContentsJSON,
	}
	if err := backupStore.PutBackup(backupInfo); err != nil {
		persistErrs = append(persistErrs, err)
	}

	return persistErrs
}
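
// closeAndRemoveFile closes and deletes the given temp file, logging (but not
// returning) any errors encountered.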
func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) {
	if err := file.Close(); err != nil {
		log.WithError(err).WithField("file", file.Name()).Error("error closing file")
	}
	if err := os.Remove(file.Name()); err != nil {
		log.WithError(err).WithField("file", file.Name()).Error("error removing file")
	}
}

// encodeToJSONGzip encodes arbitrary Go data to gzip-compressed JSON in a buffer.
// desc is a short description of the data, used in any errors returned should
// encoding or closing the gzip writer fail.
func encodeToJSONGzip(data interface{}, desc string) (*bytes.Buffer, []error) {
	buf := new(bytes.Buffer)
	gzw := gzip.NewWriter(buf)

	// Since both encoding and closing the gzip writer could fail separately and both errors are useful,
	// collect both errors to report back.
	errs := []error{}

	if err := json.NewEncoder(gzw).Encode(data); err != nil {
		errs = append(errs, errors.Wrapf(err, "error encoding %s", desc))
	}
	if err := gzw.Close(); err != nil {
		errs = append(errs, errors.Wrapf(err, "error closing gzip writer for %s", desc))
	}

	if len(errs) > 0 {
		return nil, errs
	}

	return buf, nil
}