2017-08-02 17:27:17 +00:00
/ *
2021-09-03 15:03:35 +00:00
Copyright The Velero Contributors .
2017-08-02 17:27:17 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package controller
import (
"bytes"
2018-05-13 13:28:09 +00:00
"compress/gzip"
2020-06-24 16:55:18 +00:00
"context"
2017-12-11 22:10:52 +00:00
"encoding/json"
2017-08-02 17:27:17 +00:00
"fmt"
2017-12-19 17:16:39 +00:00
"io"
2017-08-02 17:27:17 +00:00
"io/ioutil"
"os"
2022-04-21 09:17:28 +00:00
"sync"
2017-08-02 17:27:17 +00:00
"time"
2022-05-02 08:37:28 +00:00
"github.com/apex/log"
2018-05-14 21:34:24 +00:00
jsonpatch "github.com/evanphx/json-patch"
2017-09-14 21:27:31 +00:00
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
2022-05-02 08:37:28 +00:00
"golang.org/x/sync/errgroup"
2022-04-21 09:17:28 +00:00
v1 "k8s.io/api/core/v1"
2018-10-24 00:38:08 +00:00
apierrors "k8s.io/apimachinery/pkg/api/errors"
2017-08-02 17:27:17 +00:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2018-10-26 15:32:46 +00:00
"k8s.io/apimachinery/pkg/labels"
2017-12-11 22:10:52 +00:00
"k8s.io/apimachinery/pkg/types"
2017-08-02 17:27:17 +00:00
"k8s.io/apimachinery/pkg/util/clock"
2017-12-19 17:16:39 +00:00
kerrors "k8s.io/apimachinery/pkg/util/errors"
2022-03-31 10:09:35 +00:00
"k8s.io/apimachinery/pkg/util/sets"
2022-05-02 08:37:28 +00:00
"k8s.io/apimachinery/pkg/util/wait"
2017-08-02 17:27:17 +00:00
"k8s.io/client-go/tools/cache"
2022-03-31 10:09:35 +00:00
"github.com/vmware-tanzu/velero/pkg/util/csi"
2022-03-31 14:47:22 +00:00
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
2022-05-02 08:37:28 +00:00
snapshotterClientSet "github.com/kubernetes-csi/external-snapshotter/client/v4/clientset/versioned"
2022-03-31 14:47:22 +00:00
snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1"
2020-03-04 19:02:47 +00:00
2020-12-08 21:38:29 +00:00
"github.com/vmware-tanzu/velero/internal/storage"
2019-09-30 21:26:56 +00:00
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
pkgbackup "github.com/vmware-tanzu/velero/pkg/backup"
2020-03-19 16:18:05 +00:00
"github.com/vmware-tanzu/velero/pkg/discovery"
2020-03-03 20:57:15 +00:00
"github.com/vmware-tanzu/velero/pkg/features"
2019-09-30 21:26:56 +00:00
velerov1client "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned/typed/velero/v1"
2020-02-25 20:01:24 +00:00
velerov1informers "github.com/vmware-tanzu/velero/pkg/generated/informers/externalversions/velero/v1"
velerov1listers "github.com/vmware-tanzu/velero/pkg/generated/listers/velero/v1"
2019-09-30 21:26:56 +00:00
"github.com/vmware-tanzu/velero/pkg/label"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/persistence"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
2021-12-10 17:53:47 +00:00
"github.com/vmware-tanzu/velero/pkg/plugin/framework"
2020-04-24 16:46:20 +00:00
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
2019-09-30 21:26:56 +00:00
"github.com/vmware-tanzu/velero/pkg/util/collections"
"github.com/vmware-tanzu/velero/pkg/util/encode"
kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
"github.com/vmware-tanzu/velero/pkg/volume"
2020-06-24 16:55:18 +00:00
2022-08-03 12:00:49 +00:00
corev1api "k8s.io/api/core/v1"
2020-06-24 16:55:18 +00:00
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
2017-08-02 17:27:17 +00:00
)
type backupController struct {
2018-08-29 19:52:09 +00:00
* genericController
2020-03-12 17:32:58 +00:00
discoveryHelper discovery . Helper
backupper pkgbackup . Backupper
lister velerov1listers . BackupLister
client velerov1client . BackupsGetter
2020-06-24 16:55:18 +00:00
kbClient kbclient . Client
2020-03-12 17:32:58 +00:00
clock clock . Clock
backupLogLevel logrus . Level
newPluginManager func ( logrus . FieldLogger ) clientmgmt . Manager
backupTracker BackupTracker
defaultBackupLocation string
2020-06-15 22:26:44 +00:00
defaultVolumesToRestic bool
2020-03-12 17:32:58 +00:00
defaultBackupTTL time . Duration
2022-07-06 07:51:42 +00:00
defaultCSISnapshotTimeout time . Duration
2020-03-12 17:32:58 +00:00
snapshotLocationLister velerov1listers . VolumeSnapshotLocationLister
defaultSnapshotLocations map [ string ] string
metrics * metrics . ServerMetrics
2021-02-08 18:04:08 +00:00
backupStoreGetter persistence . ObjectBackupStoreGetter
2020-03-12 17:32:58 +00:00
formatFlag logging . Format
2022-03-31 14:47:22 +00:00
volumeSnapshotLister snapshotv1listers . VolumeSnapshotLister
2022-05-02 08:37:28 +00:00
volumeSnapshotClient * snapshotterClientSet . Clientset
2022-03-31 14:47:22 +00:00
volumeSnapshotContentLister snapshotv1listers . VolumeSnapshotContentLister
2022-03-31 10:09:35 +00:00
volumeSnapshotClassLister snapshotv1listers . VolumeSnapshotClassLister
2017-08-02 17:27:17 +00:00
}
func NewBackupController (
2020-02-25 20:01:24 +00:00
backupInformer velerov1informers . BackupInformer ,
2019-01-25 03:33:07 +00:00
client velerov1client . BackupsGetter ,
2020-03-19 16:18:05 +00:00
discoveryHelper discovery . Helper ,
2018-09-26 22:18:45 +00:00
backupper pkgbackup . Backupper ,
2017-12-11 22:10:52 +00:00
logger logrus . FieldLogger ,
2018-08-29 19:52:09 +00:00
backupLogLevel logrus . Level ,
2019-03-15 18:32:11 +00:00
newPluginManager func ( logrus . FieldLogger ) clientmgmt . Manager ,
2018-04-06 17:08:39 +00:00
backupTracker BackupTracker ,
2020-06-24 16:55:18 +00:00
kbClient kbclient . Client ,
2018-08-16 22:41:59 +00:00
defaultBackupLocation string ,
2020-06-15 22:26:44 +00:00
defaultVolumesToRestic bool ,
2019-04-04 21:59:13 +00:00
defaultBackupTTL time . Duration ,
2022-07-06 07:51:42 +00:00
defaultCSISnapshotTimeout time . Duration ,
2020-02-25 20:01:24 +00:00
volumeSnapshotLocationLister velerov1listers . VolumeSnapshotLocationLister ,
2018-10-26 15:32:46 +00:00
defaultSnapshotLocations map [ string ] string ,
2018-06-06 21:35:06 +00:00
metrics * metrics . ServerMetrics ,
2019-07-30 23:29:34 +00:00
formatFlag logging . Format ,
2022-03-31 14:47:22 +00:00
volumeSnapshotLister snapshotv1listers . VolumeSnapshotLister ,
2022-05-02 08:37:28 +00:00
volumeSnapshotClient * snapshotterClientSet . Clientset ,
2022-03-31 14:47:22 +00:00
volumeSnapshotContentLister snapshotv1listers . VolumeSnapshotContentLister ,
2022-03-31 10:09:35 +00:00
volumesnapshotClassLister snapshotv1listers . VolumeSnapshotClassLister ,
2021-02-08 18:04:08 +00:00
backupStoreGetter persistence . ObjectBackupStoreGetter ,
2017-08-02 17:27:17 +00:00
) Interface {
c := & backupController {
2020-10-06 17:58:56 +00:00
genericController : newGenericController ( Backup , logger ) ,
2020-03-12 17:32:58 +00:00
discoveryHelper : discoveryHelper ,
backupper : backupper ,
lister : backupInformer . Lister ( ) ,
client : client ,
clock : & clock . RealClock { } ,
backupLogLevel : backupLogLevel ,
newPluginManager : newPluginManager ,
backupTracker : backupTracker ,
2020-06-24 16:55:18 +00:00
kbClient : kbClient ,
2020-03-12 17:32:58 +00:00
defaultBackupLocation : defaultBackupLocation ,
2020-06-16 21:37:18 +00:00
defaultVolumesToRestic : defaultVolumesToRestic ,
2020-03-12 17:32:58 +00:00
defaultBackupTTL : defaultBackupTTL ,
2022-07-06 07:51:42 +00:00
defaultCSISnapshotTimeout : defaultCSISnapshotTimeout ,
2020-03-12 17:32:58 +00:00
snapshotLocationLister : volumeSnapshotLocationLister ,
defaultSnapshotLocations : defaultSnapshotLocations ,
metrics : metrics ,
formatFlag : formatFlag ,
volumeSnapshotLister : volumeSnapshotLister ,
2022-05-02 08:37:28 +00:00
volumeSnapshotClient : volumeSnapshotClient ,
2020-03-12 17:32:58 +00:00
volumeSnapshotContentLister : volumeSnapshotContentLister ,
2022-03-31 10:09:35 +00:00
volumeSnapshotClassLister : volumesnapshotClassLister ,
2021-02-08 18:04:08 +00:00
backupStoreGetter : backupStoreGetter ,
2017-08-02 17:27:17 +00:00
}
c . syncHandler = c . processBackup
2019-04-05 02:11:53 +00:00
c . resyncFunc = c . resync
c . resyncPeriod = time . Minute
2017-08-02 17:27:17 +00:00
backupInformer . Informer ( ) . AddEventHandler (
cache . ResourceEventHandlerFuncs {
AddFunc : func ( obj interface { } ) {
2019-01-25 03:33:07 +00:00
backup := obj . ( * velerov1api . Backup )
2017-08-02 17:27:17 +00:00
switch backup . Status . Phase {
2022-06-06 14:56:29 +00:00
case "" , velerov1api . BackupPhaseNew :
// only process new backups
2017-08-02 17:27:17 +00:00
default :
2017-09-14 21:27:31 +00:00
c . logger . WithFields ( logrus . Fields {
"backup" : kubeutil . NamespaceAndName ( backup ) ,
"phase" : backup . Status . Phase ,
2022-06-06 14:56:29 +00:00
} ) . Debug ( "Backup is not new, skipping" )
2017-08-02 17:27:17 +00:00
return
}
key , err := cache . MetaNamespaceKeyFunc ( backup )
if err != nil {
2020-10-06 17:58:56 +00:00
c . logger . WithError ( err ) . WithField ( Backup , backup ) . Error ( "Error creating queue key, item not added to queue" )
2017-08-02 17:27:17 +00:00
return
}
c . queue . Add ( key )
} ,
} ,
)
return c
}
2019-04-05 02:11:53 +00:00
func ( c * backupController ) resync ( ) {
2020-01-14 21:11:21 +00:00
// recompute backup_total metric
2019-04-05 02:11:53 +00:00
backups , err := c . lister . List ( labels . Everything ( ) )
if err != nil {
c . logger . Error ( err , "Error computing backup_total metric" )
} else {
c . metrics . SetBackupTotal ( int64 ( len ( backups ) ) )
}
2020-01-14 21:11:21 +00:00
// recompute backup_last_successful_timestamp metric for each
// schedule (including the empty schedule, i.e. ad-hoc backups)
2020-01-21 20:21:28 +00:00
for schedule , timestamp := range getLastSuccessBySchedule ( backups ) {
c . metrics . SetBackupLastSuccessfulTimestamp ( schedule , timestamp )
}
}
// getLastSuccessBySchedule finds the most recent completed backup for each schedule
// and returns a map of schedule name -> completion time of the most recent completed
// backup. This map includes an entry for ad-hoc/non-scheduled backups, where the key
// is the empty string.
func getLastSuccessBySchedule ( backups [ ] * velerov1api . Backup ) map [ string ] time . Time {
2020-01-14 21:11:21 +00:00
lastSuccessBySchedule := map [ string ] time . Time { }
for _ , backup := range backups {
if backup . Status . Phase != velerov1api . BackupPhaseCompleted {
continue
}
if backup . Status . CompletionTimestamp == nil {
continue
}
schedule := backup . Labels [ velerov1api . ScheduleNameLabel ]
timestamp := backup . Status . CompletionTimestamp . Time
if timestamp . After ( lastSuccessBySchedule [ schedule ] ) {
lastSuccessBySchedule [ schedule ] = timestamp
}
}
2020-01-21 20:21:28 +00:00
return lastSuccessBySchedule
2019-04-05 02:11:53 +00:00
}
2018-08-29 19:52:09 +00:00
func ( c * backupController ) processBackup ( key string ) error {
log := c . logger . WithField ( "key" , key )
2017-09-14 21:27:31 +00:00
2018-08-29 19:52:09 +00:00
log . Debug ( "Running processBackup" )
2017-08-02 17:27:17 +00:00
ns , name , err := cache . SplitMetaNamespaceKey ( key )
if err != nil {
2019-04-23 23:19:00 +00:00
log . WithError ( err ) . Errorf ( "error splitting key" )
return nil
2017-08-02 17:27:17 +00:00
}
2018-08-29 19:52:09 +00:00
log . Debug ( "Getting backup" )
2018-09-26 22:18:45 +00:00
original , err := c . lister . Backups ( ns ) . Get ( name )
2018-06-29 17:54:44 +00:00
if apierrors . IsNotFound ( err ) {
2019-04-23 23:19:00 +00:00
log . Debugf ( "backup %s not found" , name )
2018-06-29 17:54:44 +00:00
return nil
}
2017-08-02 17:27:17 +00:00
if err != nil {
2017-09-14 21:27:31 +00:00
return errors . Wrap ( err , "error getting backup" )
2017-08-02 17:27:17 +00:00
}
2017-12-19 17:16:39 +00:00
// Double-check we have the correct phase. In the unlikely event that multiple controller
// instances are running, it's possible for controller A to succeed in changing the phase to
// InProgress, while controller B's attempt to patch the phase fails. When controller B
// reprocesses the same backup, it will either show up as New (informer hasn't seen the update
// yet) or as InProgress. In the former case, the patch attempt will fail again, until the
// informer sees the update. In the latter case, after the informer has seen the update to
// InProgress, we still need this check so we can return nil to indicate we've finished processing
// this key (even though it was a no-op).
2018-09-26 22:18:45 +00:00
switch original . Status . Phase {
2019-01-25 03:33:07 +00:00
case "" , velerov1api . BackupPhaseNew :
2022-06-06 14:56:29 +00:00
// only process new backups
2017-08-02 17:27:17 +00:00
default :
return nil
}
2018-09-26 22:18:45 +00:00
log . Debug ( "Preparing backup request" )
request := c . prepareBackupRequest ( original )
if len ( request . Status . ValidationErrors ) > 0 {
2019-01-25 03:33:07 +00:00
request . Status . Phase = velerov1api . BackupPhaseFailedValidation
2017-08-02 17:27:17 +00:00
} else {
2019-01-25 03:33:07 +00:00
request . Status . Phase = velerov1api . BackupPhaseInProgress
2019-10-14 16:20:28 +00:00
request . Status . StartTimestamp = & metav1 . Time { Time : c . clock . Now ( ) }
2017-08-02 17:27:17 +00:00
}
// update status
2018-09-26 22:18:45 +00:00
updatedBackup , err := patchBackup ( original , request . Backup , c . client )
2017-08-02 17:27:17 +00:00
if err != nil {
2018-09-26 22:18:45 +00:00
return errors . Wrapf ( err , "error updating Backup status to %s" , request . Status . Phase )
2017-08-02 17:27:17 +00:00
}
2022-04-14 12:05:55 +00:00
2017-12-11 22:10:52 +00:00
// store ref to just-updated item for creating patch
original = updatedBackup
2018-09-26 22:18:45 +00:00
request . Backup = updatedBackup . DeepCopy ( )
2017-08-02 17:27:17 +00:00
2019-01-25 03:33:07 +00:00
if request . Status . Phase == velerov1api . BackupPhaseFailedValidation {
2017-08-02 17:27:17 +00:00
return nil
}
2018-09-26 22:18:45 +00:00
c . backupTracker . Add ( request . Namespace , request . Name )
defer c . backupTracker . Delete ( request . Namespace , request . Name )
2018-04-06 17:08:39 +00:00
2018-08-29 19:52:09 +00:00
log . Debug ( "Running backup" )
2019-04-26 16:14:26 +00:00
2019-01-25 03:33:07 +00:00
backupScheduleName := request . GetLabels ( ) [ velerov1api . ScheduleNameLabel ]
2018-08-29 19:52:09 +00:00
c . metrics . RegisterBackupAttempt ( backupScheduleName )
2018-06-06 21:35:06 +00:00
2019-04-26 16:14:26 +00:00
// execution & upload of backup
2018-09-26 22:18:45 +00:00
if err := c . runBackup ( request ) ; err != nil {
2019-04-26 16:14:26 +00:00
// even though runBackup sets the backup's phase prior
// to uploading artifacts to object storage, we have to
// check for an error again here and update the phase if
// one is found, because there could've been an error
// while uploading artifacts to object storage, which would
// result in the backup being Failed.
2018-08-29 19:52:09 +00:00
log . WithError ( err ) . Error ( "backup failed" )
2019-01-25 03:33:07 +00:00
request . Status . Phase = velerov1api . BackupPhaseFailed
2022-04-14 12:05:55 +00:00
request . Status . FailureReason = err . Error ( )
2019-04-26 16:14:26 +00:00
}
switch request . Status . Phase {
case velerov1api . BackupPhaseCompleted :
2018-08-29 19:52:09 +00:00
c . metrics . RegisterBackupSuccess ( backupScheduleName )
2019-04-26 16:14:26 +00:00
case velerov1api . BackupPhasePartiallyFailed :
c . metrics . RegisterBackupPartialFailure ( backupScheduleName )
case velerov1api . BackupPhaseFailed :
c . metrics . RegisterBackupFailed ( backupScheduleName )
2020-07-16 17:13:17 +00:00
case velerov1api . BackupPhaseFailedValidation :
c . metrics . RegisterBackupValidationFailure ( backupScheduleName )
2017-08-02 17:27:17 +00:00
}
2018-08-29 19:52:09 +00:00
log . Debug ( "Updating backup's final status" )
2018-09-26 22:18:45 +00:00
if _ , err := patchBackup ( original , request . Backup , c . client ) ; err != nil {
2018-08-29 19:52:09 +00:00
log . WithError ( err ) . Error ( "error updating backup's final status" )
2017-08-02 17:27:17 +00:00
}
return nil
}
2019-01-25 03:33:07 +00:00
func patchBackup ( original , updated * velerov1api . Backup , client velerov1client . BackupsGetter ) ( * velerov1api . Backup , error ) {
2017-12-11 22:10:52 +00:00
origBytes , err := json . Marshal ( original )
if err != nil {
return nil , errors . Wrap ( err , "error marshalling original backup" )
}
updatedBytes , err := json . Marshal ( updated )
if err != nil {
return nil , errors . Wrap ( err , "error marshalling updated backup" )
}
2018-05-14 21:34:24 +00:00
patchBytes , err := jsonpatch . CreateMergePatch ( origBytes , updatedBytes )
2017-12-11 22:10:52 +00:00
if err != nil {
2018-05-14 21:34:24 +00:00
return nil , errors . Wrap ( err , "error creating json merge patch for backup" )
2017-12-11 22:10:52 +00:00
}
k8s 1.18 import (#2651)
* k8s 1.18 import wip
backup, cmd, controller, generated, restic, restore, serverstatusrequest, test and util
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go mod tidy
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* add changelog file
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* go fmt
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* update code-generator and controller-gen in CI
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* checkout proper code-generator version, regen
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix remaining calls
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* regenerate CRDs with ./hack/update-generated-crd-code.sh
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use existing context in restic and server
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* fix test cases by resetting resource version
also use main library go context, not golang.org/x/net/context, in pkg/restore/restore.go
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* clarify changelog message
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* use github.com/kubernetes-csi/external-snapshotter/v2@v2.2.0-rc1
Signed-off-by: Andrew Lavery <laverya@umich.edu>
* run 'go mod tidy' to remove old external-snapshotter version
Signed-off-by: Andrew Lavery <laverya@umich.edu>
2020-07-16 16:21:37 +00:00
res , err := client . Backups ( original . Namespace ) . Patch ( context . TODO ( ) , original . Name , types . MergePatchType , patchBytes , metav1 . PatchOptions { } )
2017-12-11 22:10:52 +00:00
if err != nil {
return nil , errors . Wrap ( err , "error patching backup" )
}
return res , nil
}
2019-01-25 03:33:07 +00:00
func ( c * backupController ) prepareBackupRequest ( backup * velerov1api . Backup ) * pkgbackup . Request {
2018-09-26 22:18:45 +00:00
request := & pkgbackup . Request {
Backup : backup . DeepCopy ( ) , // don't modify items in the cache
2017-08-02 17:27:17 +00:00
}
2020-05-01 19:54:57 +00:00
// set backup major version - deprecated, use Status.FormatVersion
2018-12-05 22:56:53 +00:00
request . Status . Version = pkgbackup . BackupVersion
2018-09-26 22:18:45 +00:00
2020-05-01 19:54:57 +00:00
// set backup major, minor, and patch version
request . Status . FormatVersion = pkgbackup . BackupFormatVersion
2019-04-04 21:59:13 +00:00
if request . Spec . TTL . Duration == 0 {
// set default backup TTL
request . Spec . TTL . Duration = c . defaultBackupTTL
2017-08-02 17:27:17 +00:00
}
2022-07-06 07:51:42 +00:00
if request . Spec . CSISnapshotTimeout . Duration == 0 {
// set default CSI VolumeSnapshot timeout
request . Spec . CSISnapshotTimeout . Duration = c . defaultCSISnapshotTimeout
}
2019-04-04 21:59:13 +00:00
// calculate expiration
2019-10-14 16:20:28 +00:00
request . Status . Expiration = & metav1 . Time { Time : c . clock . Now ( ) . Add ( request . Spec . TTL . Duration ) }
2019-04-04 21:59:13 +00:00
2020-12-09 17:38:31 +00:00
if request . Spec . DefaultVolumesToRestic == nil {
request . Spec . DefaultVolumesToRestic = & c . defaultVolumesToRestic
}
// find which storage location to use
var serverSpecified bool
2018-09-26 22:18:45 +00:00
if request . Spec . StorageLocation == "" {
2020-12-09 17:38:31 +00:00
// when the user doesn't specify a location, use the server default unless there is an existing BSL marked as default
// TODO(2.0) c.defaultBackupLocation will be deprecated
2018-09-26 22:18:45 +00:00
request . Spec . StorageLocation = c . defaultBackupLocation
2020-12-08 21:38:29 +00:00
locationList , err := storage . ListBackupStorageLocations ( context . Background ( ) , c . kbClient , request . Namespace )
if err == nil {
for _ , location := range locationList . Items {
if location . Spec . Default {
request . Spec . StorageLocation = location . Name
break
}
}
}
2020-12-09 17:38:31 +00:00
serverSpecified = true
2018-08-16 22:41:59 +00:00
}
2020-12-09 17:38:31 +00:00
// get the storage location, and store the BackupStorageLocation API obj on the request
2020-06-24 16:55:18 +00:00
storageLocation := & velerov1api . BackupStorageLocation { }
if err := c . kbClient . Get ( context . Background ( ) , kbclient . ObjectKey {
Namespace : request . Namespace ,
Name : request . Spec . StorageLocation ,
} , storageLocation ) ; err != nil {
2018-10-24 00:38:08 +00:00
if apierrors . IsNotFound ( err ) {
2020-12-09 17:38:31 +00:00
if serverSpecified {
// TODO(2.0) remove this. For now, without mentioning "server default" it could be confusing trying to grasp where the default came from.
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "an existing backup storage location wasn't specified at backup creation time and the server default '%s' doesn't exist. Please address this issue (see `velero backup-location -h` for options) and create a new backup. Error: %v" , request . Spec . StorageLocation , err ) )
} else {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "an existing backup storage location wasn't specified at backup creation time and the default '%s' wasn't found. Please address this issue (see `velero backup-location -h` for options) and create a new backup. Error: %v" , request . Spec . StorageLocation , err ) )
}
2018-10-24 00:38:08 +00:00
} else {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "error getting backup storage location: %v" , err ) )
}
2018-09-26 22:18:45 +00:00
} else {
request . StorageLocation = storageLocation
2019-05-29 18:21:25 +00:00
if request . StorageLocation . Spec . AccessMode == velerov1api . BackupStorageLocationAccessModeReadOnly {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors ,
fmt . Sprintf ( "backup can't be created because backup storage location %s is currently in read-only mode" , request . StorageLocation . Name ) )
}
2018-09-26 22:18:45 +00:00
}
2020-12-09 17:38:31 +00:00
// add the storage location as a label for easy filtering later.
if request . Labels == nil {
request . Labels = make ( map [ string ] string )
}
request . Labels [ velerov1api . StorageLocationLabel ] = label . GetValidName ( request . Spec . StorageLocation )
2018-09-26 22:18:45 +00:00
// validate and get the backup's VolumeSnapshotLocations, and store the
// VolumeSnapshotLocation API objs on the request
if locs , errs := c . validateAndGetSnapshotLocations ( request . Backup ) ; len ( errs ) > 0 {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , errs ... )
} else {
request . Spec . VolumeSnapshotLocations = nil
for _ , loc := range locs {
request . Spec . VolumeSnapshotLocations = append ( request . Spec . VolumeSnapshotLocations , loc . Name )
request . SnapshotLocations = append ( request . SnapshotLocations , loc )
}
}
2020-12-09 17:38:31 +00:00
// Getting all information of cluster version - useful for future skip-level migration
if request . Annotations == nil {
request . Annotations = make ( map [ string ] string )
}
request . Annotations [ velerov1api . SourceClusterK8sGitVersionAnnotation ] = c . discoveryHelper . ServerVersion ( ) . String ( )
request . Annotations [ velerov1api . SourceClusterK8sMajorVersionAnnotation ] = c . discoveryHelper . ServerVersion ( ) . Major
request . Annotations [ velerov1api . SourceClusterK8sMinorVersionAnnotation ] = c . discoveryHelper . ServerVersion ( ) . Minor
2022-08-03 12:00:49 +00:00
// Add namespaces with label velero.io/exclude-from-backup=true into request.Spec.ExcludedNamespaces
// Essentially, adding the label velero.io/exclude-from-backup=true to a namespace would be equivalent to setting spec.ExcludedNamespaces
2022-08-08 04:05:08 +00:00
namespaces := corev1api . NamespaceList { }
if err := c . kbClient . List ( context . Background ( ) , & namespaces , kbclient . MatchingLabels { "velero.io/exclude-from-backup" : "true" } ) ; err == nil {
2022-08-03 12:00:49 +00:00
for _ , ns := range namespaces . Items {
request . Spec . ExcludedNamespaces = append ( request . Spec . ExcludedNamespaces , ns . Name )
}
2022-08-08 04:05:08 +00:00
} else {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "error getting namespace list: %v" , err ) )
2022-08-03 12:00:49 +00:00
}
2020-12-09 17:38:31 +00:00
// validate the included/excluded resources
for _ , err := range collections . ValidateIncludesExcludes ( request . Spec . IncludedResources , request . Spec . ExcludedResources ) {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "Invalid included/excluded resource lists: %v" , err ) )
}
// validate the included/excluded namespaces
2021-09-03 15:03:35 +00:00
for _ , err := range collections . ValidateNamespaceIncludesExcludes ( request . Spec . IncludedNamespaces , request . Spec . ExcludedNamespaces ) {
2020-12-09 17:38:31 +00:00
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "Invalid included/excluded namespace lists: %v" , err ) )
}
2022-02-15 14:52:47 +00:00
// validate that only one exists orLabelSelector or just labelSelector (singular)
if request . Spec . OrLabelSelectors != nil && request . Spec . LabelSelector != nil {
request . Status . ValidationErrors = append ( request . Status . ValidationErrors , fmt . Sprintf ( "encountered labelSelector as well as orLabelSelectors in backup spec, only one can be specified" ) )
}
2018-09-26 22:18:45 +00:00
return request
2017-08-02 17:27:17 +00:00
}
2018-09-26 22:18:45 +00:00
// validateAndGetSnapshotLocations gets a collection of VolumeSnapshotLocation objects that
// this backup will use (returned as a map of provider name -> VSL), and ensures:
// - each location name in .spec.volumeSnapshotLocations exists as a location
// - exactly 1 location per provider
// - a given provider's default location name is added to .spec.volumeSnapshotLocations if one
2018-10-26 15:32:46 +00:00
// is not explicitly specified for the provider (if there's only one location for the provider,
// it will automatically be used)
2020-04-24 16:46:20 +00:00
// if backup has snapshotVolume disabled then it returns empty VSL
2019-01-25 03:33:07 +00:00
func ( c * backupController ) validateAndGetSnapshotLocations ( backup * velerov1api . Backup ) ( map [ string ] * velerov1api . VolumeSnapshotLocation , [ ] string ) {
2018-09-26 22:18:45 +00:00
errors := [ ] string { }
2019-01-25 03:33:07 +00:00
providerLocations := make ( map [ string ] * velerov1api . VolumeSnapshotLocation )
2018-09-26 22:18:45 +00:00
2020-04-24 16:46:20 +00:00
// if snapshotVolume is set to false then we don't need to validate volumesnapshotlocation
if boolptr . IsSetToFalse ( backup . Spec . SnapshotVolumes ) {
return nil , nil
}
2018-09-26 22:18:45 +00:00
for _ , locationName := range backup . Spec . VolumeSnapshotLocations {
2018-09-25 14:51:28 +00:00
// validate each locationName exists as a VolumeSnapshotLocation
2018-09-26 22:18:45 +00:00
location , err := c . snapshotLocationLister . VolumeSnapshotLocations ( backup . Namespace ) . Get ( locationName )
2018-09-25 14:51:28 +00:00
if err != nil {
2018-10-24 00:38:08 +00:00
if apierrors . IsNotFound ( err ) {
errors = append ( errors , fmt . Sprintf ( "a VolumeSnapshotLocation CRD for the location %s with the name specified in the backup spec needs to be created before this snapshot can be executed. Error: %v" , locationName , err ) )
} else {
errors = append ( errors , fmt . Sprintf ( "error getting volume snapshot location named %s: %v" , locationName , err ) )
}
2018-09-25 14:51:28 +00:00
continue
}
2018-09-26 22:18:45 +00:00
// ensure we end up with exactly 1 location *per provider*
if providerLocation , ok := providerLocations [ location . Spec . Provider ] ; ok {
2018-09-25 14:51:28 +00:00
// if > 1 location name per provider as in ["aws-us-east-1" | "aws-us-west-1"] (same provider, multiple names)
2018-09-26 22:18:45 +00:00
if providerLocation . Name != locationName {
errors = append ( errors , fmt . Sprintf ( "more than one VolumeSnapshotLocation name specified for provider %s: %s; unexpected name was %s" , location . Spec . Provider , locationName , providerLocation . Name ) )
2018-09-25 14:51:28 +00:00
continue
}
} else {
// keep track of all valid existing locations, per provider
2018-09-26 22:18:45 +00:00
providerLocations [ location . Spec . Provider ] = location
2018-09-25 14:51:28 +00:00
}
}
if len ( errors ) > 0 {
2018-09-26 22:18:45 +00:00
return nil , errors
2018-09-25 14:51:28 +00:00
}
2018-10-26 15:32:46 +00:00
allLocations , err := c . snapshotLocationLister . VolumeSnapshotLocations ( backup . Namespace ) . List ( labels . Everything ( ) )
if err != nil {
errors = append ( errors , fmt . Sprintf ( "error listing volume snapshot locations: %v" , err ) )
return nil , errors
}
// build a map of provider->list of all locations for the provider
2019-01-25 03:33:07 +00:00
allProviderLocations := make ( map [ string ] [ ] * velerov1api . VolumeSnapshotLocation )
2018-10-26 15:32:46 +00:00
for i := range allLocations {
loc := allLocations [ i ]
allProviderLocations [ loc . Spec . Provider ] = append ( allProviderLocations [ loc . Spec . Provider ] , loc )
}
// go through each provider and make sure we have/can get a VSL
// for it
for provider , locations := range allProviderLocations {
if _ , ok := providerLocations [ provider ] ; ok {
// backup's spec had a location named for this provider
continue
}
if len ( locations ) > 1 {
// more than one possible location for the provider: check
// the defaults
defaultLocation := c . defaultSnapshotLocations [ provider ]
if defaultLocation == "" {
errors = append ( errors , fmt . Sprintf ( "provider %s has more than one possible volume snapshot location, and none were specified explicitly or as a default" , provider ) )
continue
}
location , err := c . snapshotLocationLister . VolumeSnapshotLocations ( backup . Namespace ) . Get ( defaultLocation )
if err != nil {
errors = append ( errors , fmt . Sprintf ( "error getting volume snapshot location named %s: %v" , defaultLocation , err ) )
continue
}
providerLocations [ provider ] = location
continue
2018-09-25 14:51:28 +00:00
}
2018-10-26 15:32:46 +00:00
// exactly one location for the provider: use it
providerLocations [ provider ] = locations [ 0 ]
}
if len ( errors ) > 0 {
return nil , errors
2018-09-25 14:51:28 +00:00
}
2018-09-26 22:18:45 +00:00
return providerLocations , nil
2018-09-25 14:51:28 +00:00
}
2019-04-26 16:14:26 +00:00
// runBackup runs and uploads a validated backup. Any error returned from this function
// causes the backup to be Failed; if no error is returned, the backup's status's Errors
// field is checked to see if the backup was a partial failure.
//
// The function proceeds in this order:
//  1. create a gzipped temp log file and a logger that writes to it and to stdout;
//  2. create a temp file for the backup contents and resolve the plugins;
//  3. fail immediately if a backup with the same name already exists in object storage;
//  4. run the backupper, collecting (not returning) its error;
//  5. when the CSI feature flag is on, list the CSI objects for this backup, wait for the
//     VolumeSnapshots to become ready, and delete the VolumeSnapshots from the cluster;
//  6. fill in the status counters and the terminal phase;
//  7. persist everything through a freshly obtained backup store.
//
// The returned error is the aggregate of the backupper error and any persistence errors.
2018-09-26 22:18:45 +00:00
func ( c * backupController ) runBackup ( backup * pkgbackup . Request ) error {
2020-10-06 17:58:56 +00:00
c . logger . WithField ( Backup , kubeutil . NamespaceAndName ( backup ) ) . Info ( "Setting up backup log" )
2017-08-11 15:05:56 +00:00
logFile , err := ioutil . TempFile ( "" , "" )
if err != nil {
2017-12-19 17:16:39 +00:00
return errors . Wrap ( err , "error creating temp file for backup log" )
2017-08-11 15:05:56 +00:00
}
2018-05-13 13:28:09 +00:00
gzippedLogFile := gzip . NewWriter ( logFile )
// Assuming we successfully uploaded the log file, this will have already been closed below. It is safe to call
// close multiple times. If we get an error closing this, there's not really anything we can do about it.
defer gzippedLogFile . Close ( )
2020-11-02 21:39:32 +00:00
defer closeAndRemoveFile ( logFile , c . logger . WithField ( Backup , kubeutil . NamespaceAndName ( backup ) ) )
2018-05-13 13:28:09 +00:00
// Log the backup to both a backup log file and to stdout. This will help see what happened if the upload of the
// backup log failed for whatever reason.
2019-07-30 23:29:34 +00:00
logger := logging . DefaultLogger ( c . backupLogLevel , c . formatFlag )
2018-05-13 13:28:09 +00:00
logger . Out = io . MultiWriter ( os . Stdout , gzippedLogFile )
2019-04-26 16:14:26 +00:00
// The counter hook is what later supplies Status.Warnings / Status.Errors and decides
// between the Completed and PartiallyFailed phases.
logCounter := logging . NewLogCounterHook ( )
logger . Hooks . Add ( logCounter )
2018-05-13 13:28:09 +00:00
2020-10-06 17:58:56 +00:00
backupLog := logger . WithField ( Backup , kubeutil . NamespaceAndName ( backup ) )
2019-04-26 16:14:26 +00:00
backupLog . Info ( "Setting up backup temp file" )
2017-12-19 17:16:39 +00:00
backupFile , err := ioutil . TempFile ( "" , "" )
if err != nil {
return errors . Wrap ( err , "error creating temp file for backup" )
}
2019-04-26 16:14:26 +00:00
defer closeAndRemoveFile ( backupFile , backupLog )
2017-08-02 17:27:17 +00:00
2019-04-26 16:14:26 +00:00
backupLog . Info ( "Setting up plugin manager" )
pluginManager := c . newPluginManager ( backupLog )
2018-08-20 23:29:54 +00:00
defer pluginManager . CleanupClients ( )
2019-04-26 16:14:26 +00:00
backupLog . Info ( "Getting backup item actions" )
2018-05-13 13:28:09 +00:00
actions , err := pluginManager . GetBackupItemActions ( )
if err != nil {
return err
}
2021-12-10 17:53:47 +00:00
// Item snapshotters are fetched from the same plugin manager as the backup item actions.
itemSnapshotters , err := pluginManager . GetItemSnapshotters ( )
if err != nil {
return err
}
2018-05-13 13:28:09 +00:00
2020-05-27 23:03:52 +00:00
backupLog . Info ( "Setting up backup store to check for backup existence" )
2021-02-08 18:04:08 +00:00
backupStore , err := c . backupStoreGetter . Get ( backup . StorageLocation , pluginManager , backupLog )
2017-11-15 02:35:02 +00:00
if err != nil {
return err
}
2019-04-24 17:54:43 +00:00
// Never overwrite: if the backup already exists, or the existence check itself fails,
// mark the backup Failed with a completion timestamp and return an error.
exists , err := backupStore . BackupExists ( backup . StorageLocation . Spec . StorageType . ObjectStorage . Bucket , backup . Name )
if exists || err != nil {
2019-04-23 23:19:00 +00:00
backup . Status . Phase = velerov1api . BackupPhaseFailed
2019-10-14 16:20:28 +00:00
backup . Status . CompletionTimestamp = & metav1 . Time { Time : c . clock . Now ( ) }
2019-04-24 17:54:43 +00:00
if err != nil {
2019-04-24 21:18:24 +00:00
return errors . Wrapf ( err , "error checking if backup already exists in object storage" )
2019-04-24 17:54:43 +00:00
}
2019-04-24 21:18:24 +00:00
return errors . Errorf ( "backup already exists in object storage" )
2019-04-23 23:19:00 +00:00
}
2017-11-15 02:35:02 +00:00
2021-12-10 17:53:47 +00:00
backupItemActionsResolver := framework . NewBackupItemActionResolver ( actions )
itemSnapshottersResolver := framework . NewItemSnapshotterResolver ( itemSnapshotters )
2019-04-26 16:14:26 +00:00
// A backupper error is recorded as fatal but does not return early: the code below
// still fills in the status and persists the log and whatever else was produced.
var fatalErrs [ ] error
2021-12-10 17:53:47 +00:00
if err := c . backupper . BackupWithResolvers ( backupLog , backup , backupFile , backupItemActionsResolver ,
itemSnapshottersResolver , pluginManager ) ; err != nil {
2019-04-26 16:14:26 +00:00
fatalErrs = append ( fatalErrs , err )
2018-10-12 17:55:02 +00:00
}
2020-03-12 21:01:14 +00:00
// Empty slices here so that they can be passed in to the persistBackup call later, regardless of whether or not CSI's enabled.
// This way, we only make the Lister call if the feature flag's on.
2022-03-31 14:47:22 +00:00
var volumeSnapshots [ ] * snapshotv1api . VolumeSnapshot
var volumeSnapshotContents [ ] * snapshotv1api . VolumeSnapshotContent
2022-03-31 10:09:35 +00:00
var volumeSnapshotClasses [ ] * snapshotv1api . VolumeSnapshotClass
2020-04-16 19:36:43 +00:00
if features . IsEnabled ( velerov1api . CSIFeatureFlag ) {
2020-05-07 18:56:13 +00:00
selector := label . NewSelectorForBackup ( backup . Name )
2020-04-16 19:28:29 +00:00
// Listers are wrapped in a nil check out of caution, since they may not be populated based on the
// EnableCSI feature flag. This is more to guard against programmer error, as they shouldn't be nil
// when EnableCSI is on.
if c . volumeSnapshotLister != nil {
volumeSnapshots , err = c . volumeSnapshotLister . List ( selector )
if err != nil {
backupLog . Error ( err )
}
2022-04-20 15:51:21 +00:00
2022-07-06 07:51:42 +00:00
// A failure or timeout while waiting is only logged through backupLog (and so counted
// by logCounter); it is not added to fatalErrs.
err = c . checkVolumeSnapshotReadyToUse ( context . Background ( ) , volumeSnapshots , backup . Spec . CSISnapshotTimeout . Duration )
2022-05-02 08:37:28 +00:00
if err != nil {
backupLog . Errorf ( "fail to wait VolumeSnapshot change to Ready: %s" , err . Error ( ) )
}
2022-04-20 15:51:21 +00:00
// The snapshots stored on the request are the objects returned by the lister above;
// they are what the CSI counters further down are computed from.
backup . CSISnapshots = volumeSnapshots
2020-03-12 21:01:14 +00:00
}
2020-04-16 19:28:29 +00:00
if c . volumeSnapshotContentLister != nil {
2020-05-06 18:04:34 +00:00
volumeSnapshotContents , err = c . volumeSnapshotContentLister . List ( selector )
if err != nil {
backupLog . Error ( err )
2020-04-16 19:28:29 +00:00
}
2020-03-12 21:01:14 +00:00
}
2022-03-31 10:09:35 +00:00
// vsClassSet remembers which class names were already collected so each
// VolumeSnapshotClass is fetched and persisted at most once.
vsClassSet := sets . NewString ( )
for _ , vsc := range volumeSnapshotContents {
// persist the volumesnapshotclasses referenced by vsc
if c . volumeSnapshotClassLister != nil &&
vsc . Spec . VolumeSnapshotClassName != nil &&
! vsClassSet . Has ( * vsc . Spec . VolumeSnapshotClassName ) {
if vsClass , err := c . volumeSnapshotClassLister . Get ( * vsc . Spec . VolumeSnapshotClassName ) ; err != nil {
backupLog . Error ( err )
} else {
vsClassSet . Insert ( * vsc . Spec . VolumeSnapshotClassName )
volumeSnapshotClasses = append ( volumeSnapshotClasses , vsClass )
}
}
if err := csi . ResetVolumeSnapshotContent ( vsc ) ; err != nil {
backupLog . Error ( err )
}
}
2022-04-21 09:17:28 +00:00
// Delete the VolumeSnapshots created in the backup, when CSI feature is enabled.
c . deleteVolumeSnapshot ( volumeSnapshots , volumeSnapshotContents , * backup , backupLog )
2020-03-12 21:01:14 +00:00
}
2018-07-10 22:17:53 +00:00
// Mark completion timestamp before serializing and uploading.
// Otherwise, the JSON file in object storage has a CompletionTimestamp of 'null'.
2019-10-14 16:20:28 +00:00
backup . Status . CompletionTimestamp = & metav1 . Time { Time : c . clock . Now ( ) }
2018-07-10 22:17:53 +00:00
2018-10-22 16:37:30 +00:00
// Native snapshots: every entry counts as attempted, those in the Completed phase as completed.
backup . Status . VolumeSnapshotsAttempted = len ( backup . VolumeSnapshots )
for _ , snap := range backup . VolumeSnapshots {
if snap . Status . Phase == volume . SnapshotPhaseCompleted {
backup . Status . VolumeSnapshotsCompleted ++
}
}
2022-04-09 04:45:06 +00:00
// CSI snapshots: a snapshot counts as completed only when it has a status whose
// ReadyToUse is set to true.
backup . Status . CSIVolumeSnapshotsAttempted = len ( backup . CSISnapshots )
for _ , vs := range backup . CSISnapshots {
2022-05-02 21:23:06 +00:00
if vs . Status != nil && boolptr . IsSetToTrue ( vs . Status . ReadyToUse ) {
2022-04-09 04:45:06 +00:00
backup . Status . CSIVolumeSnapshotsCompleted ++
2022-04-08 16:57:28 +00:00
}
}
2021-10-28 17:19:05 +00:00
// Warning and error totals come from the hook attached to the backup logger above.
backup . Status . Warnings = logCounter . GetCount ( logrus . WarnLevel )
backup . Status . Errors = logCounter . GetCount ( logrus . ErrorLevel )
2019-04-26 16:14:26 +00:00
recordBackupMetrics ( backupLog , backup . Backup , backupFile , c . metrics )
// Close the gzip writer explicitly before the log file is handed to persistBackup;
// a close failure is reported on the controller logger, not on backupLog.
if err := gzippedLogFile . Close ( ) ; err != nil {
2020-11-02 21:39:32 +00:00
c . logger . WithField ( Backup , kubeutil . NamespaceAndName ( backup ) ) . WithError ( err ) . Error ( "error closing gzippedLogFile" )
2019-04-26 16:14:26 +00:00
}
// Assign finalize phase as close to end as possible so that any errors
// logged to backupLog are captured. This is done before uploading the
// artifacts to object storage so that the JSON representation of the
// backup in object storage has the terminal phase set.
switch {
case len ( fatalErrs ) > 0 :
backup . Status . Phase = velerov1api . BackupPhaseFailed
case logCounter . GetCount ( logrus . ErrorLevel ) > 0 :
backup . Status . Phase = velerov1api . BackupPhasePartiallyFailed
default :
backup . Status . Phase = velerov1api . BackupPhaseCompleted
}
2020-05-27 23:03:52 +00:00
// re-instantiate the backup store because credentials could have changed since the original
// instantiation, if this was a long-running backup
// NOTE(review): backupLog still writes to gzippedLogFile, which was closed above, so this
// message and anything the store getter logs through backupLog presumably cannot reach the
// backup log file — confirm whether c.logger should be used from here on.
backupLog . Info ( "Setting up backup store to persist the backup" )
2021-02-08 18:04:08 +00:00
backupStore , err = c . backupStoreGetter . Get ( backup . StorageLocation , pluginManager , backupLog )
2020-05-27 23:03:52 +00:00
if err != nil {
// Returns only this error; anything already collected in fatalErrs is not included.
return err
}
2022-03-31 10:09:35 +00:00
// Persistence errors are fatal as well; note the phase was already chosen above and is
// not revisited here (the caller handles a non-nil return).
if errs := persistBackup ( backup , backupFile , logFile , backupStore , c . logger . WithField ( Backup , kubeutil . NamespaceAndName ( backup ) ) , volumeSnapshots , volumeSnapshotContents , volumeSnapshotClasses ) ; len ( errs ) > 0 {
2019-04-26 16:14:26 +00:00
fatalErrs = append ( fatalErrs , errs ... )
}
2018-10-12 17:55:02 +00:00
2020-11-02 21:39:32 +00:00
c . logger . WithField ( Backup , kubeutil . NamespaceAndName ( backup ) ) . Info ( "Backup completed" )
2018-10-12 17:55:02 +00:00
2019-04-26 16:14:26 +00:00
// if we return a non-nil error, the calling function will update
// the backup's phase to Failed.
return kerrors . NewAggregate ( fatalErrs )
2018-10-12 17:55:02 +00:00
}
2019-04-26 16:14:26 +00:00
func recordBackupMetrics ( log logrus . FieldLogger , backup * velerov1api . Backup , backupFile * os . File , serverMetrics * metrics . ServerMetrics ) {
2019-01-25 03:33:07 +00:00
backupScheduleName := backup . GetLabels ( ) [ velerov1api . ScheduleNameLabel ]
2017-08-02 17:27:17 +00:00
2018-06-06 21:35:06 +00:00
var backupSizeBytes int64
if backupFileStat , err := backupFile . Stat ( ) ; err != nil {
2019-04-26 16:14:26 +00:00
log . WithError ( errors . WithStack ( err ) ) . Error ( "Error getting backup file info" )
2018-06-06 21:35:06 +00:00
} else {
backupSizeBytes = backupFileStat . Size ( )
}
2018-10-12 17:55:02 +00:00
serverMetrics . SetBackupTarballSizeBytesGauge ( backupScheduleName , backupSizeBytes )
2018-06-06 21:35:06 +00:00
2018-10-12 17:55:02 +00:00
backupDuration := backup . Status . CompletionTimestamp . Time . Sub ( backup . Status . StartTimestamp . Time )
backupDurationSeconds := float64 ( backupDuration / time . Second )
serverMetrics . RegisterBackupDuration ( backupScheduleName , backupDurationSeconds )
2018-10-23 21:04:45 +00:00
serverMetrics . RegisterVolumeSnapshotAttempts ( backupScheduleName , backup . Status . VolumeSnapshotsAttempted )
serverMetrics . RegisterVolumeSnapshotSuccesses ( backupScheduleName , backup . Status . VolumeSnapshotsCompleted )
serverMetrics . RegisterVolumeSnapshotFailures ( backupScheduleName , backup . Status . VolumeSnapshotsAttempted - backup . Status . VolumeSnapshotsCompleted )
2022-04-08 16:57:28 +00:00
if features . IsEnabled ( velerov1api . CSIFeatureFlag ) {
2022-04-09 04:45:06 +00:00
serverMetrics . RegisterCSISnapshotAttempts ( backupScheduleName , backup . Name , backup . Status . CSIVolumeSnapshotsAttempted )
serverMetrics . RegisterCSISnapshotSuccesses ( backupScheduleName , backup . Name , backup . Status . CSIVolumeSnapshotsCompleted )
serverMetrics . RegisterCSISnapshotFailures ( backupScheduleName , backup . Name , backup . Status . CSIVolumeSnapshotsAttempted - backup . Status . CSIVolumeSnapshotsCompleted )
2022-04-08 16:57:28 +00:00
}
2021-10-28 17:19:05 +00:00
if backup . Status . Progress != nil {
serverMetrics . RegisterBackupItemsTotalGauge ( backupScheduleName , backup . Status . Progress . TotalItems )
}
serverMetrics . RegisterBackupItemsErrorsGauge ( backupScheduleName , backup . Status . Errors )
2018-10-12 17:55:02 +00:00
}
2020-03-12 21:01:14 +00:00
func persistBackup ( backup * pkgbackup . Request ,
backupContents , backupLog * os . File ,
backupStore persistence . BackupStore ,
log logrus . FieldLogger ,
2022-03-31 14:47:22 +00:00
csiVolumeSnapshots [ ] * snapshotv1api . VolumeSnapshot ,
csiVolumeSnapshotContents [ ] * snapshotv1api . VolumeSnapshotContent ,
2022-03-31 10:09:35 +00:00
csiVolumesnapshotClasses [ ] * snapshotv1api . VolumeSnapshotClass ,
2020-03-12 21:01:14 +00:00
) [ ] error {
2020-04-17 22:26:41 +00:00
persistErrs := [ ] error { }
2018-10-12 17:55:02 +00:00
backupJSON := new ( bytes . Buffer )
2020-04-20 16:33:21 +00:00
if err := encode . EncodeTo ( backup . Backup , "json" , backupJSON ) ; err != nil {
persistErrs = append ( persistErrs , errors . Wrap ( err , "error encoding backup" ) )
2017-08-02 17:27:17 +00:00
}
2020-04-13 17:38:11 +00:00
// Velero-native volume snapshots (as opposed to CSI ones)
2020-04-17 22:26:41 +00:00
nativeVolumeSnapshots , errs := encodeToJSONGzip ( backup . VolumeSnapshots , "native volumesnapshots list" )
if errs != nil {
persistErrs = append ( persistErrs , errs ... )
2018-10-15 20:22:00 +00:00
}
2018-06-20 18:08:07 +00:00
2020-04-17 22:26:41 +00:00
podVolumeBackups , errs := encodeToJSONGzip ( backup . PodVolumeBackups , "pod volume backups list" )
if errs != nil {
persistErrs = append ( persistErrs , errs ... )
2019-07-24 19:51:20 +00:00
}
2020-04-17 22:26:41 +00:00
csiSnapshotJSON , errs := encodeToJSONGzip ( csiVolumeSnapshots , "csi volume snapshots list" )
if errs != nil {
persistErrs = append ( persistErrs , errs ... )
2020-03-12 21:01:14 +00:00
}
2020-04-17 22:26:41 +00:00
csiSnapshotContentsJSON , errs := encodeToJSONGzip ( csiVolumeSnapshotContents , "csi volume snapshot contents list" )
if errs != nil {
persistErrs = append ( persistErrs , errs ... )
2020-03-12 21:01:14 +00:00
}
2022-03-31 10:09:35 +00:00
csiSnapshotClassesJSON , errs := encodeToJSONGzip ( csiVolumesnapshotClasses , "csi volume snapshot classes list" )
if errs != nil {
persistErrs = append ( persistErrs , errs ... )
}
2020-03-12 21:01:14 +00:00
2020-04-17 22:26:41 +00:00
backupResourceList , errs := encodeToJSONGzip ( backup . BackupResourceList ( ) , "backup resources list" )
if errs != nil {
persistErrs = append ( persistErrs , errs ... )
2019-08-05 17:15:55 +00:00
}
2020-04-17 22:26:41 +00:00
if len ( persistErrs ) > 0 {
2018-10-12 17:55:02 +00:00
// Don't upload the JSON files or backup tarball if encoding to json fails.
backupJSON = nil
backupContents = nil
2020-03-12 21:01:14 +00:00
nativeVolumeSnapshots = nil
2019-08-05 17:15:55 +00:00
backupResourceList = nil
2020-03-12 21:01:14 +00:00
csiSnapshotJSON = nil
2020-04-17 20:05:29 +00:00
csiSnapshotContentsJSON = nil
2022-03-31 10:09:35 +00:00
csiSnapshotClassesJSON = nil
2018-10-12 17:55:02 +00:00
}
2017-12-19 17:16:39 +00:00
2019-07-24 19:51:20 +00:00
backupInfo := persistence . BackupInfo {
2020-04-17 20:05:29 +00:00
Name : backup . Name ,
Metadata : backupJSON ,
Contents : backupContents ,
Log : backupLog ,
PodVolumeBackups : podVolumeBackups ,
VolumeSnapshots : nativeVolumeSnapshots ,
BackupResourceList : backupResourceList ,
CSIVolumeSnapshots : csiSnapshotJSON ,
CSIVolumeSnapshotContents : csiSnapshotContentsJSON ,
2022-03-31 10:09:35 +00:00
CSIVolumeSnapshotClasses : csiSnapshotClassesJSON ,
2019-07-24 19:51:20 +00:00
}
if err := backupStore . PutBackup ( backupInfo ) ; err != nil {
2020-04-17 22:26:41 +00:00
persistErrs = append ( persistErrs , err )
2018-10-12 17:55:02 +00:00
}
2020-04-17 22:26:41 +00:00
return persistErrs
2017-12-19 17:16:39 +00:00
}
func closeAndRemoveFile ( file * os . File , log logrus . FieldLogger ) {
2020-11-30 18:58:34 +00:00
if file == nil {
log . Debug ( "Skipping removal of file due to nil file pointer" )
return
}
2017-12-19 17:16:39 +00:00
if err := file . Close ( ) ; err != nil {
log . WithError ( err ) . WithField ( "file" , file . Name ( ) ) . Error ( "error closing file" )
2017-08-11 15:05:56 +00:00
}
2017-12-19 17:16:39 +00:00
if err := os . Remove ( file . Name ( ) ) ; err != nil {
log . WithError ( err ) . WithField ( "file" , file . Name ( ) ) . Error ( "error removing file" )
2017-08-02 17:27:17 +00:00
}
}
2020-04-13 17:38:11 +00:00
2020-04-16 19:42:57 +00:00
// encodeToJSONGzip takes arbitrary Go data and encodes it to GZip compressed JSON in a buffer, as well as a description of the data to put into an error should encoding fail.
2020-04-17 22:26:41 +00:00
func encodeToJSONGzip ( data interface { } , desc string ) ( * bytes . Buffer , [ ] error ) {
2020-04-13 17:38:11 +00:00
buf := new ( bytes . Buffer )
gzw := gzip . NewWriter ( buf )
2020-04-17 22:26:41 +00:00
// Since both encoding and closing the gzip writer could fail separately and both errors are useful,
// collect both errors to report back.
errs := [ ] error { }
2020-04-13 17:38:11 +00:00
if err := json . NewEncoder ( gzw ) . Encode ( data ) ; err != nil {
2020-04-17 22:26:41 +00:00
errs = append ( errs , errors . Wrapf ( err , "error encoding %s" , desc ) )
2020-04-13 17:38:11 +00:00
}
if err := gzw . Close ( ) ; err != nil {
2020-04-17 22:26:41 +00:00
errs = append ( errs , errors . Wrapf ( err , "error closing gzip writer for %s" , desc ) )
}
if len ( errs ) > 0 {
return nil , errs
2020-04-13 17:38:11 +00:00
}
return buf , nil
}
2022-05-02 08:37:28 +00:00
// Waiting for VolumeSnapshot ReadyTosue to true is time consuming. Try to make the process parallel by
// using goroutine here instead of waiting in CSI plugin, because it's not easy to make BackupItemAction
// parallel by now. After BackupItemAction parallel is implemented, this logic should be moved to CSI plugin
// as https://github.com/vmware-tanzu/velero-plugin-for-csi/pull/100
2022-07-06 07:51:42 +00:00
func ( c * backupController ) checkVolumeSnapshotReadyToUse ( ctx context . Context , volumesnapshots [ ] * snapshotv1api . VolumeSnapshot ,
csiSnapshotTimeout time . Duration ) error {
2022-05-02 08:37:28 +00:00
eg , _ := errgroup . WithContext ( ctx )
2022-07-06 07:51:42 +00:00
timeout := csiSnapshotTimeout
2022-05-02 08:37:28 +00:00
interval := 5 * time . Second
for _ , vs := range volumesnapshots {
volumeSnapshot := vs
eg . Go ( func ( ) error {
err := wait . PollImmediate ( interval , timeout , func ( ) ( bool , error ) {
tmpVS , err := c . volumeSnapshotClient . SnapshotV1 ( ) . VolumeSnapshots ( volumeSnapshot . Namespace ) . Get ( ctx , volumeSnapshot . Name , metav1 . GetOptions { } )
if err != nil {
return false , errors . Wrapf ( err , fmt . Sprintf ( "failed to get volumesnapshot %s/%s" , volumeSnapshot . Namespace , volumeSnapshot . Name ) )
}
if tmpVS . Status == nil || tmpVS . Status . BoundVolumeSnapshotContentName == nil || ! boolptr . IsSetToTrue ( tmpVS . Status . ReadyToUse ) {
log . Infof ( "Waiting for CSI driver to reconcile volumesnapshot %s/%s. Retrying in %ds" , volumeSnapshot . Namespace , volumeSnapshot . Name , interval / time . Second )
return false , nil
}
return true , nil
} )
if err == wait . ErrWaitTimeout {
log . Errorf ( "Timed out awaiting reconciliation of volumesnapshot %s/%s" , volumeSnapshot . Namespace , volumeSnapshot . Name )
}
return err
} )
}
return eg . Wait ( )
}
2022-04-21 09:17:28 +00:00
// deleteVolumeSnapshot delete VolumeSnapshot created during backup.
// This is used to avoid deleting namespace in cluster triggers the VolumeSnapshot deletion,
// which will cause snapshot deletion on cloud provider, then backup cannot restore the PV.
// If DeletionPolicy is Retain, just delete it. If DeletionPolicy is Delete, need to
// change DeletionPolicy to Retain before deleting VS, then change DeletionPolicy back to Delete.
func ( c * backupController ) deleteVolumeSnapshot ( volumeSnapshots [ ] * snapshotv1api . VolumeSnapshot ,
volumeSnapshotContents [ ] * snapshotv1api . VolumeSnapshotContent ,
backup pkgbackup . Request , logger logrus . FieldLogger ) {
var wg sync . WaitGroup
vscMap := make ( map [ string ] * snapshotv1api . VolumeSnapshotContent )
for _ , vsc := range volumeSnapshotContents {
vscMap [ vsc . Name ] = vsc
}
for _ , vs := range volumeSnapshots {
wg . Add ( 1 )
go func ( vs * snapshotv1api . VolumeSnapshot ) {
defer wg . Done ( )
var vsc * snapshotv1api . VolumeSnapshotContent
modifyVSCFlag := false
if vs . Status . BoundVolumeSnapshotContentName != nil &&
len ( * vs . Status . BoundVolumeSnapshotContentName ) > 0 {
vsc = vscMap [ * vs . Status . BoundVolumeSnapshotContentName ]
2022-08-16 01:28:57 +00:00
if nil == vsc {
logger . Errorf ( "Not find %s from the vscMap" , vs . Status . BoundVolumeSnapshotContentName )
return
}
2022-04-21 09:17:28 +00:00
if vsc . Spec . DeletionPolicy == snapshotv1api . VolumeSnapshotContentDelete {
modifyVSCFlag = true
}
}
// Change VolumeSnapshotContent's DeletionPolicy to Retain before deleting VolumeSnapshot,
// because VolumeSnapshotContent will be deleted by deleting VolumeSnapshot, when
// DeletionPolicy is set to Delete, but Velero needs VSC for cleaning snapshot on cloud
// in backup deletion.
if modifyVSCFlag {
logger . Debugf ( "Patching VolumeSnapshotContent %s" , vsc . Name )
2022-06-07 13:07:44 +00:00
original := vsc . DeepCopy ( )
vsc . Spec . DeletionPolicy = snapshotv1api . VolumeSnapshotContentRetain
if err := c . kbClient . Patch ( context . Background ( ) , vsc , kbclient . MergeFrom ( original ) ) ; err != nil {
2022-04-21 09:17:28 +00:00
logger . Errorf ( "fail to modify VolumeSnapshotContent %s DeletionPolicy to Retain: %s" , vsc . Name , err . Error ( ) )
return
}
defer func ( ) {
logger . Debugf ( "Start to recreate VolumeSnapshotContent %s" , vsc . Name )
err := c . recreateVolumeSnapshotContent ( vsc )
if err != nil {
logger . Errorf ( "fail to recreate VolumeSnapshotContent %s: %s" , vsc . Name , err . Error ( ) )
}
} ( )
}
// Delete VolumeSnapshot from cluster
logger . Debugf ( "Deleting VolumeSnapshotContent %s" , vsc . Name )
err := c . volumeSnapshotClient . SnapshotV1 ( ) . VolumeSnapshots ( vs . Namespace ) . Delete ( context . TODO ( ) , vs . Name , metav1 . DeleteOptions { } )
if err != nil {
logger . Errorf ( "fail to delete VolumeSnapshot %s/%s: %s" , vs . Namespace , vs . Name , err . Error ( ) )
}
} ( vs )
}
wg . Wait ( )
}
// recreateVolumeSnapshotContent will delete then re-create VolumeSnapshotContent,
// because some parameters in VolumeSnapshotContent Spec are immutable, e.g. VolumeSnapshotRef
// and Source. Source is updated to let csi-controller think the VSC is statically provisioned with VS.
// Setting VolumeSnapshotRef's UID to nil will let the csi-controller find out the related VS is gone, then
// VSC can be deleted.
//
// The passed vsc is modified in place (Spec.Source, Spec.VolumeSnapshotRef and
// ResourceVersion) and used as the object to create. After issuing the delete, the
// function polls once per second for up to one minute until the API server reports
// the object as not found, and only then creates the replacement.
//
// An error is returned, before anything is deleted, when vsc has no status: the
// snapshot handle needed for the static VolumeSnapshotContent is read from it.
func (c *backupController) recreateVolumeSnapshotContent(vsc *snapshotv1api.VolumeSnapshotContent) error {
	timeout := 1 * time.Minute
	interval := 1 * time.Second

	// Status is a pointer; without it there is no snapshot handle to carry over,
	// so refuse to delete an object that could not be re-created.
	if vsc.Status == nil {
		return errors.Errorf("fail to recreate VolumeSnapshotContent %s: status is not set", vsc.Name)
	}

	err := c.volumeSnapshotClient.SnapshotV1().VolumeSnapshotContents().Delete(context.TODO(), vsc.Name, metav1.DeleteOptions{})
	if err != nil {
		return errors.Wrapf(err, "fail to delete VolumeSnapshotContent: %s", vsc.Name)
	}

	// Check VolumeSnapshotContents is already deleted, before re-creating it.
	err = wait.PollImmediate(interval, timeout, func() (bool, error) {
		_, err := c.volumeSnapshotClient.SnapshotV1().VolumeSnapshotContents().Get(context.TODO(), vsc.Name, metav1.GetOptions{})
		if err != nil {
			if apierrors.IsNotFound(err) {
				return true, nil
			}
			// Pass the format and its argument straight to Wrapf rather than a
			// pre-formatted string, which would be re-interpreted as a format.
			return false, errors.Wrapf(err, "failed to get VolumeSnapshotContent %s", vsc.Name)
		}
		return false, nil
	})
	if err != nil {
		return errors.Wrapf(err, "fail to retrieve VolumeSnapshotContent %s info", vsc.Name)
	}

	// Make the VolumeSnapshotContent static
	vsc.Spec.Source = snapshotv1api.VolumeSnapshotContentSource{
		SnapshotHandle: vsc.Status.SnapshotHandle,
	}

	// Set VolumeSnapshotRef to a non-existent one, because the VolumeSnapshotContent
	// validation webhook will check whether name and namespace are nil.
	// external-snapshotter needs Source pointing to snapshot and VolumeSnapshot
	// reference's UID to nil to determine the VolumeSnapshotContent is deletable.
	vsc.Spec.VolumeSnapshotRef = v1.ObjectReference{
		APIVersion: snapshotv1api.SchemeGroupVersion.String(),
		Kind:       "VolumeSnapshot",
		Namespace:  "ns-" + string(vsc.UID),
		Name:       "name-" + string(vsc.UID),
	}

	// ResourceVersion shouldn't exist for new creation.
	vsc.ResourceVersion = ""
	_, err = c.volumeSnapshotClient.SnapshotV1().VolumeSnapshotContents().Create(context.TODO(), vsc, metav1.CreateOptions{})
	if err != nil {
		return errors.Wrapf(err, "fail to create VolumeSnapshotContent %s", vsc.Name)
	}

	return nil
}