velero/pkg/podvolume/backupper.go

560 lines
19 KiB
Go

/*
Copyright 2018 the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podvolume
import (
"context"
"fmt"
"sync"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
corev1api "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/cache"
ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/resourcepolicies"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
veleroclient "github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/label"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/podvolume/configs"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
uploaderutil "github.com/vmware-tanzu/velero/pkg/uploader/util"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
const (
indexNamePod = "POD"
pvbKeyPattern = "%s+%s+%s"
)
// Backupper can execute pod volume backups of volumes in a pod.
type Backupper interface {
// BackupPodVolumes backs up all specified volumes in a pod.
BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.Pod, volumesToBackup []string, resPolicies *resourcepolicies.Policies, log logrus.FieldLogger) ([]*velerov1api.PodVolumeBackup, *PVCBackupSummary, []error)
WaitAllPodVolumesProcessed(log logrus.FieldLogger) []*velerov1api.PodVolumeBackup
GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1api.PodVolumeBackup, error)
ListPodVolumeBackupsByPod(podNamespace, podName string) ([]*velerov1api.PodVolumeBackup, error)
}
type backupper struct {
ctx context.Context
repoLocker *repository.RepoLocker
repoEnsurer *repository.Ensurer
crClient ctrlclient.Client
uploaderType string
pvbInformer ctrlcache.Informer
handlerRegistration cache.ResourceEventHandlerRegistration
wg sync.WaitGroup
// pvbIndexer holds all PVBs created by this backuper and is capable to search
// the PVBs based on specific properties quickly because of the embedded indexes.
// The statuses of the PVBs are got updated when Informer receives update events.
pvbIndexer cache.Indexer
}
type skippedPVC struct {
PVC *corev1api.PersistentVolumeClaim
Reason string
}
// PVCBackupSummary is a summary for which PVCs are skipped, which are backed up after each execution of the Backupper
// The scope should be within one pod, so the volume name is the key for the maps
type PVCBackupSummary struct {
Backedup map[string]*corev1api.PersistentVolumeClaim
Skipped map[string]*skippedPVC
pvcMap map[string]*corev1api.PersistentVolumeClaim
}
func NewPVCBackupSummary() *PVCBackupSummary {
return &PVCBackupSummary{
Backedup: make(map[string]*corev1api.PersistentVolumeClaim),
Skipped: make(map[string]*skippedPVC),
pvcMap: make(map[string]*corev1api.PersistentVolumeClaim),
}
}
func (pbs *PVCBackupSummary) addBackedup(volumeName string) {
if pvc, ok := pbs.pvcMap[volumeName]; ok {
pbs.Backedup[volumeName] = pvc
delete(pbs.Skipped, volumeName)
}
}
func (pbs *PVCBackupSummary) addSkipped(volumeName string, reason string) {
if pvc, ok := pbs.pvcMap[volumeName]; ok {
if _, ok2 := pbs.Backedup[volumeName]; !ok2 { // if it's not backed up, add it to skipped
pbs.Skipped[volumeName] = &skippedPVC{
PVC: pvc,
Reason: reason,
}
}
}
}
func podIndexFunc(obj any) ([]string, error) {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
return nil, errors.Errorf("expected PodVolumeBackup, but got %T", obj)
}
if pvb == nil {
return nil, errors.New("PodVolumeBackup is nil")
}
return []string{cache.NewObjectName(pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name).String()}, nil
}
// the PVB's name is auto-generated when creating the PVB, we cannot get the name or uid before creating it.
// So we cannot use namespace&name or uid as the key because we need to insert PVB into the indexer before creating it in API server
func podVolumeBackupKey(obj any) (string, error) {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
return "", fmt.Errorf("expected PodVolumeBackup, but got %T", obj)
}
return fmt.Sprintf(pvbKeyPattern, pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name, pvb.Spec.Volume), nil
}
func newBackupper(
ctx context.Context,
log logrus.FieldLogger,
repoLocker *repository.RepoLocker,
repoEnsurer *repository.Ensurer,
pvbInformer ctrlcache.Informer,
crClient ctrlclient.Client,
uploaderType string,
backup *velerov1api.Backup,
) *backupper {
b := &backupper{
ctx: ctx,
repoLocker: repoLocker,
repoEnsurer: repoEnsurer,
crClient: crClient,
uploaderType: uploaderType,
pvbInformer: pvbInformer,
wg: sync.WaitGroup{},
pvbIndexer: cache.NewIndexer(podVolumeBackupKey, cache.Indexers{
indexNamePod: podIndexFunc,
}),
}
b.handlerRegistration, _ = pvbInformer.AddEventHandler(
cache.ResourceEventHandlerFuncs{
UpdateFunc: func(_, obj any) {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
log.Errorf("expected PodVolumeBackup, but got %T", obj)
return
}
if pvb.GetLabels()[velerov1api.BackupUIDLabel] != string(backup.UID) {
return
}
if pvb.Status.Phase != velerov1api.PodVolumeBackupPhaseCompleted &&
pvb.Status.Phase != velerov1api.PodVolumeBackupPhaseFailed &&
pvb.Status.Phase != velerov1api.PodVolumeBackupPhaseCanceled {
return
}
statusChangedToFinal := true
existObj, exist, err := b.pvbIndexer.Get(pvb)
if err == nil && exist {
existPVB, ok := existObj.(*velerov1api.PodVolumeBackup)
// the PVB in the indexer is already in final status, no need to call WaitGroup.Done()
if ok && (existPVB.Status.Phase == velerov1api.PodVolumeBackupPhaseCompleted ||
existPVB.Status.Phase == velerov1api.PodVolumeBackupPhaseFailed ||
pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseCanceled) {
statusChangedToFinal = false
}
}
// the Indexer inserts PVB directly if the PVB to be updated doesn't exist
if err := b.pvbIndexer.Update(pvb); err != nil {
log.WithError(err).Errorf("failed to update PVB %s/%s in indexer", pvb.Namespace, pvb.Name)
}
// call WaitGroup.Done() once only when the PVB changes to final status the first time.
// This avoid the cases that the handler gets multiple update events whose PVBs are all in final status
// which causes panic with "negative WaitGroup counter" error
if statusChangedToFinal {
b.wg.Done()
}
},
},
)
return b
}
func resultsKey(ns, name string) string {
return fmt.Sprintf("%s/%s", ns, name)
}
func (b *backupper) getMatchAction(resPolicies *resourcepolicies.Policies, pvc *corev1api.PersistentVolumeClaim, volume *corev1api.Volume) (*resourcepolicies.Action, error) {
if pvc != nil {
pv := new(corev1api.PersistentVolume)
err := b.crClient.Get(context.TODO(), ctrlclient.ObjectKey{Name: pvc.Spec.VolumeName}, pv)
if err != nil {
return nil, errors.Wrapf(err, "error getting pv for pvc %s", pvc.Spec.VolumeName)
}
vfd := resourcepolicies.NewVolumeFilterData(pv, nil, pvc)
return resPolicies.GetMatchAction(vfd)
}
if volume != nil {
vfd := resourcepolicies.NewVolumeFilterData(nil, volume, pvc)
return resPolicies.GetMatchAction(vfd)
}
return nil, errors.Errorf("failed to check resource policies for empty volume")
}
var funcGetRepositoryType = getRepositoryType
func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.Pod, volumesToBackup []string, resPolicies *resourcepolicies.Policies, log logrus.FieldLogger) ([]*velerov1api.PodVolumeBackup, *PVCBackupSummary, []error) {
if len(volumesToBackup) == 0 {
return nil, nil, nil
}
log.Infof("pod %s/%s has volumes to backup: %v", pod.Namespace, pod.Name, volumesToBackup)
var (
pvcSummary = NewPVCBackupSummary()
podVolumes = make(map[string]corev1api.Volume)
errs = []error{}
)
// put the pod's volumes and the PVC associated in maps for efficient lookup below
for _, podVolume := range pod.Spec.Volumes {
podVolumes[podVolume.Name] = podVolume
if podVolume.PersistentVolumeClaim != nil {
pvc := new(corev1api.PersistentVolumeClaim)
err := b.crClient.Get(context.TODO(), ctrlclient.ObjectKey{Namespace: pod.Namespace, Name: podVolume.PersistentVolumeClaim.ClaimName}, pvc)
if err != nil {
errs = append(errs, errors.Wrap(err, "error getting persistent volume claim for volume"))
continue
}
pvcSummary.pvcMap[podVolume.Name] = pvc
}
}
if msg, err := uploader.ValidateUploaderType(b.uploaderType); err != nil {
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
return nil, pvcSummary, []error{err}
} else if msg != "" {
log.Warn(msg)
}
if err := kube.IsPodRunning(pod); err != nil {
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
return nil, pvcSummary, nil
}
err := nodeagent.IsRunningInNode(b.ctx, backup.Namespace, pod.Spec.NodeName, b.crClient)
if err != nil {
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
return nil, pvcSummary, []error{err}
}
repositoryType := funcGetRepositoryType(b.uploaderType)
if repositoryType == "" {
err := errors.Errorf("empty repository type, uploader %s", b.uploaderType)
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
return nil, pvcSummary, []error{err}
}
repo, err := b.repoEnsurer.EnsureRepo(b.ctx, backup.Namespace, pod.Namespace, backup.Spec.StorageLocation, repositoryType)
if err != nil {
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
return nil, pvcSummary, []error{err}
}
// get a single non-exclusive lock since we'll wait for all individual
// backups to be complete before releasing it.
b.repoLocker.Lock(repo.Name)
defer b.repoLocker.Unlock(repo.Name)
var (
podVolumeBackups []*velerov1api.PodVolumeBackup
mountedPodVolumes = sets.Set[string]{}
attachedPodDevices = sets.Set[string]{}
)
for _, container := range pod.Spec.Containers {
for _, volumeMount := range container.VolumeMounts {
mountedPodVolumes.Insert(volumeMount.Name)
}
for _, volumeDevice := range container.VolumeDevices {
attachedPodDevices.Insert(volumeDevice.Name)
}
}
repoIdentifier := ""
if repositoryType == velerov1api.BackupRepositoryTypeRestic {
repoIdentifier = repo.Spec.ResticIdentifier
}
for _, volumeName := range volumesToBackup {
volume, ok := podVolumes[volumeName]
if !ok {
log.Warnf("No volume named %s found in pod %s/%s, skipping", volumeName, pod.Namespace, pod.Name)
continue
}
var pvc *corev1api.PersistentVolumeClaim
if volume.PersistentVolumeClaim != nil {
pvc, ok = pvcSummary.pvcMap[volumeName]
if !ok {
// there should have been error happened retrieving the PVC and it's recorded already
continue
}
}
if resPolicies != nil {
if action, err := b.getMatchAction(resPolicies, pvc, &volume); err != nil {
errs = append(errs, errors.Wrapf(err, "error getting pv for pvc %s", pvc.Spec.VolumeName))
continue
} else if action != nil && action.Type == resourcepolicies.Skip {
log.Infof("skip backup of volume %s for the matched resource policies", volumeName)
pvcSummary.addSkipped(volumeName, "matched action is 'skip' in chosen resource policies")
continue
}
}
// hostPath volumes are not supported because they're not mounted into /var/lib/kubelet/pods, so our
// daemonset pod has no way to access their data.
isHostPath, err := isHostPathVolume(&volume, pvc, b.crClient)
if err != nil {
errs = append(errs, errors.Wrap(err, "error checking if volume is a hostPath volume"))
continue
}
if isHostPath {
log.Warnf("Volume %s in pod %s/%s is a hostPath volume which is not supported for pod volume backup, skipping", volumeName, pod.Namespace, pod.Name)
continue
}
// check if volume is a block volume
if attachedPodDevices.Has(volumeName) {
msg := fmt.Sprintf("volume %s declared in pod %s/%s is a block volume. Block volumes are not supported for fs backup, skipping",
volumeName, pod.Namespace, pod.Name)
log.Warn(msg)
pvcSummary.addSkipped(volumeName, msg)
continue
}
// volumes that are not mounted by any container should not be backed up, because
// its directory is not created
if !mountedPodVolumes.Has(volumeName) {
msg := fmt.Sprintf("volume %s is declared in pod %s/%s but not mounted by any container, skipping", volumeName, pod.Namespace, pod.Name)
log.Warn(msg)
pvcSummary.addSkipped(volumeName, msg)
continue
}
volumeBackup := newPodVolumeBackup(backup, pod, volume, repoIdentifier, b.uploaderType, pvc)
// the PVB must be added into the indexer before creating it in API server otherwise unexpected behavior may happen:
// the PVB may be handled very quickly by the controller and the informer handler will insert the PVB before "b.pvbIndexer.Add(volumeBackup)" runs,
// this causes the PVB inserted by "b.pvbIndexer.Add(volumeBackup)" overrides the PVB in the indexer while the PVB inserted by "b.pvbIndexer.Add(volumeBackup)"
// contains empty "Status"
if err := b.pvbIndexer.Add(volumeBackup); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to add PodVolumeBackup %s/%s to indexer", volumeBackup.Namespace, volumeBackup.Name))
continue
}
// similar with above: the PVB may be handled very quickly by the controller and the informer handler will call "b.wg.Done()" before "b.wg.Add(1)" runs which causes panic
// see https://github.com/vmware-tanzu/velero/issues/8657
b.wg.Add(1)
if err := veleroclient.CreateRetryGenerateName(b.crClient, b.ctx, volumeBackup); err != nil {
b.wg.Done()
errs = append(errs, err)
continue
}
podVolumeBackups = append(podVolumeBackups, volumeBackup)
pvcSummary.addBackedup(volumeName)
}
return podVolumeBackups, pvcSummary, errs
}
func (b *backupper) WaitAllPodVolumesProcessed(log logrus.FieldLogger) []*velerov1api.PodVolumeBackup {
defer func() {
if err := b.pvbInformer.RemoveEventHandler(b.handlerRegistration); err != nil {
log.Debugf("failed to remove the event handler for PVB: %v", err)
}
}()
log.Info("Waiting for completion of PVB")
var podVolumeBackups []*velerov1api.PodVolumeBackup
// if no pod volume backups are tracked, return directly to avoid issue mentioned in
// https://github.com/vmware-tanzu/velero/issues/8723
if len(b.pvbIndexer.List()) == 0 {
return podVolumeBackups
}
done := make(chan struct{})
go func() {
defer close(done)
b.wg.Wait()
}()
select {
case <-b.ctx.Done():
log.Error("timed out waiting for all PodVolumeBackups to complete")
case <-done:
for _, obj := range b.pvbIndexer.List() {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
log.Errorf("expected PodVolumeBackup, but got %T", obj)
continue
}
podVolumeBackups = append(podVolumeBackups, pvb)
if pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseFailed {
log.Errorf("pod volume backup failed: %s", pvb.Status.Message)
} else if pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseCanceled {
log.Errorf("pod volume backup canceled: %s", pvb.Status.Message)
}
}
}
return podVolumeBackups
}
func (b *backupper) GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1api.PodVolumeBackup, error) {
obj, exist, err := b.pvbIndexer.GetByKey(fmt.Sprintf(pvbKeyPattern, podNamespace, podName, volume))
if err != nil {
return nil, err
}
if !exist {
return nil, nil
}
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
return nil, errors.Errorf("expected PodVolumeBackup, but got %T", obj)
}
return pvb, nil
}
func (b *backupper) ListPodVolumeBackupsByPod(podNamespace, podName string) ([]*velerov1api.PodVolumeBackup, error) {
objs, err := b.pvbIndexer.ByIndex(indexNamePod, cache.NewObjectName(podNamespace, podName).String())
if err != nil {
return nil, err
}
var pvbs []*velerov1api.PodVolumeBackup
for _, obj := range objs {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
return nil, errors.Errorf("expected PodVolumeBackup, but got %T", obj)
}
pvbs = append(pvbs, pvb)
}
return pvbs, nil
}
func skipAllPodVolumes(pod *corev1api.Pod, volumesToBackup []string, err error, pvcSummary *PVCBackupSummary, log logrus.FieldLogger) {
for _, volumeName := range volumesToBackup {
log.WithError(err).Warnf("Skip pod volume %s", volumeName)
pvcSummary.addSkipped(volumeName, fmt.Sprintf("encountered a problem with backing up the PVC of pod %s/%s: %v", pod.Namespace, pod.Name, err))
}
}
// isHostPathVolume returns true if the volume is either a hostPath pod volume or a persistent
// volume claim on a hostPath persistent volume, or false otherwise.
func isHostPathVolume(volume *corev1api.Volume, pvc *corev1api.PersistentVolumeClaim, crClient ctrlclient.Client) (bool, error) {
if volume.HostPath != nil {
return true, nil
}
if pvc == nil || pvc.Spec.VolumeName == "" {
return false, nil
}
pv := new(corev1api.PersistentVolume)
err := crClient.Get(context.TODO(), ctrlclient.ObjectKey{Name: pvc.Spec.VolumeName}, pv)
if err != nil {
return false, errors.WithStack(err)
}
return pv.Spec.HostPath != nil, nil
}
func newPodVolumeBackup(backup *velerov1api.Backup, pod *corev1api.Pod, volume corev1api.Volume, repoIdentifier, uploaderType string, pvc *corev1api.PersistentVolumeClaim) *velerov1api.PodVolumeBackup {
pvb := &velerov1api.PodVolumeBackup{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
GenerateName: backup.Name + "-",
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: velerov1api.SchemeGroupVersion.String(),
Kind: "Backup",
Name: backup.Name,
UID: backup.UID,
Controller: boolptr.True(),
},
},
Labels: map[string]string{
velerov1api.BackupNameLabel: label.GetValidName(backup.Name),
velerov1api.BackupUIDLabel: string(backup.UID),
},
},
Spec: velerov1api.PodVolumeBackupSpec{
Node: pod.Spec.NodeName,
Pod: corev1api.ObjectReference{
Kind: "Pod",
Namespace: pod.Namespace,
Name: pod.Name,
UID: pod.UID,
},
Volume: volume.Name,
Tags: map[string]string{
"backup": backup.Name,
"backup-uid": string(backup.UID),
"pod": pod.Name,
"pod-uid": string(pod.UID),
"ns": pod.Namespace,
"volume": volume.Name,
},
BackupStorageLocation: backup.Spec.StorageLocation,
RepoIdentifier: repoIdentifier,
UploaderType: uploaderType,
},
}
if pvc != nil {
// this annotation is used in pkg/restore to identify if a PVC
// has a pod volume backup.
pvb.Annotations = map[string]string{
configs.PVCNameAnnotation: pvc.Name,
}
// this label is used by the pod volume backup controller to tell
// if a pod volume backup is for a PVC.
pvb.Labels[velerov1api.PVCUIDLabel] = string(pvc.UID)
// this tag is not used by velero, but useful for debugging.
pvb.Spec.Tags["pvc-uid"] = string(pvc.UID)
}
if backup.Spec.UploaderConfig != nil {
pvb.Spec.UploaderSettings = uploaderutil.StoreBackupConfig(backup.Spec.UploaderConfig)
}
return pvb
}