diff --git a/changelogs/unreleased/8679-ywk253100 b/changelogs/unreleased/8679-ywk253100 new file mode 100644 index 000000000..b326ffb34 --- /dev/null +++ b/changelogs/unreleased/8679-ywk253100 @@ -0,0 +1 @@ +Fix #8657: WaitGroup panic issue \ No newline at end of file diff --git a/pkg/backup/backup.go b/pkg/backup/backup.go index 4ab8bb8a8..717aed051 100644 --- a/pkg/backup/backup.go +++ b/pkg/backup/backup.go @@ -780,7 +780,7 @@ func (kb *kubernetesBackupper) waitUntilPVBsProcessed(ctx context.Context, log l if processed { continue } - updatedPVB, err := itemBlock.itemBackupper.podVolumeBackupper.GetPodVolumeBackup(pvb.Namespace, pvb.Name) + updatedPVB, err := itemBlock.itemBackupper.podVolumeBackupper.GetPodVolumeBackupByPodAndVolume(pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name, pvb.Spec.Volume) if err != nil { allProcessed = false log.Infof("failed to get PVB: %v", err) diff --git a/pkg/backup/backup_test.go b/pkg/backup/backup_test.go index eb420e317..2cab53a8f 100644 --- a/pkg/backup/backup_test.go +++ b/pkg/backup/backup_test.go @@ -3978,9 +3978,9 @@ func (b *fakePodVolumeBackupper) WaitAllPodVolumesProcessed(log logrus.FieldLogg return b.pvbs } -func (b *fakePodVolumeBackupper) GetPodVolumeBackup(namespace, name string) (*velerov1.PodVolumeBackup, error) { +func (b *fakePodVolumeBackupper) GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1.PodVolumeBackup, error) { for _, pvb := range b.pvbs { - if pvb.Namespace == namespace && pvb.Name == name { + if pvb.Spec.Pod.Namespace == podNamespace && pvb.Spec.Pod.Name == podName && pvb.Spec.Volume == volume { return pvb, nil } } diff --git a/pkg/podvolume/backupper.go b/pkg/podvolume/backupper.go index 3b0dbe499..75f36c3c3 100644 --- a/pkg/podvolume/backupper.go +++ b/pkg/podvolume/backupper.go @@ -43,12 +43,17 @@ import ( "github.com/vmware-tanzu/velero/pkg/util/kube" ) +const ( + indexNamePod = "POD" + pvbKeyPattern = "%s+%s+%s" +) + // Backupper can execute pod volume backups of volumes in a pod. type Backupper interface { // BackupPodVolumes backs up all specified volumes in a pod. BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.Pod, volumesToBackup []string, resPolicies *resourcepolicies.Policies, log logrus.FieldLogger) ([]*velerov1api.PodVolumeBackup, *PVCBackupSummary, []error) WaitAllPodVolumesProcessed(log logrus.FieldLogger) []*velerov1api.PodVolumeBackup - GetPodVolumeBackup(namespace, name string) (*velerov1api.PodVolumeBackup, error) + GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1api.PodVolumeBackup, error) ListPodVolumeBackupsByPod(podNamespace, podName string) ([]*velerov1api.PodVolumeBackup, error) } @@ -106,8 +111,6 @@ func (pbs *PVCBackupSummary) addSkipped(volumeName string, reason string) { } } -const indexNamePod = "POD" - func podIndexFunc(obj any) ([]string, error) { pvb, ok := obj.(*velerov1api.PodVolumeBackup) if !ok { @@ -119,6 +122,16 @@ func podIndexFunc(obj any) ([]string, error) { return []string{cache.NewObjectName(pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name).String()}, nil } +// the PVB's name is auto-generated when creating the PVB, we cannot get the name or uid before creating it. +// So we cannot use namespace&name or uid as the key because we need to insert PVB into the indexer before creating it in API server +func podVolumeBackupKey(obj any) (string, error) { + pvb, ok := obj.(*velerov1api.PodVolumeBackup) + if !ok { + return "", fmt.Errorf("expected PodVolumeBackup, but got %T", obj) + } + return fmt.Sprintf(pvbKeyPattern, pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name, pvb.Spec.Volume), nil +} + func newBackupper( ctx context.Context, log logrus.FieldLogger, @@ -137,7 +150,7 @@ func newBackupper( uploaderType: uploaderType, pvbInformer: pvbInformer, wg: sync.WaitGroup{}, - pvbIndexer: cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{ + pvbIndexer: cache.NewIndexer(podVolumeBackupKey, cache.Indexers{ indexNamePod: podIndexFunc, }), } @@ -341,16 +354,22 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api. } volumeBackup := newPodVolumeBackup(backup, pod, volume, repoIdentifier, b.uploaderType, pvc) - if err := veleroclient.CreateRetryGenerateName(b.crClient, b.ctx, volumeBackup); err != nil { - errs = append(errs, err) - continue - } - b.wg.Add(1) - + // the PVB must be added into the indexer before creating it in API server otherwise unexpected behavior may happen: + // the PVB may be handled very quickly by the controller and the informer handler will insert the PVB before "b.pvbIndexer.Add(volumeBackup)" runs, + // this causes the PVB inserted by "b.pvbIndexer.Add(volumeBackup)" overrides the PVB in the indexer while the PVB inserted by "b.pvbIndexer.Add(volumeBackup)" + // contains empty "Status" if err := b.pvbIndexer.Add(volumeBackup); err != nil { errs = append(errs, errors.Wrapf(err, "failed to add PodVolumeBackup %s/%s to indexer", volumeBackup.Namespace, volumeBackup.Name)) continue } + // similar with above: the PVB may be handled very quickly by the controller and the informer handler will call "b.wg.Done()" before "b.wg.Add(1)" runs which causes panic + // see https://github.com/vmware-tanzu/velero/issues/8657 + b.wg.Add(1) + if err := veleroclient.CreateRetryGenerateName(b.crClient, b.ctx, volumeBackup); err != nil { + b.wg.Done() + errs = append(errs, err) + continue + } podVolumeBackups = append(podVolumeBackups, volumeBackup) pvcSummary.addBackedup(volumeName) @@ -392,8 +411,8 @@ func (b *backupper) WaitAllPodVolumesProcessed(log logrus.FieldLogger) []*velero return podVolumeBackups } -func (b *backupper) GetPodVolumeBackup(namespace, name string) (*velerov1api.PodVolumeBackup, error) { - obj, exist, err := b.pvbIndexer.GetByKey(cache.NewObjectName(namespace, name).String()) +func (b *backupper) GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1api.PodVolumeBackup, error) { + obj, exist, err := b.pvbIndexer.GetByKey(fmt.Sprintf(pvbKeyPattern, podNamespace, podName, volume)) if err != nil { return nil, err } diff --git a/pkg/podvolume/backupper_test.go b/pkg/podvolume/backupper_test.go index 1456bdf11..026cd5bb3 100644 --- a/pkg/podvolume/backupper_test.go +++ b/pkg/podvolume/backupper_test.go @@ -620,37 +620,44 @@ func TestBackupPodVolumes(t *testing.T) { } } -func TestGetPodVolumeBackup(t *testing.T) { +func TestGetPodVolumeBackupByPodAndVolume(t *testing.T) { backupper := &backupper{ - pvbIndexer: cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{ + pvbIndexer: cache.NewIndexer(podVolumeBackupKey, cache.Indexers{ indexNamePod: podIndexFunc, }), } obj := &velerov1api.PodVolumeBackup{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "velero", - Name: "pvb", - }, Spec: velerov1api.PodVolumeBackupSpec{ Pod: corev1api.ObjectReference{ Kind: "Pod", Namespace: "default", Name: "pod", }, + Volume: "volume", }, } err := backupper.pvbIndexer.Add(obj) require.NoError(t, err) - // not exist PVB - pvb, err := backupper.GetPodVolumeBackup("invalid-namespace", "invalid-name") + // incorrect pod namespace + pvb, err := backupper.GetPodVolumeBackupByPodAndVolume("invalid-namespace", "pod", "volume") require.NoError(t, err) assert.Nil(t, pvb) - // exist PVB - pvb, err = backupper.GetPodVolumeBackup("velero", "pvb") + // incorrect pod name + pvb, err = backupper.GetPodVolumeBackupByPodAndVolume("default", "invalid-pod", "volume") + require.NoError(t, err) + assert.Nil(t, pvb) + + // incorrect volume + pvb, err = backupper.GetPodVolumeBackupByPodAndVolume("default", "pod", "invalid-volume") + require.NoError(t, err) + assert.Nil(t, pvb) + + // correct pod namespace, name and volume + pvb, err = backupper.GetPodVolumeBackupByPodAndVolume("default", "pod", "volume") require.NoError(t, err) assert.NotNil(t, pvb) }