Add checkpoint for FS backup deletion test

As per PR #7281, when a backup involves more than one repository, its snapshots are deleted via a fast path. To exercise that path, the test needs more than one FS backup repository per backup.

Signed-off-by: danfengl <>
danfengl 2024-04-03 05:49:31 +00:00
parent 3cbf2eb4e2
commit 61c4d7b148
4 changed files with 251 additions and 65 deletions

View File

@ -19,6 +19,7 @@ import (
@ -33,8 +34,6 @@ import (
. ""
const deletionTest = "deletion-workload"
// Test backup and restore of Kibishi using restic
func BackupDeletionWithSnapshots() {
@ -45,11 +44,7 @@ func BackupDeletionWithRestic() {
func backup_deletion_test(useVolumeSnapshots bool) {
var (
backupName string
veleroCfg VeleroConfig
veleroCfg = VeleroCfg
veleroCfg := VeleroCfg
veleroCfg.UseVolumeSnapshots = useVolumeSnapshots
veleroCfg.UseNodeAgent = !useVolumeSnapshots
@ -76,16 +71,23 @@ func backup_deletion_test(useVolumeSnapshots bool) {
When("kibishii is the sample workload", func() {
It("Deleted backups are deleted from object storage and backups deleted from object storage can be deleted locally", func() {
backupName = "backup-" + UUIDgen.String()
Expect(runBackupDeletionTests(*veleroCfg.ClientToInstallVelero, veleroCfg, backupName, "", useVolumeSnapshots, veleroCfg.KibishiiDirectory)).To(Succeed(),
Expect(runBackupDeletionTests(*veleroCfg.ClientToInstallVelero, veleroCfg, "", useVolumeSnapshots, veleroCfg.KibishiiDirectory)).To(Succeed(),
"Failed to run backup deletion test")
// runBackupDeletionTests runs upgrade test on the provider by kibishii.
func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupName, backupLocation string,
// runUpgradeTests runs upgrade test on the provider by kibishii.
func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupLocation string,
useVolumeSnapshots bool, kibishiiDirectory string) error {
var err error
var snapshotCheckPoint SnapshotCheckPoint
backupName := "backup-" + UUIDgen.String()
workloadNamespaceList := []string{"backup-deletion-1-" + UUIDgen.String(), "backup-deletion-2-" + UUIDgen.String()}
nsCount := len(workloadNamespaceList)
workloadNamespaces := strings.Join(workloadNamespaceList[:], ",")
if useVolumeSnapshots && veleroCfg.CloudProvider == "kind" {
Skip("Volume snapshots not supported on kind")
@ -98,29 +100,30 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupNam
bslPrefix := veleroCfg.BSLPrefix
bslConfig := veleroCfg.BSLConfig
veleroFeatures := veleroCfg.Features
if err := CreateNamespace(oneHourTimeout, client, deletionTest); err != nil {
return errors.Wrapf(err, "Failed to create namespace %s to install Kibishii workload", deletionTest)
if !veleroCfg.Debug {
defer func() {
if err := DeleteNamespace(context.Background(), client, deletionTest, true); err != nil {
fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", deletionTest))
for _, ns := range workloadNamespaceList {
if err := CreateNamespace(oneHourTimeout, client, ns); err != nil {
return errors.Wrapf(err, "Failed to create namespace %s to install Kibishii workload", ns)
if !veleroCfg.Debug {
defer func() {
if err := DeleteNamespace(context.Background(), client, ns, true); err != nil {
fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", ns))
if err := KibishiiPrepareBeforeBackup(oneHourTimeout, client, providerName, ns,
registryCredentialFile, veleroFeatures, kibishiiDirectory, useVolumeSnapshots, DefaultKibishiiData); err != nil {
return errors.Wrapf(err, "Failed to install and prepare data for kibishii %s", ns)
err := ObjectsShouldNotBeInBucket(veleroCfg.ObjectStoreProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLPrefix, veleroCfg.BSLConfig, backupName, BackupObjectsPrefix, 1)
if err != nil {
return err
if err := KibishiiPrepareBeforeBackup(oneHourTimeout, client, providerName, deletionTest,
registryCredentialFile, veleroFeatures, kibishiiDirectory, useVolumeSnapshots, DefaultKibishiiData); err != nil {
return errors.Wrapf(err, "Failed to install and prepare data for kibishii %s", deletionTest)
err := ObjectsShouldNotBeInBucket(veleroCfg.ObjectStoreProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLPrefix, veleroCfg.BSLConfig, backupName, BackupObjectsPrefix, 1)
if err != nil {
return err
var BackupCfg BackupConfig
BackupCfg.BackupName = backupName
BackupCfg.Namespace = deletionTest
BackupCfg.Namespace = workloadNamespaces
BackupCfg.BackupLocation = backupLocation
BackupCfg.UseVolumeSnapshots = useVolumeSnapshots
BackupCfg.DefaultVolumesToFsBackup = !useVolumeSnapshots
@ -133,34 +136,70 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupNam
return "Fail to backup workload"
if providerName == Vsphere && useVolumeSnapshots {
// Wait for uploads started by the Velero Plugin for vSphere to complete
// TODO - remove after upload progress monitoring is implemented
fmt.Println("Waiting for vSphere uploads to complete")
if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, deletionTest, 2); err != nil {
return errors.Wrapf(err, "Error waiting for uploads to complete")
for _, ns := range workloadNamespaceList {
if providerName == Vsphere && useVolumeSnapshots {
// Wait for uploads started by the Velero Plugin for vSphere to complete
// TODO - remove after upload progress monitoring is implemented
fmt.Println("Waiting for vSphere uploads to complete")
if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, ns, DefaultKibishiiWorkerCounts); err != nil {
return errors.Wrapf(err, "Error waiting for uploads to complete")
err = ObjectsShouldBeInBucket(veleroCfg.ObjectStoreProvider, veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslPrefix, bslConfig, backupName, BackupObjectsPrefix)
if err != nil {
return err
var snapshotCheckPoint SnapshotCheckPoint
if useVolumeSnapshots {
snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, 2, deletionTest, backupName, KibishiiPVCNameList)
Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint")
err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider,
veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslConfig,
backupName, snapshotCheckPoint)
// Check for snapshots existence
if veleroCfg.CloudProvider == Vsphere {
// For vSphere, checking snapshot should base on namespace and backup name
for _, ns := range workloadNamespaceList {
snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts, ns, backupName, KibishiiPVCNameList)
Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint")
err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider,
veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslConfig,
backupName, snapshotCheckPoint)
if err != nil {
return errors.Wrap(err, "exceed waiting for snapshot created in cloud")
} else {
// For public cloud, When using backup name to index VolumeSnapshotContents, make sure count of VolumeSnapshotContents should including PVs in all namespace
// so VolumeSnapshotContents count should be equal to "namespace count" * "Kibishii worker count per namespace".
snapshotCheckPoint, err = GetSnapshotCheckPoint(client, veleroCfg, DefaultKibishiiWorkerCounts*nsCount, "", backupName, KibishiiPVCNameList)
Expect(err).NotTo(HaveOccurred(), "Fail to get Azure CSI snapshot checkpoint")
// Get all snapshots base on backup name, regardless of namespaces
err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider,
veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, bslConfig,
backupName, snapshotCheckPoint)
if err != nil {
return errors.Wrap(err, "exceed waiting for snapshot created in cloud")
} else {
// Check for BackupRepository and DeleteRequest
var brList, pvbList []string
brList, err = KubectlGetBackupRepository(oneHourTimeout, "kopia", veleroCfg.VeleroNamespace)
if err != nil {
return errors.Wrap(err, "exceed waiting for snapshot created in cloud")
return err
pvbList, err = KubectlGetPodVolumeBackup(oneHourTimeout, BackupCfg.BackupName, veleroCfg.VeleroNamespace)
if err != nil {
return err
err = DeleteBackupResource(context.Background(), backupName, &veleroCfg)
err = DeleteBackup(context.Background(), backupName, &veleroCfg)
if err != nil {
return err
if useVolumeSnapshots {
err = SnapshotsShouldNotExistInCloud(veleroCfg.CloudProvider,
veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLConfig,
@ -207,7 +246,7 @@ func runBackupDeletionTests(client TestClient, veleroCfg VeleroConfig, backupNam
return err
err = DeleteBackupResource(context.Background(), backupName, &veleroCfg)
err = DeleteBackup(context.Background(), backupName, &veleroCfg)
if err != nil {
return errors.Wrapf(err, "|| UNEXPECTED || - Failed to delete backup %q", backupName)
} else {

View File

@ -25,6 +25,7 @@ import (
. ""
. ""
apierrors ""
waitutil ""
kbclient ""
@ -66,8 +67,8 @@ func SSRTest() {
ssrListResp := new(v1.ServerStatusRequestList)
By(fmt.Sprintf("Check ssr object in %s namespace", veleroCfg.VeleroNamespace))
err = waitutil.PollImmediate(5*time.Second, time.Minute,
func() (bool, error) {
err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, time.Minute, true,
func(context.Context) (bool, error) {
if err = veleroCfg.ClientToInstallVelero.Kubebuilder.List(ctx, ssrListResp, &kbclient.ListOptions{Namespace: veleroCfg.VeleroNamespace}); err != nil {
return false, fmt.Errorf("failed to list ssr object in %s namespace with err %v", veleroCfg.VeleroNamespace, err)
@ -85,9 +86,8 @@ func SSRTest() {
return true, nil
if err == waitutil.ErrWaitTimeout {
fmt.Printf("exceed test case deadline and failed to check ssr object in %s namespace", veleroCfg.VeleroNamespace)
fmt.Printf("exceed test case deadline and failed to check ssr object in %s namespace", veleroCfg.VeleroNamespace)
Expect(err).To(Succeed(), fmt.Sprintf("Failed to check ssr object in %s namespace", veleroCfg.VeleroNamespace))
By(fmt.Sprintf("Check ssr object in %s namespace", testNS))

View File

@ -20,11 +20,12 @@ func GetListByCmdPipes(ctx context.Context, cmdlines []*OsCommandLine) ([]string
var buf bytes.Buffer
var err error
var cmds []*exec.Cmd
for _, cmdline := range cmdlines {
cmd := exec.Command(cmdline.Cmd, cmdline.Args...)
cmds = append(cmds, cmd)
for i := 0; i < len(cmds); i++ {
if i == len(cmds)-1 {
@ -55,7 +56,6 @@ func GetListByCmdPipes(ctx context.Context, cmdlines []*OsCommandLine) ([]string
if err := scanner.Err(); err != nil {
return nil, err
return ret, nil

View File

@ -929,7 +929,7 @@ func getVeleroCliTarball(cliTarballUrl string) (*os.File, error) {
return tmpfile, nil
func DeleteBackupResource(ctx context.Context, backupName string, velerocfg *VeleroConfig) error {
func DeleteBackup(ctx context.Context, backupName string, velerocfg *VeleroConfig) error {
veleroCLI := velerocfg.VeleroCLI
args := []string{"--namespace", velerocfg.VeleroNamespace, "backup", "delete", backupName, "--confirm"}
@ -945,20 +945,53 @@ func DeleteBackupResource(ctx context.Context, backupName string, velerocfg *Vel
args = []string{"--namespace", velerocfg.VeleroNamespace, "backup", "get", backupName}
retryTimes := 5
for i := 1; i < retryTimes+1; i++ {
cmd = exec.CommandContext(ctx, veleroCLI, args...)
fmt.Printf("Try %d times to delete backup %s \n", i, cmd.String())
stdout, stderr, err = veleroexec.RunCommand(cmd)
if err != nil {
if strings.Contains(stderr, "not found") {
fmt.Printf("|| EXPECTED || - Backup %s was deleted successfully according to message %s\n", backupName, stderr)
return nil
err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, time.Minute, true,
func(context.Context) (bool, error) {
cmd = exec.CommandContext(ctx, veleroCLI, args...)
fmt.Printf("Try to get backup with cmd: %s \n", cmd.String())
stdout, stderr, err = veleroexec.RunCommand(cmd)
if err != nil {
if strings.Contains(stderr, "not found") {
fmt.Printf("|| EXPECTED || - Backup %s was deleted successfully according to message %s\n", backupName, stderr)
return true, nil
return false, errors.Wrapf(err, "Fail to perform get backup, stdout=%s, stderr=%s", stdout, stderr)
return errors.Wrapf(err, "Fail to perform get backup, stdout=%s, stderr=%s", stdout, stderr)
var status string
var drList []string
drList, err = KubectlGetAllDeleteBackupRequest(context.Background(), backupName, velerocfg.VeleroNamespace)
if len(drList) > 1 {
return false, errors.New(fmt.Sprintf("Count of DeleteBackupRequest %d is not expected", len(drList)))
// Record DeleteBackupRequest status for debugging
for _, dr := range drList {
status, err = KubectlGetDeleteBackupRequestStatus(context.Background(), dr, velerocfg.VeleroNamespace)
fmt.Printf("DeleteBackupRequest status: %s\n", status)
return true, nil
// Waiting for completion of handling deleteBackupRequest CR
time.Sleep(1 * time.Minute)
// Verify deleteBackupRequest are all gone because they are handled successfully
var drList []string
drList, err = KubectlGetAllDeleteBackupRequest(context.Background(), backupName, velerocfg.VeleroNamespace)
if len(drList) > 1 {
// Log deleteBackupRequest details for debug
for _, dr := range drList {
details, err := KubectlGetDeleteBackupRequestDetails(context.Background(), dr, velerocfg.VeleroNamespace)
if err != nil {
return errors.Wrapf(err, "fail to get DeleteBackupRequest %s details", dr)
fmt.Printf("Failed DeleteBackupRequest details: %s", details)
time.Sleep(1 * time.Minute)
return errors.New(fmt.Sprintf("Count of DeleteBackupRequest %d is not expected", len(drList)))
return nil
@ -1635,3 +1668,117 @@ func CleanAllRetainedPV(ctx context.Context, client TestClient) {
func KubectlGetBackupRepository(ctx context.Context, uploaderType, veleroNamespace string) ([]string, error) {
args1 := []string{"get", "backuprepository", "-n", veleroNamespace}
cmds := []*common.OsCommandLine{}
cmd := &common.OsCommandLine{
Cmd: "kubectl",
Args: args1,
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "grep",
Args: []string{uploaderType},
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "awk",
Args: []string{"{print $1}"},
cmds = append(cmds, cmd)
return common.GetListByCmdPipes(ctx, cmds)
func KubectlGetPodVolumeBackup(ctx context.Context, backupName, veleroNamespace string) ([]string, error) {
args1 := []string{"get", "podvolumebackup", "-n", veleroNamespace}
cmds := []*common.OsCommandLine{}
cmd := &common.OsCommandLine{
Cmd: "kubectl",
Args: args1,
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "grep",
Args: []string{backupName},
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "awk",
Args: []string{"{print $1}"},
cmds = append(cmds, cmd)
return common.GetListByCmdPipes(ctx, cmds)
func KubectlGetDeleteBackupRequestDetails(ctx context.Context, deleteBackupRequest, veleroNamespace string) (string, error) {
cmd := exec.CommandContext(ctx, "kubectl", "get", "deletebackuprequests", "-n", veleroNamespace, deleteBackupRequest, "-o", "json")
fmt.Printf("Get DeleteBackupRequest details cmd =%v\n", cmd)
stdout, stderr, err := veleroexec.RunCommand(cmd)
if err != nil {
return "", errors.Wrap(err, fmt.Sprintf("failed to run command %s", cmd))
return stdout, err
func KubectlGetDeleteBackupRequestStatus(ctx context.Context, deleteBackupRequest, veleroNamespace string) (string, error) {
args1 := []string{"get", "deletebackuprequests", "-n", veleroNamespace, deleteBackupRequest, "-o", "json"}
cmds := []*common.OsCommandLine{}
cmd := &common.OsCommandLine{
Cmd: "kubectl",
Args: args1,
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "jq",
Args: []string{"-r", ".status.phase"},
cmds = append(cmds, cmd)
ret, err := common.GetListByCmdPipes(ctx, cmds)
if len(ret) != 1 {
return "", errors.New(fmt.Sprintf("fail to get status of deletebackuprequests %s", deleteBackupRequest))
return ret[0], err
func KubectlGetAllDeleteBackupRequest(ctx context.Context, backupName, veleroNamespace string) ([]string, error) {
args1 := []string{"get", "deletebackuprequests", "-n", veleroNamespace}
cmds := []*common.OsCommandLine{}
cmd := &common.OsCommandLine{
Cmd: "kubectl",
Args: args1,
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "grep",
Args: []string{backupName},
cmds = append(cmds, cmd)
cmd = &common.OsCommandLine{
Cmd: "awk",
Args: []string{"{print $1}"},
cmds = append(cmds, cmd)
return common.GetListByCmdPipes(ctx, cmds)