velero/test/e2e/util/kibishii/kibishii_utils.go

366 lines
16 KiB
Go

/*
Copyright the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kibishii
import (
"fmt"
"os/exec"
"strconv"
"strings"
"time"
"github.com/pkg/errors"
"golang.org/x/net/context"
"k8s.io/apimachinery/pkg/util/wait"
veleroexec "github.com/vmware-tanzu/velero/pkg/util/exec"
. "github.com/vmware-tanzu/velero/test/e2e"
. "github.com/vmware-tanzu/velero/test/e2e/util/k8s"
. "github.com/vmware-tanzu/velero/test/e2e/util/providers"
. "github.com/vmware-tanzu/velero/test/e2e/util/velero"
)
const (
jumpPadPod = "jump-pad"
)
type KibishiiData struct {
Levels int
DirsPerLevel int
FilesPerLevel int
FileLength int
BlockSize int
PassNum int
ExpectedNodes int
}
var DefaultKibishiiWorkerCounts = 2
var DefaultKibishiiData = &KibishiiData{2, 10, 10, 1024, 1024, 0, DefaultKibishiiWorkerCounts}
var KibishiiPVCNameList = []string{"kibishii-data-kibishii-deployment-0", "kibishii-data-kibishii-deployment-1"}
var KibishiiStorageClassName = "kibishii-storage-class"
// RunKibishiiTests runs kibishii tests on the provider.
func RunKibishiiTests(veleroCfg VeleroConfig, backupName, restoreName, backupLocation, kibishiiNamespace string,
useVolumeSnapshots, defaultVolumesToFsBackup bool) error {
pvCount := len(KibishiiPVCNameList)
client := *veleroCfg.ClientToInstallVelero
oneHourTimeout, ctxCancel := context.WithTimeout(context.Background(), time.Minute*60)
defer ctxCancel()
veleroCLI := veleroCfg.VeleroCLI
providerName := veleroCfg.CloudProvider
veleroNamespace := veleroCfg.VeleroNamespace
registryCredentialFile := veleroCfg.RegistryCredentialFile
veleroFeatures := veleroCfg.Features
kibishiiDirectory := veleroCfg.KibishiiDirectory
if _, err := GetNamespace(context.Background(), client, kibishiiNamespace); err == nil {
fmt.Printf("Workload namespace %s exists, delete it first.\n", kibishiiNamespace)
if err = DeleteNamespace(context.Background(), client, kibishiiNamespace, true); err != nil {
fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", kibishiiNamespace))
}
}
if err := CreateNamespace(oneHourTimeout, client, kibishiiNamespace); err != nil {
return errors.Wrapf(err, "Failed to create namespace %s to install Kibishii workload", kibishiiNamespace)
}
defer func() {
if !veleroCfg.Debug {
if err := DeleteNamespace(context.Background(), client, kibishiiNamespace, true); err != nil {
fmt.Println(errors.Wrapf(err, "failed to delete the namespace %q", kibishiiNamespace))
}
}
}()
fmt.Printf("KibishiiPrepareBeforeBackup %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := KibishiiPrepareBeforeBackup(oneHourTimeout, client, providerName,
kibishiiNamespace, registryCredentialFile, veleroFeatures,
kibishiiDirectory, useVolumeSnapshots, DefaultKibishiiData); err != nil {
return errors.Wrapf(err, "Failed to install and prepare data for kibishii %s", kibishiiNamespace)
}
fmt.Printf("KibishiiPrepareBeforeBackup done %s\n", time.Now().Format("2006-01-02 15:04:05"))
var BackupCfg BackupConfig
BackupCfg.BackupName = backupName
BackupCfg.Namespace = kibishiiNamespace
BackupCfg.BackupLocation = backupLocation
BackupCfg.UseVolumeSnapshots = useVolumeSnapshots
BackupCfg.DefaultVolumesToFsBackup = defaultVolumesToFsBackup
BackupCfg.Selector = ""
BackupCfg.ProvideSnapshotsVolumeParam = veleroCfg.ProvideSnapshotsVolumeParam
fmt.Printf("VeleroBackupNamespace %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := VeleroBackupNamespace(oneHourTimeout, veleroCLI, veleroNamespace, BackupCfg); err != nil {
RunDebug(context.Background(), veleroCLI, veleroNamespace, backupName, "")
return errors.Wrapf(err, "Failed to backup kibishii namespace %s", kibishiiNamespace)
}
fmt.Printf("VeleroBackupNamespace done %s\n", time.Now().Format("2006-01-02 15:04:05"))
if useVolumeSnapshots {
if providerName == "vsphere" {
// Wait for uploads started by the Velero Plugin for vSphere to complete
// TODO - remove after upload progress monitoring is implemented
fmt.Println("Waiting for vSphere uploads to complete")
if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, kibishiiNamespace, 2); err != nil {
return errors.Wrapf(err, "Error waiting for uploads to complete")
}
}
snapshotCheckPoint, err := GetSnapshotCheckPoint(client, veleroCfg, 2, kibishiiNamespace, backupName, KibishiiPVCNameList)
if err != nil {
return errors.Wrap(err, "Fail to get snapshot checkpoint")
}
err = SnapshotsShouldBeCreatedInCloud(veleroCfg.CloudProvider,
veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLConfig,
backupName, snapshotCheckPoint)
if err != nil {
return errors.Wrap(err, "exceed waiting for snapshot created in cloud")
}
} else {
pvbs, err := GetPVB(oneHourTimeout, veleroCfg.VeleroNamespace, kibishiiNamespace)
if err != nil {
return errors.Wrapf(err, "failed to get PVB for namespace %s", kibishiiNamespace)
}
if len(pvbs) != pvCount {
return errors.New(fmt.Sprintf("PVB count %d should be %d in namespace %s", len(pvbs), pvCount, kibishiiNamespace))
}
if providerName == "vsphere" {
// Wait for uploads started by the Velero Plugin for vSphere to complete
// TODO - remove after upload progress monitoring is implemented
// TODO[High] - uncomment code block below when vSphere plugin PR #500 is included in release version.
// https://github.com/vmware-tanzu/velero-plugin-for-vsphere/pull/500/
// fmt.Println("Make sure no vSphere snapshot uploads created")
// if err := WaitForVSphereUploadCompletion(oneHourTimeout, time.Hour, kibishiiNamespace, 0); err != nil {
// return errors.Wrapf(err, "Error get vSphere snapshot uploads")
// }
} else {
// wait for a period to confirm no snapshots exist for the backup
time.Sleep(5 * time.Minute)
if strings.EqualFold(veleroFeatures, "EnableCSI") {
_, err = GetSnapshotCheckPoint(*veleroCfg.ClientToInstallVelero, veleroCfg, 0,
kibishiiNamespace, backupName, KibishiiPVCNameList)
if err != nil {
return errors.Wrap(err, "failed to get snapshot checkPoint")
}
} else {
err = SnapshotsShouldNotExistInCloud(veleroCfg.CloudProvider,
veleroCfg.CloudCredentialsFile, veleroCfg.BSLBucket, veleroCfg.BSLConfig,
backupName, SnapshotCheckPoint{})
if err != nil {
return errors.Wrap(err, "exceed waiting for snapshot created in cloud")
}
}
}
}
fmt.Printf("Simulating a disaster by removing namespace %s %s\n", kibishiiNamespace, time.Now().Format("2006-01-02 15:04:05"))
if err := DeleteNamespace(oneHourTimeout, client, kibishiiNamespace, true); err != nil {
return errors.Wrapf(err, "failed to delete namespace %s", kibishiiNamespace)
}
// the snapshots of AWS may be still in pending status when do the restore, wait for a while
// to avoid this https://github.com/vmware-tanzu/velero/issues/1799
// TODO remove this after https://github.com/vmware-tanzu/velero/issues/3533 is fixed
if useVolumeSnapshots {
fmt.Println("Waiting 5 minutes to make sure the snapshots are ready...")
time.Sleep(5 * time.Minute)
}
fmt.Printf("VeleroRestore %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := VeleroRestore(oneHourTimeout, veleroCLI, veleroNamespace, restoreName, backupName, ""); err != nil {
RunDebug(context.Background(), veleroCLI, veleroNamespace, "", restoreName)
return errors.Wrapf(err, "Restore %s failed from backup %s", restoreName, backupName)
}
if !useVolumeSnapshots && providerName != "vsphere" {
pvrs, err := GetPVR(oneHourTimeout, veleroCfg.VeleroNamespace, kibishiiNamespace)
if err != nil {
return errors.Wrapf(err, "failed to get PVR for namespace %s", kibishiiNamespace)
} else if len(pvrs) != pvCount {
return errors.New(fmt.Sprintf("PVR count %d is not as expected %d", len(pvrs), pvCount))
}
}
fmt.Printf("KibishiiVerifyAfterRestore %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := KibishiiVerifyAfterRestore(client, kibishiiNamespace, oneHourTimeout, DefaultKibishiiData); err != nil {
return errors.Wrapf(err, "Error verifying kibishii after restore")
}
fmt.Printf("kibishii test completed successfully %s\n", time.Now().Format("2006-01-02 15:04:05"))
return nil
}
func installKibishii(ctx context.Context, namespace string, cloudPlatform, veleroFeatures,
kibishiiDirectory string, useVolumeSnapshots bool, workerReplicas int) error {
if strings.EqualFold(cloudPlatform, "azure") &&
strings.EqualFold(veleroFeatures, "EnableCSI") {
cloudPlatform = "azure-csi"
}
if strings.EqualFold(cloudPlatform, "aws") &&
strings.EqualFold(veleroFeatures, "EnableCSI") {
cloudPlatform = "aws-csi"
}
// We use kustomize to generate YAML for Kibishii from the checked-in yaml directories
kibishiiInstallCmd := exec.CommandContext(ctx, "kubectl", "apply", "-n", namespace, "-k",
kibishiiDirectory+cloudPlatform, "--timeout=90s")
_, stderr, err := veleroexec.RunCommand(kibishiiInstallCmd)
fmt.Printf("Install Kibishii cmd: %s\n", kibishiiInstallCmd)
if err != nil {
return errors.Wrapf(err, "failed to install kibishii, stderr=%s", stderr)
}
labelNamespaceCmd := exec.CommandContext(ctx, "kubectl", "label", "namespace", namespace, "pod-security.kubernetes.io/enforce=baseline", "pod-security.kubernetes.io/enforce-version=latest", "--overwrite=true")
_, stderr, err = veleroexec.RunCommand(labelNamespaceCmd)
fmt.Printf("Label namespace with PSA policy: %s\n", labelNamespaceCmd)
if err != nil {
return errors.Wrapf(err, "failed to label namespace with PSA policy, stderr=%s", stderr)
}
if workerReplicas != DefaultKibishiiWorkerCounts {
err = ScaleStatefulSet(ctx, namespace, "kibishii-deployment", workerReplicas)
if err != nil {
return errors.Wrapf(err, "failed to scale statefulset, stderr=%s", err.Error())
}
}
kibishiiSetWaitCmd := exec.CommandContext(ctx, "kubectl", "rollout", "status", "statefulset.apps/kibishii-deployment",
"-n", namespace, "-w", "--timeout=30m")
_, stderr, err = veleroexec.RunCommand(kibishiiSetWaitCmd)
if err != nil {
return errors.Wrapf(err, "failed to rollout, stderr=%s", stderr)
}
fmt.Printf("Waiting for kibishii jump-pad pod to be ready\n")
jumpPadWaitCmd := exec.CommandContext(ctx, "kubectl", "wait", "--for=condition=ready", "-n", namespace, "pod/jump-pad")
_, stderr, err = veleroexec.RunCommand(jumpPadWaitCmd)
if err != nil {
return errors.Wrapf(err, "Failed to wait for ready status of pod %s/%s, stderr=%s", namespace, jumpPadPod, stderr)
}
return err
}
func generateData(ctx context.Context, namespace string, kibishiiData *KibishiiData) error {
timeout := 30 * time.Minute
interval := 1 * time.Second
err := wait.PollImmediate(interval, timeout, func() (bool, error) {
timeout, ctxCancel := context.WithTimeout(context.Background(), time.Minute*20)
defer ctxCancel()
kibishiiGenerateCmd := exec.CommandContext(timeout, "kubectl", "exec", "-n", namespace, "jump-pad", "--",
"/usr/local/bin/generate.sh", strconv.Itoa(kibishiiData.Levels), strconv.Itoa(kibishiiData.DirsPerLevel),
strconv.Itoa(kibishiiData.FilesPerLevel), strconv.Itoa(kibishiiData.FileLength),
strconv.Itoa(kibishiiData.BlockSize), strconv.Itoa(kibishiiData.PassNum), strconv.Itoa(kibishiiData.ExpectedNodes))
fmt.Printf("kibishiiGenerateCmd cmd =%v\n", kibishiiGenerateCmd)
stdout, stderr, err := veleroexec.RunCommand(kibishiiGenerateCmd)
if err != nil || strings.Contains(stderr, "Timeout occurred") || strings.Contains(stderr, "dialing backend") {
fmt.Printf("Kibishi generate stdout Timeout occurred: %s stderr: %s err: %s", stdout, stderr, err)
return false, nil
}
return true, nil
})
if err != nil {
return errors.Wrapf(err, fmt.Sprintf("Failed to wait generate data in namespace %s", namespace))
}
return nil
}
func verifyData(ctx context.Context, namespace string, kibishiiData *KibishiiData) error {
timeout := 10 * time.Minute
interval := 5 * time.Second
err := wait.PollImmediate(interval, timeout, func() (bool, error) {
timeout, ctxCancel := context.WithTimeout(context.Background(), time.Minute*20)
defer ctxCancel()
kibishiiVerifyCmd := exec.CommandContext(timeout, "kubectl", "exec", "-n", namespace, "jump-pad", "--",
"/usr/local/bin/verify.sh", strconv.Itoa(kibishiiData.Levels), strconv.Itoa(kibishiiData.DirsPerLevel),
strconv.Itoa(kibishiiData.FilesPerLevel), strconv.Itoa(kibishiiData.FileLength),
strconv.Itoa(kibishiiData.BlockSize), strconv.Itoa(kibishiiData.PassNum),
strconv.Itoa(kibishiiData.ExpectedNodes))
fmt.Printf("kibishiiVerifyCmd cmd =%v\n", kibishiiVerifyCmd)
stdout, stderr, err := veleroexec.RunCommand(kibishiiVerifyCmd)
if strings.Contains(stderr, "Timeout occurred") {
return false, nil
}
if err != nil {
fmt.Printf("Kibishi verify stdout Timeout occurred: %s stderr: %s err: %s", stdout, stderr, err)
return false, nil
}
return true, nil
})
if err != nil {
return errors.Wrapf(err, fmt.Sprintf("Failed to wait generate data in namespace %s", namespace))
}
return nil
}
func waitForKibishiiPods(ctx context.Context, client TestClient, kibishiiNamespace string) error {
return WaitForPods(ctx, client, kibishiiNamespace, []string{"jump-pad", "etcd0", "etcd1", "etcd2", "kibishii-deployment-0", "kibishii-deployment-1"})
}
func KibishiiPrepareBeforeBackup(oneHourTimeout context.Context, client TestClient,
providerName, kibishiiNamespace, registryCredentialFile, veleroFeatures,
kibishiiDirectory string, useVolumeSnapshots bool, kibishiiData *KibishiiData) error {
fmt.Printf("installKibishii %s\n", time.Now().Format("2006-01-02 15:04:05"))
serviceAccountName := "default"
// wait until the service account is created before patch the image pull secret
if err := WaitUntilServiceAccountCreated(oneHourTimeout, client, kibishiiNamespace, serviceAccountName, 10*time.Minute); err != nil {
return errors.Wrapf(err, "failed to wait the service account %q created under the namespace %q", serviceAccountName, kibishiiNamespace)
}
// add the image pull secret to avoid the image pull limit issue of Docker Hub
if err := PatchServiceAccountWithImagePullSecret(oneHourTimeout, client, kibishiiNamespace, serviceAccountName, registryCredentialFile); err != nil {
return errors.Wrapf(err, "failed to patch the service account %q under the namespace %q", serviceAccountName, kibishiiNamespace)
}
if err := installKibishii(oneHourTimeout, kibishiiNamespace, providerName, veleroFeatures,
kibishiiDirectory, useVolumeSnapshots, kibishiiData.ExpectedNodes); err != nil {
return errors.Wrap(err, "Failed to install Kibishii workload")
}
// wait for kibishii pod startup
// TODO - Fix kibishii so we can check that it is ready to go
fmt.Printf("Waiting for kibishii pods to be ready %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := waitForKibishiiPods(oneHourTimeout, client, kibishiiNamespace); err != nil {
return errors.Wrapf(err, "Failed to wait for ready status of kibishii pods in %s", kibishiiNamespace)
}
if kibishiiData == nil {
kibishiiData = DefaultKibishiiData
}
fmt.Printf("generateData %s\n", time.Now().Format("2006-01-02 15:04:05"))
if err := generateData(oneHourTimeout, kibishiiNamespace, kibishiiData); err != nil {
return errors.Wrap(err, "Failed to generate data")
}
fmt.Printf("generateData done %s\n", time.Now().Format("2006-01-02 15:04:05"))
return nil
}
func KibishiiVerifyAfterRestore(client TestClient, kibishiiNamespace string, oneHourTimeout context.Context,
kibishiiData *KibishiiData) error {
if kibishiiData == nil {
kibishiiData = DefaultKibishiiData
}
// wait for kibishii pod startup
// TODO - Fix kibishii so we can check that it is ready to go
fmt.Printf("Waiting for kibishii pods to be ready\n")
if err := waitForKibishiiPods(oneHourTimeout, client, kibishiiNamespace); err != nil {
return errors.Wrapf(err, "Failed to wait for ready status of kibishii pods in %s", kibishiiNamespace)
}
// TODO - check that namespace exists
fmt.Printf("running kibishii verify\n")
if err := verifyData(oneHourTimeout, kibishiiNamespace, kibishiiData); err != nil {
return errors.Wrap(err, "Failed to verify data generated by kibishii")
}
return nil
}