Merge pull request #8555 from Lyndon-Li/data-mover-backup-for-windows-nodes

Data mover backup for Windows nodes
pull/8572/head
Wenkai Yin(尹文开) 2025-01-02 11:15:54 +08:00 committed by GitHub
commit 03d0bd9d22
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 339 additions and 33 deletions

View File

@ -0,0 +1 @@
Fix issue #8418, support data mover backup for Windows nodes

View File

@ -168,7 +168,24 @@ func newdataMoverBackup(logger logrus.FieldLogger, factory client.Factory, confi
return nil, errors.Wrap(err, "error to create client")
}
cache, err := ctlcache.New(clientConfig, cacheOption)
var cache ctlcache.Cache
retry := 10
for {
cache, err = ctlcache.New(clientConfig, cacheOption)
if err == nil {
break
}
retry--
if retry == 0 {
break
}
logger.WithError(err).Warn("Failed to create client cache, need retry")
time.Sleep(time.Second)
}
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client cache")

View File

@ -182,7 +182,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
hostingPodLabels := map[string]string{velerov1api.DataDownloadLabel: dd.Name}
for _, k := range util.ThirdPartyLabels {
if v, err := nodeagent.GetLabelValue(ctx, r.kubeClient, dd.Namespace, k); err != nil {
if v, err := nodeagent.GetLabelValue(ctx, r.kubeClient, dd.Namespace, k, kube.NodeOSLinux); err != nil {
if err != nodeagent.ErrNodeAgentLabelNotFound {
log.WithError(err).Warnf("Failed to check node-agent label, skip adding host pod label %s", k)
}

View File

@ -803,6 +803,11 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
return nil, errors.Wrapf(err, "failed to get PVC %s/%s", du.Spec.SourceNamespace, du.Spec.SourcePVC)
}
nodeOS, err := kube.GetPVCAttachingNodeOS(pvc, r.kubeClient.CoreV1(), r.kubeClient.StorageV1(), r.logger)
if err != nil {
return nil, errors.Wrapf(err, "failed to get attaching node OS for PVC %s/%s", du.Spec.SourceNamespace, du.Spec.SourcePVC)
}
accessMode := exposer.AccessModeFileSystem
if pvc.Spec.VolumeMode != nil && *pvc.Spec.VolumeMode == corev1.PersistentVolumeBlock {
accessMode = exposer.AccessModeBlock
@ -810,7 +815,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
hostingPodLabels := map[string]string{velerov1api.DataUploadLabel: du.Name}
for _, k := range util.ThirdPartyLabels {
if v, err := nodeagent.GetLabelValue(context.Background(), r.kubeClient, du.Namespace, k); err != nil {
if v, err := nodeagent.GetLabelValue(context.Background(), r.kubeClient, du.Namespace, k, nodeOS); err != nil {
if err != nodeagent.ErrNodeAgentLabelNotFound {
r.logger.WithError(err).Warnf("Failed to check node-agent label, skip adding host pod label %s", k)
}
@ -831,6 +836,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
Affinity: r.loadAffinity,
BackupPVCConfig: r.backupPVCConfig,
Resources: r.podResources,
NodeOS: nodeOS,
}, nil
}
return nil, nil

View File

@ -73,6 +73,9 @@ type CSISnapshotExposeParam struct {
// Resources defines the resource requirements of the hosting pod
Resources corev1.ResourceRequirements
// NodeOS specifies the OS of node that the source volume is attaching
NodeOS string
}
// CSISnapshotExposeWaitParam define the input param for WaitExposed of CSI snapshots
@ -212,6 +215,7 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1.Obje
csiExposeParam.Resources,
backupPVCReadOnly,
spcNoRelabeling,
csiExposeParam.NodeOS,
)
if err != nil {
return errors.Wrap(err, "error to create backup pod")
@ -517,13 +521,14 @@ func (e *csiSnapshotExposer) createBackupPod(
resources corev1.ResourceRequirements,
backupPVCReadOnly bool,
spcNoRelabeling bool,
nodeOS string,
) (*corev1.Pod, error) {
podName := ownerObject.Name
containerName := string(ownerObject.UID)
volumeName := string(ownerObject.UID)
podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace)
podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace, nodeOS)
if err != nil {
return nil, errors.Wrap(err, "error to get inherited pod info from node-agent")
}
@ -567,13 +572,40 @@ func (e *csiSnapshotExposer) createBackupPod(
args = append(args, podInfo.logFormatArgs...)
args = append(args, podInfo.logLevelArgs...)
userID := int64(0)
affinityList := make([]*kube.LoadAffinity, 0)
if affinity != nil {
affinityList = append(affinityList, affinity)
}
var securityCtx *corev1.PodSecurityContext
nodeSelector := map[string]string{}
podOS := corev1.PodOS{}
if nodeOS == kube.NodeOSWindows {
userID := "ContainerAdministrator"
securityCtx = &corev1.PodSecurityContext{
WindowsOptions: &corev1.WindowsSecurityContextOptions{
RunAsUserName: &userID,
},
}
nodeSelector[kube.NodeOSLabel] = kube.NodeOSWindows
podOS.Name = kube.NodeOSWindows
} else {
userID := int64(0)
securityCtx = &corev1.PodSecurityContext{
RunAsUser: &userID,
}
if spcNoRelabeling {
securityCtx.SELinuxOptions = &corev1.SELinuxOptions{
Type: "spc_t",
}
}
nodeSelector[kube.NodeOSLabel] = kube.NodeOSLinux
podOS.Name = kube.NodeOSLinux
}
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
@ -602,7 +634,9 @@ func (e *csiSnapshotExposer) createBackupPod(
},
},
},
Affinity: kube.ToSystemAffinity(affinityList),
NodeSelector: nodeSelector,
OS: &podOS,
Affinity: kube.ToSystemAffinity(affinityList),
Containers: []corev1.Container{
{
Name: containerName,
@ -625,17 +659,9 @@ func (e *csiSnapshotExposer) createBackupPod(
TerminationGracePeriodSeconds: &gracePeriod,
Volumes: volumes,
RestartPolicy: corev1.RestartPolicyNever,
SecurityContext: &corev1.PodSecurityContext{
RunAsUser: &userID,
},
SecurityContext: securityCtx,
},
}
if spcNoRelabeling {
pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{
Type: "spc_t",
}
}
return e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Create(ctx, pod, metav1.CreateOptions{})
}

View File

@ -354,7 +354,7 @@ func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObjec
containerName := string(ownerObject.UID)
volumeName := string(ownerObject.UID)
podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace)
podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace, kube.NodeOSLinux)
if err != nil {
return nil, errors.Wrap(err, "error to get inherited pod info from node-agent")
}

View File

@ -38,10 +38,10 @@ type inheritedPodInfo struct {
logFormatArgs []string
}
func getInheritedPodInfo(ctx context.Context, client kubernetes.Interface, veleroNamespace string) (inheritedPodInfo, error) {
func getInheritedPodInfo(ctx context.Context, client kubernetes.Interface, veleroNamespace string, osType string) (inheritedPodInfo, error) {
podInfo := inheritedPodInfo{}
podSpec, err := nodeagent.GetPodSpec(ctx, client, veleroNamespace)
podSpec, err := nodeagent.GetPodSpec(ctx, client, veleroNamespace, osType)
if err != nil {
return podInfo, errors.Wrap(err, "error to get node-agent pod template")
}

View File

@ -26,6 +26,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"github.com/vmware-tanzu/velero/pkg/util/kube"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/fake"
@ -322,7 +324,7 @@ func TestGetInheritedPodInfo(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
info, err := getInheritedPodInfo(context.Background(), fakeKubeClient, test.namespace)
info, err := getInheritedPodInfo(context.Background(), fakeKubeClient, test.namespace, kube.NodeOSLinux)
if test.expectErr == "" {
assert.NoError(t, err)

View File

@ -157,10 +157,15 @@ func isRunningInNode(ctx context.Context, namespace string, nodeName string, crC
return errors.Errorf("daemonset pod not found in running state in node %s", nodeName)
}
func GetPodSpec(ctx context.Context, kubeClient kubernetes.Interface, namespace string) (*v1.PodSpec, error) {
ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, daemonSet, metav1.GetOptions{})
func GetPodSpec(ctx context.Context, kubeClient kubernetes.Interface, namespace string, osType string) (*v1.PodSpec, error) {
dsName := daemonSet
if osType == kube.NodeOSWindows {
dsName = daemonsetWindows
}
ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
if err != nil {
return nil, errors.Wrap(err, "error to get node-agent daemonset")
return nil, errors.Wrapf(err, "error to get %s daemonset", dsName)
}
return &ds.Spec.Template.Spec, nil
@ -190,10 +195,15 @@ func GetConfigs(ctx context.Context, namespace string, kubeClient kubernetes.Int
return configs, nil
}
func GetLabelValue(ctx context.Context, kubeClient kubernetes.Interface, namespace string, key string) (string, error) {
ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, daemonSet, metav1.GetOptions{})
func GetLabelValue(ctx context.Context, kubeClient kubernetes.Interface, namespace string, key string, osType string) (string, error) {
dsName := daemonSet
if osType == kube.NodeOSWindows {
dsName = daemonsetWindows
}
ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, dsName, metav1.GetOptions{})
if err != nil {
return "", errors.Wrap(err, "error getting node-agent daemonset")
return "", errors.Wrapf(err, "error getting %s daemonset", dsName)
}
if ds.Spec.Template.Labels == nil {

View File

@ -31,6 +31,7 @@ import (
clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake"
"github.com/vmware-tanzu/velero/pkg/builder"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
type reactor struct {
@ -229,7 +230,7 @@ func TestGetPodSpec(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
spec, err := GetPodSpec(context.TODO(), fakeKubeClient, test.namespace)
spec, err := GetPodSpec(context.TODO(), fakeKubeClient, test.namespace, kube.NodeOSLinux)
if test.expectErr == "" {
assert.NoError(t, err)
assert.Equal(t, *spec, test.expectSpec)
@ -450,7 +451,7 @@ func TestGetLabelValue(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
value, err := GetLabelValue(context.TODO(), fakeKubeClient, test.namespace, "fake-label")
value, err := GetLabelValue(context.TODO(), fakeKubeClient, test.namespace, "fake-label", kube.NodeOSLinux)
if test.expectErr == "" {
assert.NoError(t, err)
assert.Equal(t, test.expectedValue, value)

View File

@ -21,23 +21,31 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
corev1api "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
const (
NodeOSLinux = "linux"
NodeOSWindows = "windows"
NodeOSLabel = "kubernetes.io/os"
)
func IsLinuxNode(ctx context.Context, nodeName string, client client.Client) error {
node := &corev1api.Node{}
if err := client.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
return errors.Wrapf(err, "error getting node %s", nodeName)
}
os, found := node.Labels["kubernetes.io/os"]
os, found := node.Labels[NodeOSLabel]
if !found {
return errors.Errorf("no os type label for node %s", nodeName)
}
if os != "linux" {
if os != NodeOSLinux {
return errors.Errorf("os type %s for node %s is not linux", os, nodeName)
}
@ -45,11 +53,11 @@ func IsLinuxNode(ctx context.Context, nodeName string, client client.Client) err
}
func WithLinuxNode(ctx context.Context, client client.Client, log logrus.FieldLogger) bool {
return withOSNode(ctx, client, "linux", log)
return withOSNode(ctx, client, NodeOSLinux, log)
}
func WithWindowsNode(ctx context.Context, client client.Client, log logrus.FieldLogger) bool {
return withOSNode(ctx, client, "windows", log)
return withOSNode(ctx, client, NodeOSWindows, log)
}
func withOSNode(ctx context.Context, client client.Client, osType string, log logrus.FieldLogger) bool {
@ -61,7 +69,7 @@ func withOSNode(ctx context.Context, client client.Client, osType string, log lo
allNodeLabeled := true
for _, node := range nodeList.Items {
os, found := node.Labels["kubernetes.io/os"]
os, found := node.Labels[NodeOSLabel]
if os == osType {
return true
@ -78,3 +86,16 @@ func withOSNode(ctx context.Context, client client.Client, osType string, log lo
return false
}
func GetNodeOS(ctx context.Context, nodeName string, nodeClient corev1client.CoreV1Interface) (string, error) {
node, err := nodeClient.Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
if err != nil {
return "", errors.Wrapf(err, "error getting node %s", nodeName)
}
if node.Labels == nil {
return "", nil
}
return node.Labels[NodeOSLabel], nil
}

View File

@ -26,6 +26,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/builder"
kubeClientFake "k8s.io/client-go/kubernetes/fake"
clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
@ -130,3 +131,53 @@ func TestWithLinuxNode(t *testing.T) {
})
}
}
func TestGetNodeOSType(t *testing.T) {
nodeNoOSLabel := builder.ForNode("fake-node").Result()
nodeWindows := builder.ForNode("fake-node").Labels(map[string]string{"kubernetes.io/os": "windows"}).Result()
nodeLinux := builder.ForNode("fake-node").Labels(map[string]string{"kubernetes.io/os": "linux"}).Result()
scheme := runtime.NewScheme()
corev1.AddToScheme(scheme)
tests := []struct {
name string
kubeClientObj []runtime.Object
err string
expectedOSType string
}{
{
name: "error getting node",
err: "error getting node fake-node: nodes \"fake-node\" not found",
},
{
name: "no os label",
kubeClientObj: []runtime.Object{
nodeNoOSLabel,
},
},
{
name: "windows node",
kubeClientObj: []runtime.Object{
nodeWindows,
},
expectedOSType: "windows",
},
{
name: "linux node",
kubeClientObj: []runtime.Object{
nodeLinux,
},
expectedOSType: "linux",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := kubeClientFake.NewSimpleClientset(test.kubeClientObj...)
osType, err := GetNodeOS(context.TODO(), "fake-node", fakeKubeClient.CoreV1())
if err != nil {
assert.EqualError(t, err, test.err)
} else {
assert.Equal(t, test.expectedOSType, osType)
}
})
}
}

View File

@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
jsonpatch "github.com/evanphx/json-patch/v5"
@ -427,3 +428,42 @@ func DiagnosePV(pv *corev1api.PersistentVolume) string {
diag := fmt.Sprintf("PV %s, phase %s, reason %s, message %s\n", pv.Name, pv.Status.Phase, pv.Status.Reason, pv.Status.Message)
return diag
}
func GetPVCAttachingNodeOS(pvc *corev1api.PersistentVolumeClaim, nodeClient corev1client.CoreV1Interface,
storageClient storagev1.StorageV1Interface, log logrus.FieldLogger) (string, error) {
var nodeOS string
var scFsType string
if value := pvc.Annotations[KubeAnnSelectedNode]; value != "" {
os, err := GetNodeOS(context.Background(), value, nodeClient)
if err != nil {
return "", errors.Wrapf(err, "error to get os from node %s for PVC %s/%s", value, pvc.Namespace, pvc.Name)
}
nodeOS = os
}
if pvc.Spec.StorageClassName != nil {
sc, err := storageClient.StorageClasses().Get(context.Background(), *pvc.Spec.StorageClassName, metav1.GetOptions{})
if err != nil {
return "", errors.Wrapf(err, "error to get storage class %s", *pvc.Spec.StorageClassName)
}
if sc.Parameters != nil {
scFsType = strings.ToLower(sc.Parameters["csi.storage.k8s.io/fstype"])
}
}
if nodeOS != "" {
log.Infof("Deduced node os %s from selected node for PVC %s/%s (fsType %s)", nodeOS, pvc.Namespace, pvc.Name, scFsType)
return nodeOS, nil
}
if scFsType == "ntfs" {
log.Infof("Deduced Windows node os from fsType for PVC %s/%s", pvc.Namespace, pvc.Name)
return NodeOSWindows, nil
}
log.Warnf("Cannot deduce node os for PVC %s/%s, default to linux", pvc.Namespace, pvc.Name)
return NodeOSLinux, nil
}

View File

@ -33,6 +33,7 @@ import (
clientTesting "k8s.io/client-go/testing"
"github.com/vmware-tanzu/velero/pkg/builder"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
)
@ -1550,3 +1551,133 @@ func TestDiagnosePV(t *testing.T) {
})
}
}
func TestGetPVCAttachingNodeOS(t *testing.T) {
storageClass := "fake-storage-class"
nodeNoOSLabel := builder.ForNode("fake-node").Result()
nodeWindows := builder.ForNode("fake-node").Labels(map[string]string{"kubernetes.io/os": "windows"}).Result()
pvcObj := &corev1api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-namespace",
Name: "fake-pvc",
},
}
pvcObjWithNode := &corev1api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-namespace",
Name: "fake-pvc",
Annotations: map[string]string{KubeAnnSelectedNode: "fake-node"},
},
}
pvcObjWithStorageClass := &corev1api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-namespace",
Name: "fake-pvc",
},
Spec: corev1api.PersistentVolumeClaimSpec{
StorageClassName: &storageClass,
},
}
pvcObjWithBoth := &corev1api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-namespace",
Name: "fake-pvc",
Annotations: map[string]string{KubeAnnSelectedNode: "fake-node"},
},
Spec: corev1api.PersistentVolumeClaimSpec{
StorageClassName: &storageClass,
},
}
scObjWithoutFSType := &storagev1api.StorageClass{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-storage-class",
},
}
scObjWithFSType := &storagev1api.StorageClass{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-storage-class",
},
Parameters: map[string]string{"csi.storage.k8s.io/fstype": "ntfs"},
}
tests := []struct {
name string
pvc *corev1api.PersistentVolumeClaim
kubeClientObj []runtime.Object
expectedNodeOS string
err string
}{
{
name: "no selected node and storage class",
pvc: pvcObj,
expectedNodeOS: NodeOSLinux,
},
{
name: "node doesn't exist",
pvc: pvcObjWithNode,
err: "error to get os from node fake-node for PVC fake-namespace/fake-pvc: error getting node fake-node: nodes \"fake-node\" not found",
},
{
name: "node without os label",
pvc: pvcObjWithNode,
kubeClientObj: []runtime.Object{
nodeNoOSLabel,
},
expectedNodeOS: NodeOSLinux,
},
{
name: "sc doesn't exist",
pvc: pvcObjWithStorageClass,
err: "error to get storage class fake-storage-class: storageclasses.storage.k8s.io \"fake-storage-class\" not found",
},
{
name: "sc without fsType",
pvc: pvcObjWithStorageClass,
kubeClientObj: []runtime.Object{
scObjWithoutFSType,
},
expectedNodeOS: NodeOSLinux,
},
{
name: "deduce from node os",
pvc: pvcObjWithBoth,
kubeClientObj: []runtime.Object{
nodeWindows,
scObjWithFSType,
},
expectedNodeOS: NodeOSWindows,
},
{
name: "deduce from sc",
pvc: pvcObjWithBoth,
kubeClientObj: []runtime.Object{
nodeNoOSLabel,
scObjWithFSType,
},
expectedNodeOS: NodeOSWindows,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)
var kubeClient kubernetes.Interface = fakeKubeClient
nodeOS, err := GetPVCAttachingNodeOS(test.pvc, kubeClient.CoreV1(), kubeClient.StorageV1(), velerotest.NewLogger())
if err != nil {
assert.EqualError(t, err, test.err)
} else {
assert.NoError(t, err)
}
assert.Equal(t, test.expectedNodeOS, nodeOS)
})
}
}