Merge pull request #8482 from Lyndon-Li/data-mover-exposer-diagnostic

Data mover exposer diagnostic
pull/8512/head
Wenkai Yin(尹文开) 2024-12-13 14:28:37 +08:00 committed by GitHub
commit 0224d99889
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 1192 additions and 2 deletions

View File

@ -0,0 +1 @@
Fix issue #8125: log diagnostic info for data mover exposers when the expose times out

View File

@ -19,6 +19,7 @@ package controller
import (
"context"
"fmt"
"strings"
"time"
"github.com/pkg/errors"
@ -684,6 +685,11 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
return
}
diags := strings.Split(r.restoreExposer.DiagnoseExpose(ctx, getDataDownloadOwnerObject(dd)), "\n")
for _, diag := range diags {
log.Warnf("[Diagnose DD expose]%s", diag)
}
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
log.Info("Dataupload has been cleaned up")

View File

@ -971,6 +971,10 @@ func (dt *ddResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReferenc
return nil
}
// DiagnoseExpose is a test stub: it ignores its arguments and always returns
// an empty diagnostic string.
func (dt *ddResumeTestHelper) DiagnoseExpose(context.Context, corev1.ObjectReference) string {
	return ""
}
// RebindVolume is a test stub: it ignores its arguments and always returns nil.
func (dt *ddResumeTestHelper) RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error {
	return nil
}

View File

@ -19,6 +19,7 @@ package controller
import (
"context"
"fmt"
"strings"
"time"
snapshotter "github.com/kubernetes-csi/external-snapshotter/client/v7/clientset/versioned/typed/volumesnapshot/v1"
@ -751,6 +752,11 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov
volumeSnapshotName = du.Spec.CSISnapshot.VolumeSnapshot
}
diags := strings.Split(ep.DiagnoseExpose(ctx, getOwnerObject(du)), "\n")
for _, diag := range diags {
log.Warnf("[Diagnose DU expose]%s", diag)
}
ep.CleanUp(ctx, getOwnerObject(du), volumeSnapshotName, du.Spec.SourceNamespace)
log.Info("Dataupload has been cleaned up")

View File

@ -300,6 +300,10 @@ func (f *fakeSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev
return f.peekErr
}
// DiagnoseExpose is a test stub: it ignores its arguments and always returns
// an empty diagnostic string.
func (f *fakeSnapshotExposer) DiagnoseExpose(context.Context, corev1.ObjectReference) string {
	return ""
}
// CleanUp is a test stub: it ignores its arguments and does nothing.
func (f *fakeSnapshotExposer) CleanUp(context.Context, corev1.ObjectReference, string, string) {
}
@ -1043,6 +1047,10 @@ func (dt *duResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReferenc
return nil
}
// DiagnoseExpose is a test stub: it ignores its arguments and always returns
// an empty diagnostic string.
func (dt *duResumeTestHelper) DiagnoseExpose(context.Context, corev1.ObjectReference) string {
	return ""
}
// CleanUp is a test stub: it ignores its arguments and does nothing.
func (dt *duResumeTestHelper) CleanUp(context.Context, corev1.ObjectReference, string, string) {}
func (dt *duResumeTestHelper) newMicroServiceBRWatcher(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string,

View File

@ -308,6 +308,70 @@ func (e *csiSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev1
return nil
}
// DiagnoseExpose builds a multi-line report about the objects of a CSI snapshot
// expose: the backup pod, the backup PVC (plus the PV it names) and the backup
// VolumeSnapshot (plus the VolumeSnapshotContent it is bound to). The pod, PVC
// and VolumeSnapshot are all looked up in ownerObject.Namespace under
// ownerObject.Name.
//
// Lookup failures are recorded in the report rather than returned, so the
// result always starts with the "begin" line and finishes with the "end" line
// (which carries no trailing newline).
func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject corev1.ObjectReference) string {
	namespace, name := ownerObject.Namespace, ownerObject.Name

	report := "begin diagnose CSI exposer\n"

	// Run the three lookups up front so that every lookup error is listed
	// ahead of the per-object details.
	backupPod, podErr := e.kubeClient.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
	if podErr != nil {
		report += fmt.Sprintf("error getting backup pod %s, err: %v\n", name, podErr)
	}

	backupPVC, pvcErr := e.kubeClient.CoreV1().PersistentVolumeClaims(namespace).Get(ctx, name, metav1.GetOptions{})
	if pvcErr != nil {
		report += fmt.Sprintf("error getting backup pvc %s, err: %v\n", name, pvcErr)
	}

	backupVS, vsErr := e.csiSnapshotClient.VolumeSnapshots(namespace).Get(ctx, name, metav1.GetOptions{})
	if vsErr != nil {
		report += fmt.Sprintf("error getting backup vs %s, err: %v\n", name, vsErr)
	}

	if podErr == nil && backupPod != nil {
		report += kube.DiagnosePod(backupPod)

		// Once the pod has a node assigned, also check the node-agent on that node.
		if nodeName := backupPod.Spec.NodeName; nodeName != "" {
			if agentErr := nodeagent.KbClientIsRunningInNode(ctx, namespace, nodeName, e.kubeClient); agentErr != nil {
				report += fmt.Sprintf("node-agent is not running in node %s, err: %v\n", nodeName, agentErr)
			}
		}
	}

	if pvcErr == nil && backupPVC != nil {
		report += kube.DiagnosePVC(backupPVC)

		if pvName := backupPVC.Spec.VolumeName; pvName != "" {
			backupPV, pvErr := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvName, metav1.GetOptions{})
			if pvErr != nil {
				report += fmt.Sprintf("error getting backup pv %s, err: %v\n", pvName, pvErr)
			} else {
				report += kube.DiagnosePV(backupPV)
			}
		}
	}

	if vsErr == nil && backupVS != nil {
		report += csi.DiagnoseVS(backupVS)

		if status := backupVS.Status; status != nil && status.BoundVolumeSnapshotContentName != nil && *status.BoundVolumeSnapshotContentName != "" {
			vscName := *status.BoundVolumeSnapshotContentName

			backupVSC, vscErr := e.csiSnapshotClient.VolumeSnapshotContents().Get(ctx, vscName, metav1.GetOptions{})
			if vscErr != nil {
				report += fmt.Sprintf("error getting backup vsc %s, err: %v\n", vscName, vscErr)
			} else {
				report += csi.DiagnoseVSC(backupVSC)
			}
		}
	}

	report += "end diagnose CSI exposer"

	return report
}
const cleanUpTimeout = time.Minute
func (e *csiSnapshotExposer) CleanUp(ctx context.Context, ownerObject corev1.ObjectReference, vsName string, sourceNamespace string) {

View File

@ -959,3 +959,394 @@ func Test_csiSnapshotExposer_createBackupPVC(t *testing.T) {
})
}
}
// Test_csiSnapshotExposer_DiagnoseExpose feeds csiSnapshotExposer.DiagnoseExpose
// with different combinations of backup pod, PVC, PV, VolumeSnapshot,
// VolumeSnapshotContent and node-agent pod, and compares the whole report.
//
// Several report lines end in a space when the value they print is empty
// ("node name ", "binding to ", "errMessage ", "handle "). The expectations are
// therefore assembled from escaped string constants instead of multi-line raw
// literals, so the trailing spaces are visible and survive whitespace stripping.
func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) {
	backup := &velerov1.Backup{
		TypeMeta: metav1.TypeMeta{
			APIVersion: velerov1.SchemeGroupVersion.String(),
			Kind:       "Backup",
		},
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			UID:       "fake-uid",
		},
	}

	// ownedMeta returns fresh metadata for an exposer object: named after the
	// backup, placed in the default namespace and owned by the backup.
	ownedMeta := func() metav1.ObjectMeta {
		return metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-backup",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: backup.APIVersion,
					Kind:       backup.Kind,
					Name:       backup.Name,
					UID:        backup.UID,
				},
			},
		}
	}

	// pendingPodStatus returns a fresh Pending status with one condition.
	pendingPodStatus := func() corev1.PodStatus {
		return corev1.PodStatus{
			Phase: corev1.PodPending,
			Conditions: []corev1.PodCondition{
				{
					Type:    corev1.PodInitialized,
					Status:  corev1.ConditionTrue,
					Message: "fake-pod-message",
				},
			},
		}
	}

	backupPodWithoutNodeName := corev1.Pod{
		ObjectMeta: ownedMeta(),
		Status:     pendingPodStatus(),
	}

	backupPodWithNodeName := corev1.Pod{
		ObjectMeta: ownedMeta(),
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: pendingPodStatus(),
	}

	backupPVCWithoutVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: ownedMeta(),
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	backupPVCWithVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: ownedMeta(),
		Spec: corev1.PersistentVolumeClaimSpec{
			VolumeName: "fake-pv",
		},
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	backupPV := corev1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{
			Name: "fake-pv",
		},
		Status: corev1.PersistentVolumeStatus{
			Phase:   corev1.VolumePending,
			Message: "fake-pv-message",
		},
	}

	readyToUse := false
	vscMessage := "fake-vsc-message"
	backupVSC := snapshotv1api.VolumeSnapshotContent{
		ObjectMeta: metav1.ObjectMeta{
			Name: "fake-vsc",
		},
		Status: &snapshotv1api.VolumeSnapshotContentStatus{
			ReadyToUse: &readyToUse,
			Error: &snapshotv1api.VolumeSnapshotError{
				Message: &vscMessage,
			},
		},
	}

	backupVSWithoutStatus := snapshotv1api.VolumeSnapshot{
		ObjectMeta: ownedMeta(),
	}

	backupVSWithoutVSC := snapshotv1api.VolumeSnapshot{
		ObjectMeta: ownedMeta(),
		Status:     &snapshotv1api.VolumeSnapshotStatus{},
	}

	vsMessage := "fake-vs-message"
	backupVSWithVSC := snapshotv1api.VolumeSnapshot{
		ObjectMeta: ownedMeta(),
		Status: &snapshotv1api.VolumeSnapshotStatus{
			BoundVolumeSnapshotContentName: &backupVSC.Name,
			Error: &snapshotv1api.VolumeSnapshotError{
				Message: &vsMessage,
			},
		},
	}

	nodeAgentPod := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "node-agent-pod-1",
			Labels:    map[string]string{"name": "node-agent"},
		},
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodRunning,
		},
	}

	// Individual report lines. Lines printing an empty value keep the space
	// that precedes the value, hence the space before "\n" in some of them.
	const (
		beginLine = "begin diagnose CSI exposer\n"
		endLine   = "end diagnose CSI exposer"

		podMissingLine = "error getting backup pod fake-backup, err: pods \"fake-backup\" not found\n"
		pvcMissingLine = "error getting backup pvc fake-backup, err: persistentvolumeclaims \"fake-backup\" not found\n"
		vsMissingLine  = "error getting backup vs fake-backup, err: volumesnapshots.snapshot.storage.k8s.io \"fake-backup\" not found\n"

		podNoNodeLine = "Pod velero/fake-backup, phase Pending, node name \n"
		podOnNodeLine = "Pod velero/fake-backup, phase Pending, node name fake-node\n"
		podCondLine   = "Pod condition Initialized, status True, reason , message fake-pod-message\n"
		noAgentLine   = "node-agent is not running in node fake-node, err: daemonset pod not found in running state in node fake-node\n"

		pvcUnboundLine = "PVC velero/fake-backup, phase Pending, binding to \n"
		pvcBoundLine   = "PVC velero/fake-backup, phase Pending, binding to fake-pv\n"
		pvMissingLine  = "error getting backup pv fake-pv, err: persistentvolumes \"fake-pv\" not found\n"
		pvLine         = "PV fake-pv, phase Pending, reason , message fake-pv-message\n"

		vsUnboundLine  = "VS velero/fake-backup, bind to , readyToUse false, errMessage \n"
		vsBoundLine    = "VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message\n"
		vscMissingLine = "error getting backup vsc fake-vsc, err: volumesnapshotcontents.snapshot.storage.k8s.io \"fake-vsc\" not found\n"
		vscLine        = "VSC fake-vsc, readyToUse false, errMessage fake-vsc-message, handle \n"
	)

	tests := []struct {
		name              string
		ownerBackup       *velerov1.Backup
		kubeClientObj     []runtime.Object
		snapshotClientObj []runtime.Object
		expected          string
	}{
		{
			name:        "no pod, pvc, vs",
			ownerBackup: backup,
			expected:    beginLine + podMissingLine + pvcMissingLine + vsMissingLine + endLine,
		},
		{
			name:        "pod without node name, pvc without volume name, vs without status",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithoutNodeName,
				&backupPVCWithoutVolumeName,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutStatus,
			},
			expected: beginLine + podNoNodeLine + podCondLine + pvcUnboundLine + vsUnboundLine + endLine,
		},
		{
			name:        "pod without node name, pvc without volume name, vs without VSC",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithoutNodeName,
				&backupPVCWithoutVolumeName,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: beginLine + podNoNodeLine + podCondLine + pvcUnboundLine + vsUnboundLine + endLine,
		},
		{
			name:        "pod with node name, no node agent",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithoutVolumeName,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: beginLine + podOnNodeLine + podCondLine + noAgentLine + pvcUnboundLine + vsUnboundLine + endLine,
		},
		{
			name:        "pod with node name, node agent is running",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithoutVolumeName,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcUnboundLine + vsUnboundLine + endLine,
		},
		{
			name:        "pvc with volume name, no pv",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcBoundLine + pvMissingLine + vsUnboundLine + endLine,
		},
		{
			name:        "pvc with volume name, pv exists",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&backupPV,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithoutVSC,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcBoundLine + pvLine + vsUnboundLine + endLine,
		},
		{
			name:        "vs with vsc, vsc doesn't exist",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&backupPV,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithVSC,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcBoundLine + pvLine + vsBoundLine + vscMissingLine + endLine,
		},
		{
			name:        "vs with vsc, vsc exists",
			ownerBackup: backup,
			kubeClientObj: []runtime.Object{
				&backupPodWithNodeName,
				&backupPVCWithVolumeName,
				&backupPV,
				&nodeAgentPod,
			},
			snapshotClientObj: []runtime.Object{
				&backupVSWithVSC,
				&backupVSC,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcBoundLine + pvLine + vsBoundLine + vscLine + endLine,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fakeKubeClient := fake.NewSimpleClientset(tt.kubeClientObj...)
			fakeSnapshotClient := snapshotFake.NewSimpleClientset(tt.snapshotClientObj...)

			e := &csiSnapshotExposer{
				kubeClient:        fakeKubeClient,
				csiSnapshotClient: fakeSnapshotClient.SnapshotV1(),
				log:               velerotest.NewLogger(),
			}

			var ownerObject corev1.ObjectReference
			if tt.ownerBackup != nil {
				ownerObject = corev1.ObjectReference{
					Kind:       tt.ownerBackup.Kind,
					Namespace:  tt.ownerBackup.Namespace,
					Name:       tt.ownerBackup.Name,
					UID:        tt.ownerBackup.UID,
					APIVersion: tt.ownerBackup.APIVersion,
				}
			}

			diag := e.DiagnoseExpose(context.Background(), ownerObject)
			assert.Equal(t, tt.expected, diag)
		})
	}
}

View File

@ -30,6 +30,7 @@ import (
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
@ -49,6 +50,10 @@ type GenericRestoreExposer interface {
// Otherwise, it returns nil immediately.
PeekExposed(context.Context, corev1.ObjectReference) error
// DiagnoseExpose generates diagnostic info when the expose has not finished for a long time.
// It returns the collected info, including any problems it finds, as a string.
DiagnoseExpose(context.Context, corev1.ObjectReference) string
// RebindVolume unexposes the restored PV and rebind it to the target PVC
RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error
@ -195,6 +200,51 @@ func (e *genericRestoreExposer) PeekExposed(ctx context.Context, ownerObject cor
return nil
}
// DiagnoseExpose builds a multi-line report about the objects of a restore
// expose: the restore pod and the restore PVC (plus the PV it names). Both the
// pod and the PVC are looked up in ownerObject.Namespace under ownerObject.Name.
//
// Lookup failures are recorded in the report rather than returned, so the
// result always starts with the "begin" line and finishes with the "end" line
// (which carries no trailing newline).
func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject corev1.ObjectReference) string {
	namespace, name := ownerObject.Namespace, ownerObject.Name

	report := "begin diagnose restore exposer\n"

	// Run both lookups up front so that every lookup error is listed ahead of
	// the per-object details.
	restorePod, podErr := e.kubeClient.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
	if podErr != nil {
		report += fmt.Sprintf("error getting restore pod %s, err: %v\n", name, podErr)
	}

	restorePVC, pvcErr := e.kubeClient.CoreV1().PersistentVolumeClaims(namespace).Get(ctx, name, metav1.GetOptions{})
	if pvcErr != nil {
		report += fmt.Sprintf("error getting restore pvc %s, err: %v\n", name, pvcErr)
	}

	if podErr == nil && restorePod != nil {
		report += kube.DiagnosePod(restorePod)

		// Once the pod has a node assigned, also check the node-agent on that node.
		if nodeName := restorePod.Spec.NodeName; nodeName != "" {
			if agentErr := nodeagent.KbClientIsRunningInNode(ctx, namespace, nodeName, e.kubeClient); agentErr != nil {
				report += fmt.Sprintf("node-agent is not running in node %s, err: %v\n", nodeName, agentErr)
			}
		}
	}

	if pvcErr == nil && restorePVC != nil {
		report += kube.DiagnosePVC(restorePVC)

		if pvName := restorePVC.Spec.VolumeName; pvName != "" {
			restorePV, pvErr := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvName, metav1.GetOptions{})
			if pvErr != nil {
				report += fmt.Sprintf("error getting restore pv %s, err: %v\n", pvName, pvErr)
			} else {
				report += kube.DiagnosePV(restorePV)
			}
		}
	}

	report += "end diagnose restore exposer"

	return report
}
func (e *genericRestoreExposer) CleanUp(ctx context.Context, ownerObject corev1.ObjectReference) {
restorePodName := ownerObject.Name
restorePVCName := ownerObject.Name

View File

@ -507,3 +507,258 @@ func TestRestorePeekExpose(t *testing.T) {
})
}
}
// Test_ReastoreDiagnoseExpose feeds genericRestoreExposer.DiagnoseExpose with
// different combinations of restore pod, PVC, PV and node-agent pod, and
// compares the whole report.
//
// Report lines that print an empty value end in a space ("node name ",
// "binding to "). The expectations are therefore assembled from escaped string
// constants instead of multi-line raw literals, so the trailing spaces are
// visible and survive whitespace stripping.
//
// NOTE(review): the function name misspells "Restore"; it is left unchanged so
// existing `go test -run` filters keep matching.
func Test_ReastoreDiagnoseExpose(t *testing.T) {
	restore := &velerov1.Restore{
		TypeMeta: metav1.TypeMeta{
			APIVersion: velerov1.SchemeGroupVersion.String(),
			Kind:       "Restore",
		},
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			UID:       "fake-uid",
		},
	}

	// ownedMeta returns fresh metadata for an exposer object: named after the
	// restore, placed in the default namespace and owned by the restore.
	ownedMeta := func() metav1.ObjectMeta {
		return metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "fake-restore",
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: restore.APIVersion,
					Kind:       restore.Kind,
					Name:       restore.Name,
					UID:        restore.UID,
				},
			},
		}
	}

	// pendingPodStatus returns a fresh Pending status with one condition.
	pendingPodStatus := func() corev1.PodStatus {
		return corev1.PodStatus{
			Phase: corev1.PodPending,
			Conditions: []corev1.PodCondition{
				{
					Type:    corev1.PodInitialized,
					Status:  corev1.ConditionTrue,
					Message: "fake-pod-message",
				},
			},
		}
	}

	restorePodWithoutNodeName := corev1.Pod{
		ObjectMeta: ownedMeta(),
		Status:     pendingPodStatus(),
	}

	restorePodWithNodeName := corev1.Pod{
		ObjectMeta: ownedMeta(),
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: pendingPodStatus(),
	}

	restorePVCWithoutVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: ownedMeta(),
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	restorePVCWithVolumeName := corev1.PersistentVolumeClaim{
		ObjectMeta: ownedMeta(),
		Spec: corev1.PersistentVolumeClaimSpec{
			VolumeName: "fake-pv",
		},
		Status: corev1.PersistentVolumeClaimStatus{
			Phase: corev1.ClaimPending,
		},
	}

	restorePV := corev1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{
			Name: "fake-pv",
		},
		Status: corev1.PersistentVolumeStatus{
			Phase:   corev1.VolumePending,
			Message: "fake-pv-message",
		},
	}

	nodeAgentPod := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: velerov1.DefaultNamespace,
			Name:      "node-agent-pod-1",
			Labels:    map[string]string{"name": "node-agent"},
		},
		Spec: corev1.PodSpec{
			NodeName: "fake-node",
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodRunning,
		},
	}

	// Individual report lines. Lines printing an empty value keep the space
	// that precedes the value, hence the space before "\n" in some of them.
	const (
		beginLine = "begin diagnose restore exposer\n"
		endLine   = "end diagnose restore exposer"

		podMissingLine = "error getting restore pod fake-restore, err: pods \"fake-restore\" not found\n"
		pvcMissingLine = "error getting restore pvc fake-restore, err: persistentvolumeclaims \"fake-restore\" not found\n"

		podNoNodeLine = "Pod velero/fake-restore, phase Pending, node name \n"
		podOnNodeLine = "Pod velero/fake-restore, phase Pending, node name fake-node\n"
		podCondLine   = "Pod condition Initialized, status True, reason , message fake-pod-message\n"
		noAgentLine   = "node-agent is not running in node fake-node, err: daemonset pod not found in running state in node fake-node\n"

		pvcUnboundLine = "PVC velero/fake-restore, phase Pending, binding to \n"
		pvcBoundLine   = "PVC velero/fake-restore, phase Pending, binding to fake-pv\n"
		pvMissingLine  = "error getting restore pv fake-pv, err: persistentvolumes \"fake-pv\" not found\n"
		pvLine         = "PV fake-pv, phase Pending, reason , message fake-pv-message\n"
	)

	tests := []struct {
		name          string
		ownerRestore  *velerov1.Restore
		kubeClientObj []runtime.Object
		expected      string
	}{
		{
			name:         "no pod, pvc",
			ownerRestore: restore,
			expected:     beginLine + podMissingLine + pvcMissingLine + endLine,
		},
		{
			name:         "pod without node name, pvc without volume name, vs without status",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithoutNodeName,
				&restorePVCWithoutVolumeName,
			},
			expected: beginLine + podNoNodeLine + podCondLine + pvcUnboundLine + endLine,
		},
		{
			name:         "pod without node name, pvc without volume name",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithoutNodeName,
				&restorePVCWithoutVolumeName,
			},
			expected: beginLine + podNoNodeLine + podCondLine + pvcUnboundLine + endLine,
		},
		{
			name:         "pod with node name, no node agent",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithoutVolumeName,
			},
			expected: beginLine + podOnNodeLine + podCondLine + noAgentLine + pvcUnboundLine + endLine,
		},
		{
			name:         "pod with node name, node agent is running",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithoutVolumeName,
				&nodeAgentPod,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcUnboundLine + endLine,
		},
		{
			name:         "pvc with volume name, no pv",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithVolumeName,
				&nodeAgentPod,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcBoundLine + pvMissingLine + endLine,
		},
		{
			name:         "pvc with volume name, pv exists",
			ownerRestore: restore,
			kubeClientObj: []runtime.Object{
				&restorePodWithNodeName,
				&restorePVCWithVolumeName,
				&restorePV,
				&nodeAgentPod,
			},
			expected: beginLine + podOnNodeLine + podCondLine + pvcBoundLine + pvLine + endLine,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...)

			e := genericRestoreExposer{
				kubeClient: fakeKubeClient,
				log:        velerotest.NewLogger(),
			}

			var ownerObject corev1api.ObjectReference
			if test.ownerRestore != nil {
				ownerObject = corev1api.ObjectReference{
					Kind:       test.ownerRestore.Kind,
					Namespace:  test.ownerRestore.Namespace,
					Name:       test.ownerRestore.Name,
					UID:        test.ownerRestore.UID,
					APIVersion: test.ownerRestore.APIVersion,
				}
			}

			diag := e.DiagnoseExpose(context.Background(), ownerObject)
			assert.Equal(t, test.expected, diag)
		})
	}
}

View File

@ -26,6 +26,24 @@ func (_m *GenericRestoreExposer) CleanUp(_a0 context.Context, _a1 v1.ObjectRefer
_m.Called(_a0, _a1)
}
// DiagnoseExpose provides a mock function with given fields: _a0, _a1
//
// The first configured return value may be either a string or a
// func(context.Context, v1.ObjectReference) string; a function is invoked with
// the call's arguments and its result is returned. The method panics when no
// return value has been configured.
func (_m *GenericRestoreExposer) DiagnoseExpose(_a0 context.Context, _a1 v1.ObjectReference) string {
	ret := _m.Called(_a0, _a1)
	if len(ret) == 0 {
		panic("no return value specified for DiagnoseExpose")
	}
	var r0 string
	// Prefer a function-typed return value so the result can depend on the arguments.
	if rf, ok := ret.Get(0).(func(context.Context, v1.ObjectReference) string); ok {
		r0 = rf(_a0, _a1)
	} else {
		r0 = ret.Get(0).(string)
	}
	return r0
}
// Expose provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6
func (_m *GenericRestoreExposer) Expose(_a0 context.Context, _a1 v1.ObjectReference, _a2 string, _a3 string, _a4 map[string]string, _a5 v1.ResourceRequirements, _a6 time.Duration) error {
ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6)

View File

@ -37,6 +37,10 @@ type SnapshotExposer interface {
// Otherwise, it returns nil immediately.
PeekExposed(context.Context, corev1.ObjectReference) error
// DiagnoseExpose generates diagnostic info when the expose has not finished for a long time.
// It returns the collected info, including any problems it finds, as a string.
DiagnoseExpose(context.Context, corev1.ObjectReference) string
// CleanUp cleans up any objects generated during the snapshot expose
CleanUp(context.Context, corev1.ObjectReference, string, string)
}

View File

@ -100,8 +100,17 @@ func IsRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace s
}
}
// IsRunningInNode checks if the node agent pod is running properly in a specified node. If not, return the error found
// KbClientIsRunningInNode checks whether the node agent pod is running properly
// in the specified node, listing pods through the given kube client. If it is
// not, the error found is returned.
//
// It delegates to isRunningInNode with a nil controller client.
func KbClientIsRunningInNode(ctx context.Context, namespace string, nodeName string, kubeClient kubernetes.Interface) error {
	return isRunningInNode(ctx, namespace, nodeName, nil, kubeClient)
}
// IsRunningInNode checks whether the node agent pod is running properly in the
// specified node, listing pods through the given controller client. If it is
// not, the error found is returned.
//
// It delegates to isRunningInNode with a nil kube client.
func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client) error {
	return isRunningInNode(ctx, namespace, nodeName, crClient, nil)
}
func isRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client, kubeClient kubernetes.Interface) error {
if nodeName == "" {
return errors.New("node name is empty")
}
@ -112,7 +121,12 @@ func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crC
return errors.Wrap(err, "fail to parse selector")
}
err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector})
if crClient != nil {
err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector})
} else {
pods, err = kubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: parsedSelector.String()})
}
if err != nil {
return errors.Wrap(err, "failed to list daemonset pods")
}

View File

@ -786,3 +786,51 @@ func WaitUntilVSCHandleIsReady(
return vsc, nil
}
// DiagnoseVS returns a single newline-terminated line describing the given
// VolumeSnapshot: its namespace/name, the VolumeSnapshotContent it is bound to,
// its readyToUse flag and its error message.
//
// Status fields that are unset are printed as their zero value (empty string
// or false). vs must not be nil.
func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot) string {
	var (
		boundVSC string
		ready    bool
		errMsg   string
	)

	if status := vs.Status; status != nil {
		if status.BoundVolumeSnapshotContentName != nil {
			boundVSC = *status.BoundVolumeSnapshotContentName
		}

		if status.ReadyToUse != nil {
			ready = *status.ReadyToUse
		}

		if status.Error != nil && status.Error.Message != nil {
			errMsg = *status.Error.Message
		}
	}

	return fmt.Sprintf("VS %s/%s, bind to %s, readyToUse %v, errMessage %s\n", vs.Namespace, vs.Name, boundVSC, ready, errMsg)
}
// DiagnoseVSC returns a single newline-terminated line describing the given
// VolumeSnapshotContent: its name, its readyToUse flag, its error message and
// its snapshot handle.
//
// Status fields that are unset are printed as their zero value (empty string
// or false). vsc must not be nil.
func DiagnoseVSC(vsc *snapshotv1api.VolumeSnapshotContent) string {
	var (
		snapshotHandle string
		ready          bool
		errMsg         string
	)

	if status := vsc.Status; status != nil {
		if status.SnapshotHandle != nil {
			snapshotHandle = *status.SnapshotHandle
		}

		if status.ReadyToUse != nil {
			ready = *status.ReadyToUse
		}

		if status.Error != nil && status.Error.Message != nil {
			errMsg = *status.Error.Message
		}
	}

	return fmt.Sprintf("VSC %s, readyToUse %v, errMessage %s, handle %s\n", vsc.Name, ready, errMsg, snapshotHandle)
}

View File

@ -1732,3 +1732,197 @@ func TestWaitUntilVSCHandleIsReady(t *testing.T) {
})
}
}
// TestDiagnoseVS checks the line produced by DiagnoseVS as the VolumeSnapshot
// status is filled in step by step: no status, empty status, bound VSC name,
// readyToUse, an empty error and finally an error carrying a message.
func TestDiagnoseVS(t *testing.T) {
	boundName := "fake-vsc"
	ready := true
	errText := "fake-message"

	// newVS builds the snapshot under test; only the status differs per case.
	newVS := func(status *snapshotv1api.VolumeSnapshotStatus) *snapshotv1api.VolumeSnapshot {
		return &snapshotv1api.VolumeSnapshot{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "fake-vs",
				Namespace: "fake-ns",
			},
			Status: status,
		}
	}

	cases := []struct {
		name     string
		vs       *snapshotv1api.VolumeSnapshot
		expected string
	}{
		{
			name:     "VS with no status",
			vs:       newVS(nil),
			expected: "VS fake-ns/fake-vs, bind to , readyToUse false, errMessage \n",
		},
		{
			name:     "VS with empty status",
			vs:       newVS(&snapshotv1api.VolumeSnapshotStatus{}),
			expected: "VS fake-ns/fake-vs, bind to , readyToUse false, errMessage \n",
		},
		{
			name: "VS with VSC name",
			vs: newVS(&snapshotv1api.VolumeSnapshotStatus{
				BoundVolumeSnapshotContentName: &boundName,
			}),
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse false, errMessage \n",
		},
		{
			name: "VS with VSC name+ready",
			vs: newVS(&snapshotv1api.VolumeSnapshotStatus{
				BoundVolumeSnapshotContentName: &boundName,
				ReadyToUse:                     &ready,
			}),
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n",
		},
		{
			name: "VS with VSC name+ready+empty error",
			vs: newVS(&snapshotv1api.VolumeSnapshotStatus{
				BoundVolumeSnapshotContentName: &boundName,
				ReadyToUse:                     &ready,
				Error:                          &snapshotv1api.VolumeSnapshotError{},
			}),
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n",
		},
		{
			name: "VS with VSC name+ready+error",
			vs: newVS(&snapshotv1api.VolumeSnapshotStatus{
				BoundVolumeSnapshotContentName: &boundName,
				ReadyToUse:                     &ready,
				Error: &snapshotv1api.VolumeSnapshotError{
					Message: &errText,
				},
			}),
			expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage fake-message\n",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			got := DiagnoseVS(c.vs)
			assert.Equal(t, c.expected, got)
		})
	}
}
// TestDiagnoseVSC checks the one-line summary DiagnoseVSC produces for a
// VolumeSnapshotContent. The cases populate the status step by step: no
// status, empty status, readyToUse, snapshot handle, empty error and finally
// an error carrying a message.
func TestDiagnoseVSC(t *testing.T) {
	// The status fields under test are pointers, so they need addressable values.
	readyToUse := true
	message := "fake-message"
	handle := "fake-handle"

	testCases := []struct {
		name     string
		vsc      *snapshotv1api.VolumeSnapshotContent
		expected string
	}{
		{
			// Status pointer left nil.
			name: "VSC with no status",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
			},
			expected: "VSC fake-vsc, readyToUse false, errMessage , handle \n",
		},
		{
			// Status present, but every field inside it is nil.
			name: "VSC with empty status",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{},
			},
			expected: "VSC fake-vsc, readyToUse false, errMessage , handle \n",
		},
		{
			name: "VSC with ready",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse: &readyToUse,
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage , handle \n",
		},
		{
			name: "VSC with ready+handle",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse:     &readyToUse,
					SnapshotHandle: &handle,
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage , handle fake-handle\n",
		},
		{
			// Error is set but its Message pointer is nil.
			name: "VSC with ready+handle+empty error",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse:     &readyToUse,
					SnapshotHandle: &handle,
					Error:          &snapshotv1api.VolumeSnapshotError{},
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage , handle fake-handle\n",
		},
		{
			name: "VSC with ready+handle+error",
			vsc: &snapshotv1api.VolumeSnapshotContent{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-vsc",
				},
				Status: &snapshotv1api.VolumeSnapshotContentStatus{
					ReadyToUse:     &readyToUse,
					SnapshotHandle: &handle,
					Error: &snapshotv1api.VolumeSnapshotError{
						Message: &message,
					},
				},
			},
			expected: "VSC fake-vsc, readyToUse true, errMessage fake-message, handle fake-handle\n",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			diag := DiagnoseVSC(tc.vsc)
			assert.Equal(t, tc.expected, diag)
		})
	}
}

View File

@ -263,3 +263,13 @@ func ToSystemAffinity(loadAffinities []*LoadAffinity) *corev1api.Affinity {
return nil
}
// DiagnosePod builds a multi-line, human-readable summary of the pod.
// The first line carries the pod's namespace/name, phase and node name; one
// further line is appended per entry in the pod's status conditions, in order.
// Every line is newline-terminated.
func DiagnosePod(pod *corev1api.Pod) string {
	summary := fmt.Sprintf("Pod %s/%s, phase %s, node name %s\n", pod.Namespace, pod.Name, pod.Status.Phase, pod.Spec.NodeName)

	conditions := pod.Status.Conditions
	for i := range conditions {
		cond := &conditions[i]
		summary += fmt.Sprintf("Pod condition %s, status %s, reason %s, message %s\n", cond.Type, cond.Status, cond.Reason, cond.Message)
	}

	return summary
}

View File

@ -886,3 +886,49 @@ func TestToSystemAffinity(t *testing.T) {
})
}
}
// TestDiagnosePod checks the text DiagnosePod produces for a pending pod that
// has a node name and two status conditions.
func TestDiagnosePod(t *testing.T) {
	type scenario struct {
		name     string
		pod      *corev1api.Pod
		expected string
	}

	scenarios := []scenario{
		{
			name: "pod with all info",
			pod: &corev1api.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-pod",
					Namespace: "fake-ns",
				},
				Spec: corev1api.PodSpec{
					NodeName: "fake-node",
				},
				Status: corev1api.PodStatus{
					Phase: corev1api.PodPending,
					Conditions: []corev1api.PodCondition{
						{
							Type:    corev1api.PodInitialized,
							Status:  corev1api.ConditionTrue,
							Reason:  "fake-reason-1",
							Message: "fake-message-1",
						},
						{
							Type:    corev1api.PodScheduled,
							Status:  corev1api.ConditionFalse,
							Reason:  "fake-reason-2",
							Message: "fake-message-2",
						},
					},
				},
			},
			// One header line followed by one line per condition, in order.
			expected: "Pod fake-ns/fake-pod, phase Pending, node name fake-node\nPod condition Initialized, status True, reason fake-reason-1, message fake-message-1\nPod condition PodScheduled, status False, reason fake-reason-2, message fake-message-2\n",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			assert.Equal(t, sc.expected, DiagnosePod(sc.pod))
		})
	}
}

View File

@ -418,3 +418,12 @@ func GetPVCForPodVolume(vol *corev1api.Volume, pod *corev1api.Pod, crClient crcl
return pvc, nil
}
// DiagnosePVC renders a single newline-terminated line describing the PVC:
// its namespace/name, its status phase and the volume name recorded in its spec.
func DiagnosePVC(pvc *corev1api.PersistentVolumeClaim) string {
	phase := pvc.Status.Phase
	volumeName := pvc.Spec.VolumeName

	return fmt.Sprintf("PVC %s/%s, phase %s, binding to %s\n", pvc.Namespace, pvc.Name, phase, volumeName)
}
// DiagnosePV renders a single newline-terminated line describing the PV:
// its name followed by the phase, reason and message from its status.
func DiagnosePV(pv *corev1api.PersistentVolume) string {
	status := &pv.Status

	return fmt.Sprintf("PV %s, phase %s, reason %s, message %s\n", pv.Name, status.Phase, status.Reason, status.Message)
}

View File

@ -1488,3 +1488,65 @@ func TestMakePodPVCAttachment(t *testing.T) {
})
}
}
// TestDiagnosePVC checks the line DiagnosePVC produces for a pending PVC whose
// spec names a volume.
func TestDiagnosePVC(t *testing.T) {
	type scenario struct {
		name     string
		pvc      *corev1api.PersistentVolumeClaim
		expected string
	}

	scenarios := []scenario{
		{
			name: "pvc with all info",
			pvc: &corev1api.PersistentVolumeClaim{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "fake-pvc",
					Namespace: "fake-ns",
				},
				Spec: corev1api.PersistentVolumeClaimSpec{
					VolumeName: "fake-pv",
				},
				Status: corev1api.PersistentVolumeClaimStatus{
					Phase: corev1api.ClaimPending,
				},
			},
			expected: "PVC fake-ns/fake-pvc, phase Pending, binding to fake-pv\n",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			assert.Equal(t, sc.expected, DiagnosePVC(sc.pvc))
		})
	}
}
// TestDiagnosePV checks the line DiagnosePV produces for a pending PV whose
// status carries both a reason and a message.
func TestDiagnosePV(t *testing.T) {
	type scenario struct {
		name     string
		pv       *corev1api.PersistentVolume
		expected string
	}

	scenarios := []scenario{
		{
			name: "pv with all info",
			pv: &corev1api.PersistentVolume{
				ObjectMeta: metav1.ObjectMeta{
					Name: "fake-pv",
				},
				Status: corev1api.PersistentVolumeStatus{
					Phase:   corev1api.VolumePending,
					Message: "fake-message",
					Reason:  "fake-reason",
				},
			},
			expected: "PV fake-pv, phase Pending, reason fake-reason, message fake-message\n",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			assert.Equal(t, sc.expected, DiagnosePV(sc.pv))
		})
	}
}