parent 55873c1c37
commit 88a1317f48
@@ -680,7 +680,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
 			s.config.restoreResourcePriorities,
 			s.kubeClient.CoreV1().Namespaces(),
 			podvolume.NewRestorerFactory(s.repoLocker, s.repoEnsurer, s.veleroClient, s.kubeClient.CoreV1(),
-				s.sharedInformerFactory.Velero().V1().BackupRepositories().Informer().HasSynced, s.logger),
+				s.kubeClient.CoreV1(), s.kubeClient, s.sharedInformerFactory.Velero().V1().BackupRepositories().Informer().HasSynced, s.logger),
 			s.config.podVolumeOperationTimeout,
 			s.config.resourceTerminatingTimeout,
 			s.logger,
@@ -19,19 +19,24 @@ package podvolume
 import (
 	"context"
 	"sync"
+	"time"

 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	corev1api "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
 	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/client-go/tools/cache"

 	velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
 	clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned"
 	"github.com/vmware-tanzu/velero/pkg/label"
+	"github.com/vmware-tanzu/velero/pkg/nodeagent"
 	"github.com/vmware-tanzu/velero/pkg/repository"
 	"github.com/vmware-tanzu/velero/pkg/util/boolptr"
+	"github.com/vmware-tanzu/velero/pkg/util/kube"
 )

 type RestoreData struct {
@@ -53,10 +58,13 @@ type restorer struct {
 	repoEnsurer  *repository.RepositoryEnsurer
 	veleroClient clientset.Interface
 	pvcClient    corev1client.PersistentVolumeClaimsGetter
+	podClient    corev1client.PodsGetter
+	kubeClient   kubernetes.Interface

 	resultsLock    sync.Mutex
 	results        map[string]chan *velerov1api.PodVolumeRestore
+	nodeAgentCheck chan struct{}
 	log            logrus.FieldLogger
 }

 func newRestorer(
@@ -66,6 +74,8 @@ func newRestorer(
 	podVolumeRestoreInformer cache.SharedIndexInformer,
 	veleroClient clientset.Interface,
 	pvcClient corev1client.PersistentVolumeClaimsGetter,
+	podClient corev1client.PodsGetter,
+	kubeClient kubernetes.Interface,
 	log logrus.FieldLogger,
 ) *restorer {
 	r := &restorer{
@@ -74,6 +84,8 @@ func newRestorer(
 		repoEnsurer:  repoEnsurer,
 		veleroClient: veleroClient,
 		pvcClient:    pvcClient,
+		podClient:    podClient,
+		kubeClient:   kubeClient,

 		results: make(map[string]chan *velerov1api.PodVolumeRestore),
 		log:     log,
@@ -108,6 +120,10 @@ func (r *restorer) RestorePodVolumes(data RestoreData) []error {
 		return nil
 	}

+	if err := nodeagent.IsRunning(r.ctx, r.kubeClient, data.Restore.Namespace); err != nil {
+		return []error{errors.Wrapf(err, "error to check node agent status")}
+	}
+
 	repositoryType, err := getVolumesRepositoryType(volumesToRestore)
 	if err != nil {
 		return []error{err}
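The guard above fails the whole pod-volume restore early when the node-agent is not available in the Velero namespace. For orientation, a rough sketch of the kind of check a helper like nodeagent.IsRunning could perform; the DaemonSet name, error text, and exact logic here are assumptions for illustration, not the actual Velero implementation:

package nodeagent // illustrative sketch only

import (
	"context"

	"github.com/pkg/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// IsRunning (sketch): report an error when the node-agent DaemonSet cannot be
// found in the given namespace; any other lookup failure is returned as-is.
func IsRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error {
	_, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, "node-agent", metav1.GetOptions{})
	if apierrors.IsNotFound(err) {
		return errors.New("node-agent daemonset not found")
	}
	return err
}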
@@ -129,6 +145,8 @@ func (r *restorer) RestorePodVolumes(data RestoreData) []error {
 	r.results[resultsKey(data.Pod.Namespace, data.Pod.Name)] = resultsChan
 	r.resultsLock.Unlock()

+	r.nodeAgentCheck = make(chan struct{})
+
 	var (
 		errs        []error
 		numRestores int
@@ -161,6 +179,39 @@ func (r *restorer) RestorePodVolumes(data RestoreData) []error {
 		numRestores++
 	}

+	go func() {
+		nodeName := ""
+
+		checkFunc := func(ctx context.Context) (bool, error) {
+			newObj, err := r.kubeClient.CoreV1().Pods(data.Pod.Namespace).Get(context.TODO(), data.Pod.Name, metav1.GetOptions{})
+			if err != nil {
+				return false, err
+			}
+
+			nodeName = newObj.Spec.NodeName
+
+			err = kube.IsPodScheduled(newObj)
+			if err != nil {
+				return false, nil
+			} else {
+				return true, nil
+			}
+		}
+
+		err := wait.PollWithContext(r.ctx, time.Millisecond*500, time.Minute*10, checkFunc)
+		if err == wait.ErrWaitTimeout {
+			r.log.WithError(err).Error("Restoring pod is not scheduled until timeout, disengage")
+		} else if err != nil {
+			r.log.WithError(err).Error("Failed to check node-agent pod status, disengage")
+		} else {
+			err = nodeagent.IsRunningInNode(r.ctx, data.Restore.Namespace, nodeName, r.podClient)
+			if err != nil {
+				r.log.WithField("node", nodeName).WithError(err).Error("node-agent pod is not running on node, abort the restore")
+				r.nodeAgentCheck <- struct{}{}
+			}
+		}
+	}()
+
 ForEachVolume:
 	for i := 0; i < numRestores; i++ {
 		select {
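The goroutine added above polls until the restored pod is bound to a node, then verifies the node-agent pod on that node and signals the main wait loop over nodeAgentCheck only if that verification fails. A self-contained sketch of the same poll-then-signal pattern, assuming illustrative intervals, timeouts, and a placeholder condition:

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	abort := make(chan struct{}, 1)
	start := time.Now()

	go func() {
		// Placeholder condition: stands in for "has the restored pod been
		// scheduled onto a node yet?".
		scheduled := func(ctx context.Context) (bool, error) {
			return time.Since(start) > 2*time.Second, nil
		}

		err := wait.PollWithContext(context.Background(), 500*time.Millisecond, 10*time.Second, scheduled)
		if err == wait.ErrWaitTimeout {
			fmt.Println("pod was never scheduled, giving up on the check")
		} else if err != nil {
			fmt.Println("poll failed:", err)
		} else {
			// Stands in for the per-node node-agent check failing: tell the
			// consumer to stop waiting for the remaining restores.
			abort <- struct{}{}
		}
	}()

	select {
	case <-abort:
		fmt.Println("abort signalled by the checker")
	case <-time.After(15 * time.Second):
		fmt.Println("checker finished without signalling")
	}
}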
@@ -171,6 +222,9 @@ ForEachVolume:
 			if res.Status.Phase == velerov1api.PodVolumeRestorePhaseFailed {
 				errs = append(errs, errors.Errorf("pod volume restore failed: %s", res.Status.Message))
 			}
+		case <-r.nodeAgentCheck:
+			errs = append(errs, errors.New("node agent pod is not running in node"))
+			break ForEachVolume
 		}
 	}

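A note on the labeled break used by the new case: inside a select, a bare break only leaves the select statement, so the ForEachVolume label is what lets the node-agent failure stop the whole wait loop. A minimal illustration (channel name and loop bounds are made up):

package main

import "fmt"

func main() {
	abort := make(chan struct{})
	close(abort) // a closed channel is always ready to receive from

Wait:
	for i := 0; i < 3; i++ {
		select {
		case <-abort:
			fmt.Println("stopping at iteration", i)
			break Wait // a bare break here would only exit the select
		}
	}
}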
@@ -23,6 +23,7 @@ import (
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
 	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/client-go/tools/cache"

@@ -42,6 +43,8 @@ func NewRestorerFactory(repoLocker *repository.RepoLocker,
 	repoEnsurer *repository.RepositoryEnsurer,
 	veleroClient clientset.Interface,
 	pvcClient corev1client.PersistentVolumeClaimsGetter,
+	podClient corev1client.PodsGetter,
+	kubeClient kubernetes.Interface,
 	repoInformerSynced cache.InformerSynced,
 	log logrus.FieldLogger) RestorerFactory {
 	return &restorerFactory{
@@ -49,6 +52,8 @@ func NewRestorerFactory(repoLocker *repository.RepoLocker,
 		repoEnsurer:        repoEnsurer,
 		veleroClient:       veleroClient,
 		pvcClient:          pvcClient,
+		podClient:          podClient,
+		kubeClient:         kubeClient,
 		repoInformerSynced: repoInformerSynced,
 		log:                log,
 	}
@@ -59,6 +64,8 @@ type restorerFactory struct {
 	repoEnsurer        *repository.RepositoryEnsurer
 	veleroClient       clientset.Interface
 	pvcClient          corev1client.PersistentVolumeClaimsGetter
+	podClient          corev1client.PodsGetter
+	kubeClient         kubernetes.Interface
 	repoInformerSynced cache.InformerSynced
 	log                logrus.FieldLogger
 }
@@ -74,7 +81,7 @@ func (rf *restorerFactory) NewRestorer(ctx context.Context, restore *velerov1api
 		},
 	)

-	r := newRestorer(ctx, rf.repoLocker, rf.repoEnsurer, informer, rf.veleroClient, rf.pvcClient, rf.log)
+	r := newRestorer(ctx, rf.repoLocker, rf.repoEnsurer, informer, rf.veleroClient, rf.pvcClient, rf.podClient, rf.kubeClient, rf.log)

 	go informer.Run(ctx.Done())
 	if !cache.WaitForCacheSync(ctx.Done(), informer.HasSynced, rf.repoInformerSynced) {
@@ -37,3 +37,21 @@ func IsPodRunning(pod *corev1api.Pod) error {

 	return nil
 }
+
+// IsPodScheduled does a well-rounded check to make sure the specified pod has been scheduled into a node and is in a stable status.
+// If not, return the error found.
+func IsPodScheduled(pod *corev1api.Pod) error {
+	if pod.Spec.NodeName == "" {
+		return errors.Errorf("pod is not scheduled, name=%s, namespace=%s, phase=%s", pod.Name, pod.Namespace, pod.Status.Phase)
+	}
+
+	if pod.Status.Phase != corev1api.PodRunning && pod.Status.Phase != corev1api.PodPending {
+		return errors.Errorf("pod is not in a stable status, name=%s, namespace=%s, phase=%s", pod.Name, pod.Namespace, pod.Status.Phase)
+	}
+
+	if pod.DeletionTimestamp != nil {
+		return errors.Errorf("pod is being terminated, name=%s, namespace=%s, phase=%s", pod.Name, pod.Namespace, pod.Status.Phase)
+	}
+
+	return nil
+}
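A quick usage sketch for the new helper, exercising each branch with in-memory Pod objects; the pod and node names are made up and this is not part of the change itself:

package main

import (
	"fmt"

	corev1api "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/vmware-tanzu/velero/pkg/util/kube"
)

func main() {
	pod := &corev1api.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "restored-pod", Namespace: "default"},
		Status:     corev1api.PodStatus{Phase: corev1api.PodPending},
	}

	// No node assigned yet: "pod is not scheduled, ...".
	fmt.Println(kube.IsPodScheduled(pod))

	// Once the scheduler binds the pod, a Pending or Running pod that is not
	// being deleted passes the check.
	pod.Spec.NodeName = "worker-1"
	fmt.Println(kube.IsPodScheduled(pod)) // <nil>

	// A pod that already reached a terminal phase is rejected as unstable.
	pod.Status.Phase = corev1api.PodSucceeded
	fmt.Println(kube.IsPodScheduled(pod))
}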