Merge pull request #7022 from allenxu404/i6721
Fix inconsistent behavior of Backup and Restore hook execution
pull/6553/head
commit
6b7ce6655d
|
@ -0,0 +1 @@
|
|||
Fix inconsistent behavior of Backup and Restore hook execution
|
|
@ -50,6 +50,11 @@ type DefaultListWatchFactory struct {
|
|||
PodsGetter cache.Getter
|
||||
}
|
||||
|
||||
type HookErrInfo struct {
|
||||
Namespace string
|
||||
Err error
|
||||
}
|
||||
|
||||
func (d *DefaultListWatchFactory) NewListWatch(namespace string, selector fields.Selector) cache.ListerWatcher {
|
||||
return cache.NewListWatchFromClient(d.PodsGetter, "pods", namespace, selector)
|
||||
}
|
||||
|
@ -158,8 +163,8 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
|
|||
if hook.Hook.WaitTimeout.Duration != 0 && time.Since(waitStart) > hook.Hook.WaitTimeout.Duration {
|
||||
err := fmt.Errorf("hook %s in container %s expired before executing", hook.HookName, hook.Hook.Container)
|
||||
hookLog.Error(err)
|
||||
errors = append(errors, err)
|
||||
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
|
||||
errors = append(errors, err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
|
@ -172,8 +177,9 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
|
|||
}
|
||||
if err := e.PodCommandExecutor.ExecutePodCommand(hookLog, podMap, pod.Namespace, pod.Name, hook.HookName, eh); err != nil {
|
||||
hookLog.WithError(err).Error("Error executing hook")
|
||||
err = fmt.Errorf("hook %s in container %s failed to execute, err: %v", hook.HookName, hook.Hook.Container, err)
|
||||
errors = append(errors, err)
|
||||
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
|
||||
errors = append(errors, err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
|
@ -204,10 +210,9 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
|
|||
podWatcher.Run(ctx.Done())
|
||||
|
||||
// There are some cases where this function could return with unexecuted hooks: the pod may
|
||||
// be deleted, a hook with OnError mode Fail could fail, or it may timeout waiting for
|
||||
// be deleted, a hook could fail, or it may timeout waiting for
|
||||
// containers to become ready.
|
||||
// Each unexecuted hook is logged as an error but only hooks with OnError mode Fail return
|
||||
// an error from this function.
|
||||
// Each unexecuted hook is logged as an error and this error will be returned from this function.
|
||||
for _, hooks := range byContainer {
|
||||
for _, hook := range hooks {
|
||||
if hook.executed {
|
||||
|
@ -222,9 +227,7 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
|
|||
},
|
||||
)
|
||||
hookLog.Error(err)
|
||||
if hook.Hook.OnError == velerov1api.HookErrorModeFail {
|
||||
errors = append(errors, err)
|
||||
}
|
||||
errors = append(errors, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -209,10 +209,10 @@ func TestWaitExecHandleHooks(t *testing.T) {
|
|||
Result(),
|
||||
},
|
||||
},
|
||||
expectedErrors: []error{errors.New("pod hook error")},
|
||||
expectedErrors: []error{errors.New("hook <from-annotation> in container container1 failed to execute, err: pod hook error")},
|
||||
},
|
||||
{
|
||||
name: "should return no error when hook from annotation fails with on error mode continue",
|
||||
name: "should return error when hook from annotation fails with on error mode continue",
|
||||
initialPod: builder.ForPod("default", "my-pod").
|
||||
ObjectMeta(builder.WithAnnotations(
|
||||
podRestoreHookCommandAnnotationKey, "/usr/bin/foo",
|
||||
|
@ -278,7 +278,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
|
|||
Result(),
|
||||
},
|
||||
},
|
||||
expectedErrors: nil,
|
||||
expectedErrors: []error{errors.New("hook <from-annotation> in container container1 failed to execute, err: pod hook error")},
|
||||
},
|
||||
{
|
||||
name: "should return no error when hook from annotation executes after 10ms wait for container to start",
|
||||
|
@ -422,7 +422,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
|
|||
},
|
||||
},
|
||||
{
|
||||
name: "should return no error when spec hook with wait timeout expires with OnError mode Continue",
|
||||
name: "should return error when spec hook with wait timeout expires with OnError mode Continue",
|
||||
groupResource: "pods",
|
||||
initialPod: builder.ForPod("default", "my-pod").
|
||||
Containers(&v1.Container{
|
||||
|
@ -435,7 +435,7 @@ func TestWaitExecHandleHooks(t *testing.T) {
|
|||
},
|
||||
}).
|
||||
Result(),
|
||||
expectedErrors: nil,
|
||||
expectedErrors: []error{errors.New("hook my-hook-1 in container container1 in pod default/my-pod not executed: context deadline exceeded")},
|
||||
byContainer: map[string][]PodExecRestoreHook{
|
||||
"container1": {
|
||||
{
|
||||
|
@ -515,8 +515,8 @@ func TestWaitExecHandleHooks(t *testing.T) {
|
|||
sharedHooksContextTimeout: time.Millisecond,
|
||||
},
|
||||
{
|
||||
name: "should return no error when shared hooks context is canceled before spec hook with OnError mode Continue executes",
|
||||
expectedErrors: nil,
|
||||
name: "should return error when shared hooks context is canceled before spec hook with OnError mode Continue executes",
|
||||
expectedErrors: []error{errors.New("hook my-hook-1 in container container1 in pod default/my-pod not executed: context deadline exceeded")},
|
||||
groupResource: "pods",
|
||||
initialPod: builder.ForPod("default", "my-pod").
|
||||
Containers(&v1.Container{
|
||||
|
|
|
@ -261,12 +261,12 @@ type ExecHook struct {
|
|||
type HookErrorMode string
|
||||
|
||||
const (
|
||||
// HookErrorModeContinue means that an error from a hook is acceptable, and the backup can
|
||||
// proceed.
|
||||
// HookErrorModeContinue means that an error from a hook is acceptable and the backup/restore can
|
||||
// proceed with the rest of hooks' execution. This backup/restore should be in `PartiallyFailed` status.
|
||||
HookErrorModeContinue HookErrorMode = "Continue"
|
||||
|
||||
// HookErrorModeFail means that an error from a hook is problematic, and the backup should be in
|
||||
// error.
|
||||
// HookErrorModeFail means that an error from a hook is problematic and Velero should stop executing following hooks.
|
||||
// This backup/restore should be in `PartiallyFailed` status.
|
||||
HookErrorModeFail HookErrorMode = "Fail"
|
||||
)
|
||||
|
||||
|
|
|
@ -311,7 +311,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
|
|||
discoveryHelper: kr.discoveryHelper,
|
||||
resourcePriorities: kr.resourcePriorities,
|
||||
resourceRestoreHooks: resourceRestoreHooks,
|
||||
hooksErrs: make(chan error),
|
||||
hooksErrs: make(chan hook.HookErrInfo),
|
||||
waitExecHookHandler: waitExecHookHandler,
|
||||
hooksContext: hooksCtx,
|
||||
hooksCancelFunc: hooksCancelFunc,
|
||||
|
@ -360,7 +360,7 @@ type restoreContext struct {
|
|||
discoveryHelper discovery.Helper
|
||||
resourcePriorities Priorities
|
||||
hooksWaitGroup sync.WaitGroup
|
||||
hooksErrs chan error
|
||||
hooksErrs chan hook.HookErrInfo
|
||||
resourceRestoreHooks []hook.ResourceRestoreHook
|
||||
waitExecHookHandler hook.WaitExecHookHandler
|
||||
hooksContext go_context.Context
|
||||
|
@ -655,8 +655,8 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
|
|||
ctx.hooksWaitGroup.Wait()
|
||||
close(ctx.hooksErrs)
|
||||
}()
|
||||
for err := range ctx.hooksErrs {
|
||||
errs.Velero = append(errs.Velero, err.Error())
|
||||
for errInfo := range ctx.hooksErrs {
|
||||
errs.Add(errInfo.Namespace, errInfo.Err)
|
||||
}
|
||||
ctx.log.Info("Done waiting for all post-restore exec hooks to complete")
|
||||
|
||||
|
@ -1898,10 +1898,11 @@ func (ctx *restoreContext) waitExec(createdObj *unstructured.Unstructured) {
|
|||
// on the ctx.podVolumeErrs channel.
|
||||
defer ctx.hooksWaitGroup.Done()
|
||||
|
||||
podNs := createdObj.GetNamespace()
|
||||
pod := new(v1.Pod)
|
||||
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(createdObj.UnstructuredContent(), &pod); err != nil {
|
||||
ctx.log.WithError(err).Error("error converting unstructured pod")
|
||||
ctx.hooksErrs <- err
|
||||
ctx.hooksErrs <- hook.HookErrInfo{Namespace: podNs, Err: err}
|
||||
return
|
||||
}
|
||||
execHooksByContainer, err := hook.GroupRestoreExecHooks(
|
||||
|
@ -1911,7 +1912,7 @@ func (ctx *restoreContext) waitExec(createdObj *unstructured.Unstructured) {
|
|||
)
|
||||
if err != nil {
|
||||
ctx.log.WithError(err).Errorf("error getting exec hooks for pod %s/%s", pod.Namespace, pod.Name)
|
||||
ctx.hooksErrs <- err
|
||||
ctx.hooksErrs <- hook.HookErrInfo{Namespace: podNs, Err: err}
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -1921,7 +1922,7 @@ func (ctx *restoreContext) waitExec(createdObj *unstructured.Unstructured) {
|
|||
|
||||
for _, err := range errs {
|
||||
// Errors are already logged in the HandleHooks method.
|
||||
ctx.hooksErrs <- err
|
||||
ctx.hooksErrs <- hook.HookErrInfo{Namespace: podNs, Err: err}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
|
Loading…
Reference in New Issue