From d7f1ea4fbd5ccdbf4327f855e461bca9caa89507 Mon Sep 17 00:00:00 2001
From: Xun Jiang
Date: Fri, 30 Jun 2023 17:53:27 +0800
Subject: [PATCH] Add exit code log and possible memory shortage warning log for Restic command failure.

Signed-off-by: Xun Jiang
---
 changelogs/unreleased/6459-blackpiglet |  1 +
 pkg/repository/restic/repository.go    |  2 +-
 pkg/restic/exec_commands.go            |  4 +++-
 pkg/util/exec/exec.go                  | 37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 changelogs/unreleased/6459-blackpiglet

diff --git a/changelogs/unreleased/6459-blackpiglet b/changelogs/unreleased/6459-blackpiglet
new file mode 100644
index 000000000..26e0e3856
--- /dev/null
+++ b/changelogs/unreleased/6459-blackpiglet
@@ -0,0 +1 @@
+Add exit code log and possible memory shortage warning log for Restic command failure.
\ No newline at end of file
diff --git a/pkg/repository/restic/repository.go b/pkg/repository/restic/repository.go
index 392caf284..3c15e0b37 100644
--- a/pkg/repository/restic/repository.go
+++ b/pkg/repository/restic/repository.go
@@ -112,7 +112,7 @@ func (r *RepositoryService) exec(cmd *restic.Command, bsl *velerov1api.BackupSto
 		cmd.ExtraFlags = append(cmd.ExtraFlags, skipTLSRet)
 	}
 
-	stdout, stderr, err := veleroexec.RunCommand(cmd.Cmd())
+	stdout, stderr, err := veleroexec.RunCommandWithLog(cmd.Cmd(), r.log)
 	r.log.WithFields(logrus.Fields{
 		"repository": cmd.RepoName(),
 		"command":    cmd.String(),
diff --git a/pkg/restic/exec_commands.go b/pkg/restic/exec_commands.go
index 0cbc42802..94c17c04a 100644
--- a/pkg/restic/exec_commands.go
+++ b/pkg/restic/exec_commands.go
@@ -86,6 +86,7 @@ func RunBackup(backupCmd *Command, log logrus.FieldLogger, updater uploader.Prog
 
 	err := cmd.Start()
 	if err != nil {
+		exec.LogErrorAsExitCode(err, log)
 		return stdoutBuf.String(), stderrBuf.String(), err
 	}
 
@@ -119,6 +120,7 @@ func RunBackup(backupCmd *Command, log logrus.FieldLogger, updater uploader.Prog
 
 	err = cmd.Wait()
 	if err != nil {
+		exec.LogErrorAsExitCode(err, log)
 		return stdoutBuf.String(), stderrBuf.String(), err
 	}
 	quit <- struct{}{}
@@ -229,7 +231,7 @@ func RunRestore(restoreCmd *Command, log logrus.FieldLogger, updater uploader.Pr
 		}
 	}()
 
-	stdout, stderr, err := exec.RunCommand(restoreCmd.Cmd())
+	stdout, stderr, err := exec.RunCommandWithLog(restoreCmd.Cmd(), log)
 	quit <- struct{}{}
 
 	// update progress to 100%
diff --git a/pkg/util/exec/exec.go b/pkg/util/exec/exec.go
index 84bffb257..109118d58 100644
--- a/pkg/util/exec/exec.go
+++ b/pkg/util/exec/exec.go
@@ -22,6 +22,7 @@ import (
 	"os/exec"
 
 	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
 )
 
 // RunCommand runs a command and returns its stdout, stderr, and its returned
@@ -52,3 +53,39 @@ func RunCommand(cmd *exec.Cmd) (string, string, error) {
 
 	return stdout, stderr, runErr
 }
+
+// RunCommandWithLog runs a command via RunCommand and returns its stdout,
+// stderr, and error unchanged. The error is also handed to
+// LogErrorAsExitCode so that failure details are written to log.
+func RunCommandWithLog(cmd *exec.Cmd, log logrus.FieldLogger) (string, string, error) {
+	stdout, stderr, err := RunCommand(cmd)
+	LogErrorAsExitCode(err, log)
+	return stdout, stderr, err
+}
+
+// LogErrorAsExitCode logs diagnostic details for a failed command. It does
+// nothing when err is nil. If err is, or wraps, an *exec.ExitError, the exit
+// code, process ID and exit status are logged at error level, followed by a
+// warning about a possible out-of-memory kill when the exit code is -1. Any
+// other error is logged at info level.
+func LogErrorAsExitCode(err error, log logrus.FieldLogger) {
+	if err == nil {
+		return
+	}
+
+	// errors.As also matches an ExitError that a caller has wrapped with
+	// additional context, which a plain type assertion would miss.
+	var exitError *exec.ExitError
+	if !errors.As(err, &exitError) {
+		log.WithError(err).Info("Error cannot be convert to ExitError format.")
+		return
+	}
+
+	log.Errorf("Restic command fail with ExitCode: %d. Process ID is %d, Exit error is: %s", exitError.ExitCode(), exitError.Pid(), exitError.String())
+	// An ExitCode of -1 from os/exec means the process was terminated by a
+	// signal. Usually this is caused by CGroup's OOM, so warn the user.
+	// https://github.com/golang/go/blob/master/src/os/exec_posix.go#L128-L136
+	if exitError.ExitCode() == -1 {
+		log.Warnf("The ExitCode is -1, which means the process is terminated by signal. Usually this is caused by CGroup kill due to out of memory. Please check whether there is such information in the work nodes' dmesg log.")
+	}
+}