fix: improve crash backtrace on ARM and fix misleading signal info
- Add optional libunwind support for better stack traces on ARM (glibc's backtrace() doesn't work well on ARM due to missing frame pointers) - Fix misleading signal info: si_pid/si_uid/si_status are only valid for SIGCHLD, not for SIGSEGV/SIGBUS - they were showing garbage values (the fault address misinterpreted as PID) - Add human-readable fault descriptions for SIGSEGV, SIGBUS, SIGFPE, and SIGILL signals - Use clearer terminology in error messages To enable libunwind on ARM: apt install libunwind-dev Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>pull/4579/head
parent
a6e91a4699
commit
981c6d685e
|
|
@ -313,6 +313,18 @@ if(HAVE_EXECINFO_H)
|
|||
endif()
|
||||
check_function_exists("backtrace_symbols" HAVE_DECL_BACKTRACE_SYMBOLS)
|
||||
endif()
|
||||
|
||||
# libunwind provides better backtraces than glibc's backtrace(), especially on ARM
|
||||
find_package(LibUnwind)
|
||||
if(LIBUNWIND_FOUND)
|
||||
set(HAVE_LIBUNWIND 1)
|
||||
list(APPEND ZM_BIN_LIBS "${LIBUNWIND_LIBRARIES}")
|
||||
include_directories("${LIBUNWIND_INCLUDE_DIR}")
|
||||
set(optlibsfound "${optlibsfound} libunwind")
|
||||
else()
|
||||
set(optlibsnotfound "${optlibsnotfound} libunwind")
|
||||
endif()
|
||||
|
||||
check_include_file("ucontext.h" HAVE_UCONTEXT_H)
|
||||
check_include_file("sys/sendfile.h" HAVE_SYS_SENDFILE_H)
|
||||
check_include_file("sys/syscall.h" HAVE_SYS_SYSCALL_H)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
# FindLibUnwind.cmake
|
||||
# Find the libunwind library for better stack traces on ARM and other platforms
|
||||
#
|
||||
# Sets:
|
||||
# LIBUNWIND_FOUND - True if libunwind was found
|
||||
# LIBUNWIND_INCLUDE_DIR - Include directory for libunwind
|
||||
# LIBUNWIND_LIBRARIES - Libraries to link against
|
||||
|
||||
find_path(LIBUNWIND_INCLUDE_DIR
|
||||
NAMES libunwind.h
|
||||
PATHS /usr/include /usr/local/include
|
||||
)
|
||||
|
||||
find_library(LIBUNWIND_LIBRARY
|
||||
NAMES unwind
|
||||
PATHS /usr/lib /usr/local/lib /usr/lib/arm-linux-gnueabihf /usr/lib/aarch64-linux-gnu
|
||||
)
|
||||
|
||||
# On some platforms, we also need libunwind-generic or platform-specific libs
|
||||
find_library(LIBUNWIND_GENERIC_LIBRARY
|
||||
NAMES unwind-generic unwind-arm unwind-aarch64 unwind-x86_64 unwind-x86
|
||||
PATHS /usr/lib /usr/local/lib /usr/lib/arm-linux-gnueabihf /usr/lib/aarch64-linux-gnu
|
||||
)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(LibUnwind
|
||||
REQUIRED_VARS LIBUNWIND_LIBRARY LIBUNWIND_INCLUDE_DIR
|
||||
)
|
||||
|
||||
if(LIBUNWIND_FOUND)
|
||||
set(LIBUNWIND_LIBRARIES ${LIBUNWIND_LIBRARY})
|
||||
if(LIBUNWIND_GENERIC_LIBRARY)
|
||||
list(APPEND LIBUNWIND_LIBRARIES ${LIBUNWIND_GENERIC_LIBRARY})
|
||||
endif()
|
||||
mark_as_advanced(LIBUNWIND_INCLUDE_DIR LIBUNWIND_LIBRARY LIBUNWIND_GENERIC_LIBRARY)
|
||||
endif()
|
||||
|
|
@ -56,15 +56,65 @@ RETSIGTYPE zm_die_handler(int signal)
|
|||
// Get more information if available
|
||||
#if ( HAVE_SIGINFO_T && HAVE_UCONTEXT_T )
|
||||
void *ip = nullptr;
|
||||
void *cr2 = nullptr;
|
||||
void *fault_addr = nullptr;
|
||||
if ( info && context ) {
|
||||
Debug(1,
|
||||
"Signal information: number %d code %d errno %d pid %d uid %d status %d",
|
||||
signal, info->si_code, info->si_errno, info->si_pid,
|
||||
info->si_uid, info->si_status);
|
||||
// Log signal code and errno (these are always valid)
|
||||
Debug(1, "Signal information: number %d code %d errno %d",
|
||||
signal, info->si_code, info->si_errno);
|
||||
|
||||
// For memory fault signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL), si_addr is valid
|
||||
// For other signals, si_pid/si_uid/si_status would be valid but we don't handle those here
|
||||
fault_addr = info->si_addr;
|
||||
|
||||
// Provide human-readable description of the fault
|
||||
const char *code_desc = "unknown";
|
||||
if (signal == SIGSEGV) {
|
||||
switch (info->si_code) {
|
||||
case SEGV_MAPERR: code_desc = "address not mapped to object"; break;
|
||||
case SEGV_ACCERR: code_desc = "invalid permissions for mapped object"; break;
|
||||
#ifdef SEGV_BNDERR
|
||||
case SEGV_BNDERR: code_desc = "failed address bound checks"; break;
|
||||
#endif
|
||||
#ifdef SEGV_PKUERR
|
||||
case SEGV_PKUERR: code_desc = "access denied by memory protection keys"; break;
|
||||
#endif
|
||||
default: code_desc = "unknown segfault code"; break;
|
||||
}
|
||||
} else if (signal == SIGBUS) {
|
||||
switch (info->si_code) {
|
||||
case BUS_ADRALN: code_desc = "invalid address alignment"; break;
|
||||
case BUS_ADRERR: code_desc = "nonexistent physical address"; break;
|
||||
case BUS_OBJERR: code_desc = "object-specific hardware error"; break;
|
||||
default: code_desc = "unknown bus error code"; break;
|
||||
}
|
||||
} else if (signal == SIGFPE) {
|
||||
switch (info->si_code) {
|
||||
case FPE_INTDIV: code_desc = "integer divide by zero"; break;
|
||||
case FPE_INTOVF: code_desc = "integer overflow"; break;
|
||||
case FPE_FLTDIV: code_desc = "floating-point divide by zero"; break;
|
||||
case FPE_FLTOVF: code_desc = "floating-point overflow"; break;
|
||||
case FPE_FLTUND: code_desc = "floating-point underflow"; break;
|
||||
case FPE_FLTRES: code_desc = "floating-point inexact result"; break;
|
||||
case FPE_FLTINV: code_desc = "floating-point invalid operation"; break;
|
||||
case FPE_FLTSUB: code_desc = "subscript out of range"; break;
|
||||
default: code_desc = "unknown FPE code"; break;
|
||||
}
|
||||
} else if (signal == SIGILL) {
|
||||
switch (info->si_code) {
|
||||
case ILL_ILLOPC: code_desc = "illegal opcode"; break;
|
||||
case ILL_ILLOPN: code_desc = "illegal operand"; break;
|
||||
case ILL_ILLADR: code_desc = "illegal addressing mode"; break;
|
||||
case ILL_ILLTRP: code_desc = "illegal trap"; break;
|
||||
case ILL_PRVOPC: code_desc = "privileged opcode"; break;
|
||||
case ILL_PRVREG: code_desc = "privileged register"; break;
|
||||
case ILL_COPROC: code_desc = "coprocessor error"; break;
|
||||
case ILL_BADSTK: code_desc = "internal stack error"; break;
|
||||
default: code_desc = "unknown illegal instruction code"; break;
|
||||
}
|
||||
}
|
||||
Debug(1, "Fault reason: %s", code_desc);
|
||||
|
||||
ucontext_t *uc = (ucontext_t *) context;
|
||||
cr2 = info->si_addr;
|
||||
#if defined(__x86_64__)
|
||||
#if defined(__FreeBSD_kernel__) || defined(__FreeBSD__)
|
||||
ip = (void *)(uc->uc_mcontext.mc_rip);
|
||||
|
|
@ -85,11 +135,11 @@ RETSIGTYPE zm_die_handler(int signal)
|
|||
ip = (void *)(uc->uc_mcontext.arm_pc);
|
||||
#endif
|
||||
|
||||
// Print the signal address and instruction pointer if available
|
||||
// Print the fault address and instruction pointer
|
||||
if ( ip ) {
|
||||
Error("Signal address is %p, from %p", cr2, ip);
|
||||
Error("Fault address: %p, instruction pointer: %p", fault_addr, ip);
|
||||
} else {
|
||||
Error("Signal address is %p, no instruction pointer", cr2);
|
||||
Error("Fault address: %p, instruction pointer: unavailable", fault_addr);
|
||||
}
|
||||
}
|
||||
#endif // ( HAVE_SIGINFO_T && HAVE_UCONTEXT_T )
|
||||
|
|
@ -97,7 +147,51 @@ RETSIGTYPE zm_die_handler(int signal)
|
|||
|
||||
|
||||
// Print backtrace if enabled and available
|
||||
#if ( !defined(ZM_NO_CRASHTRACE) && HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS )
|
||||
#if !defined(ZM_NO_CRASHTRACE)
|
||||
#if HAVE_LIBUNWIND
|
||||
// libunwind provides better backtraces, especially on ARM
|
||||
unw_cursor_t cursor;
|
||||
unw_context_t uc;
|
||||
unw_word_t pc, offset;
|
||||
char sym[256];
|
||||
int frame = 0;
|
||||
|
||||
unw_getcontext(&uc);
|
||||
unw_init_local(&cursor, &uc);
|
||||
|
||||
char cmd[1024] = "addr2line -Cfip -e ";
|
||||
char *cmd_ptr = cmd + strlen(cmd);
|
||||
cmd_ptr += snprintf(cmd_ptr, sizeof(cmd) - (cmd_ptr - cmd), "%s", self);
|
||||
bool found_offset = false;
|
||||
|
||||
while (unw_step(&cursor) > 0 && frame < TRACE_SIZE) {
|
||||
unw_get_reg(&cursor, UNW_REG_IP, &pc);
|
||||
if (pc == 0)
|
||||
break;
|
||||
|
||||
if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset) == 0) {
|
||||
Error("Backtrace %d: %s+0x%lx [0x%lx]", frame, sym, (unsigned long)offset, (unsigned long)pc);
|
||||
} else {
|
||||
Error("Backtrace %d: [0x%lx]", frame, (unsigned long)pc);
|
||||
}
|
||||
|
||||
// Collect addresses for addr2line
|
||||
int rc = snprintf(cmd_ptr, sizeof(cmd) - (cmd_ptr - cmd), " 0x%lx", (unsigned long)pc);
|
||||
if (rc > 0 && static_cast<size_t>(rc) < sizeof(cmd) - (cmd_ptr - cmd)) {
|
||||
cmd_ptr += rc;
|
||||
found_offset = true;
|
||||
}
|
||||
|
||||
frame++;
|
||||
}
|
||||
|
||||
if (found_offset) {
|
||||
Error("Backtrace complete, please install debug symbols (typically zoneminder-dbg)");
|
||||
Error("and execute the following command for more information:");
|
||||
Error("%s", cmd);
|
||||
}
|
||||
#elif HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS
|
||||
// Fallback to glibc backtrace (less reliable on ARM)
|
||||
void *trace[TRACE_SIZE];
|
||||
int trace_size = 0;
|
||||
trace_size = backtrace(trace, TRACE_SIZE);
|
||||
|
|
@ -113,7 +207,7 @@ RETSIGTYPE zm_die_handler(int signal)
|
|||
|
||||
// Print the full backtrace
|
||||
for (int i = 0; i < trace_size; i++) {
|
||||
Error("Backtrace %u: %s", i, messages[i]);
|
||||
Error("Backtrace %d: %s", i, messages[i]);
|
||||
if (strstr(messages[i], self) == nullptr)
|
||||
continue;
|
||||
ofs_ptr = strstr(messages[i], "(+0x");
|
||||
|
|
@ -136,7 +230,8 @@ RETSIGTYPE zm_die_handler(int signal)
|
|||
Error("and execute the following command for more information:");
|
||||
Error("%s", cmd);
|
||||
}
|
||||
#endif // ( !defined(ZM_NO_CRASHTRACE) && HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS )
|
||||
#endif // HAVE_LIBUNWIND / HAVE_DECL_BACKTRACE
|
||||
#endif // !defined(ZM_NO_CRASHTRACE)
|
||||
// Icon: Don't exit, setting zm_terminate should cause the exit to happen in a timely manner.
|
||||
// The main reason not to here is to make valgrind traces quieter because logger gets free while other threads
|
||||
// are still running and trying to log.
|
||||
|
|
|
|||
|
|
@ -23,7 +23,10 @@
|
|||
#include "zm_config.h"
|
||||
#include <csignal>
|
||||
|
||||
#if HAVE_EXECINFO_H
|
||||
#if HAVE_LIBUNWIND
|
||||
#define UNW_LOCAL_ONLY
|
||||
#include <libunwind.h>
|
||||
#elif HAVE_EXECINFO_H
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
#if HAVE_UCONTEXT_H
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#cmakedefine HAVE_SENDFILE 1
|
||||
#cmakedefine HAVE_DECL_BACKTRACE 1
|
||||
#cmakedefine HAVE_DECL_BACKTRACE_SYMBOLS 1
|
||||
#cmakedefine HAVE_LIBUNWIND 1
|
||||
#cmakedefine HAVE_POSIX_MEMALIGN 1
|
||||
#cmakedefine HAVE_SIGINFO_T 1
|
||||
#cmakedefine HAVE_UCONTEXT_T 1
|
||||
|
|
|
|||
Loading…
Reference in New Issue