fix: improve crash backtrace on ARM and fix misleading signal info

- Add optional libunwind support for better stack traces on ARM
  (glibc's backtrace() doesn't work well on ARM due to missing
  frame pointers)
- Fix misleading signal info: si_pid/si_uid/si_status are only
  valid for SIGCHLD, not for SIGSEGV/SIGBUS - they were showing
  garbage values (the fault address misinterpreted as PID)
- Add human-readable fault descriptions for SIGSEGV, SIGBUS,
  SIGFPE, and SIGILL signals
- Use clearer terminology in error messages

To enable libunwind on ARM: apt install libunwind-dev

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
pull/4579/head
Isaac Connor 2026-01-29 15:55:56 -05:00
parent a6e91a4699
commit 981c6d685e
5 changed files with 160 additions and 13 deletions

View File

@ -313,6 +313,18 @@ if(HAVE_EXECINFO_H)
endif()
check_function_exists("backtrace_symbols" HAVE_DECL_BACKTRACE_SYMBOLS)
endif()
# libunwind provides better backtraces than glibc's backtrace(), especially on ARM
find_package(LibUnwind)
if(LIBUNWIND_FOUND)
set(HAVE_LIBUNWIND 1)
list(APPEND ZM_BIN_LIBS "${LIBUNWIND_LIBRARIES}")
include_directories("${LIBUNWIND_INCLUDE_DIR}")
set(optlibsfound "${optlibsfound} libunwind")
else()
set(optlibsnotfound "${optlibsnotfound} libunwind")
endif()
check_include_file("ucontext.h" HAVE_UCONTEXT_H)
check_include_file("sys/sendfile.h" HAVE_SYS_SENDFILE_H)
check_include_file("sys/syscall.h" HAVE_SYS_SYSCALL_H)

View File

@ -0,0 +1,36 @@
# FindLibUnwind.cmake
# Find the libunwind library for better stack traces on ARM and other platforms
#
# Sets:
# LIBUNWIND_FOUND - True if libunwind was found
# LIBUNWIND_INCLUDE_DIR - Include directory for libunwind
# LIBUNWIND_LIBRARIES - Libraries to link against
find_path(LIBUNWIND_INCLUDE_DIR
NAMES libunwind.h
PATHS /usr/include /usr/local/include
)
find_library(LIBUNWIND_LIBRARY
NAMES unwind
PATHS /usr/lib /usr/local/lib /usr/lib/arm-linux-gnueabihf /usr/lib/aarch64-linux-gnu
)
# On some platforms, we also need libunwind-generic or platform-specific libs
find_library(LIBUNWIND_GENERIC_LIBRARY
NAMES unwind-generic unwind-arm unwind-aarch64 unwind-x86_64 unwind-x86
PATHS /usr/lib /usr/local/lib /usr/lib/arm-linux-gnueabihf /usr/lib/aarch64-linux-gnu
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibUnwind
REQUIRED_VARS LIBUNWIND_LIBRARY LIBUNWIND_INCLUDE_DIR
)
if(LIBUNWIND_FOUND)
set(LIBUNWIND_LIBRARIES ${LIBUNWIND_LIBRARY})
if(LIBUNWIND_GENERIC_LIBRARY)
list(APPEND LIBUNWIND_LIBRARIES ${LIBUNWIND_GENERIC_LIBRARY})
endif()
mark_as_advanced(LIBUNWIND_INCLUDE_DIR LIBUNWIND_LIBRARY LIBUNWIND_GENERIC_LIBRARY)
endif()

View File

@ -56,15 +56,65 @@ RETSIGTYPE zm_die_handler(int signal)
// Get more information if available
#if ( HAVE_SIGINFO_T && HAVE_UCONTEXT_T )
void *ip = nullptr;
void *cr2 = nullptr;
void *fault_addr = nullptr;
if ( info && context ) {
Debug(1,
"Signal information: number %d code %d errno %d pid %d uid %d status %d",
signal, info->si_code, info->si_errno, info->si_pid,
info->si_uid, info->si_status);
// Log signal code and errno (these are always valid)
Debug(1, "Signal information: number %d code %d errno %d",
signal, info->si_code, info->si_errno);
// For memory fault signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL), si_addr is valid
// For other signals, si_pid/si_uid/si_status would be valid but we don't handle those here
fault_addr = info->si_addr;
// Provide human-readable description of the fault
const char *code_desc = "unknown";
if (signal == SIGSEGV) {
switch (info->si_code) {
case SEGV_MAPERR: code_desc = "address not mapped to object"; break;
case SEGV_ACCERR: code_desc = "invalid permissions for mapped object"; break;
#ifdef SEGV_BNDERR
case SEGV_BNDERR: code_desc = "failed address bound checks"; break;
#endif
#ifdef SEGV_PKUERR
case SEGV_PKUERR: code_desc = "access denied by memory protection keys"; break;
#endif
default: code_desc = "unknown segfault code"; break;
}
} else if (signal == SIGBUS) {
switch (info->si_code) {
case BUS_ADRALN: code_desc = "invalid address alignment"; break;
case BUS_ADRERR: code_desc = "nonexistent physical address"; break;
case BUS_OBJERR: code_desc = "object-specific hardware error"; break;
default: code_desc = "unknown bus error code"; break;
}
} else if (signal == SIGFPE) {
switch (info->si_code) {
case FPE_INTDIV: code_desc = "integer divide by zero"; break;
case FPE_INTOVF: code_desc = "integer overflow"; break;
case FPE_FLTDIV: code_desc = "floating-point divide by zero"; break;
case FPE_FLTOVF: code_desc = "floating-point overflow"; break;
case FPE_FLTUND: code_desc = "floating-point underflow"; break;
case FPE_FLTRES: code_desc = "floating-point inexact result"; break;
case FPE_FLTINV: code_desc = "floating-point invalid operation"; break;
case FPE_FLTSUB: code_desc = "subscript out of range"; break;
default: code_desc = "unknown FPE code"; break;
}
} else if (signal == SIGILL) {
switch (info->si_code) {
case ILL_ILLOPC: code_desc = "illegal opcode"; break;
case ILL_ILLOPN: code_desc = "illegal operand"; break;
case ILL_ILLADR: code_desc = "illegal addressing mode"; break;
case ILL_ILLTRP: code_desc = "illegal trap"; break;
case ILL_PRVOPC: code_desc = "privileged opcode"; break;
case ILL_PRVREG: code_desc = "privileged register"; break;
case ILL_COPROC: code_desc = "coprocessor error"; break;
case ILL_BADSTK: code_desc = "internal stack error"; break;
default: code_desc = "unknown illegal instruction code"; break;
}
}
Debug(1, "Fault reason: %s", code_desc);
ucontext_t *uc = (ucontext_t *) context;
cr2 = info->si_addr;
#if defined(__x86_64__)
#if defined(__FreeBSD_kernel__) || defined(__FreeBSD__)
ip = (void *)(uc->uc_mcontext.mc_rip);
@ -85,11 +135,11 @@ RETSIGTYPE zm_die_handler(int signal)
ip = (void *)(uc->uc_mcontext.arm_pc);
#endif
// Print the signal address and instruction pointer if available
// Print the fault address and instruction pointer
if ( ip ) {
Error("Signal address is %p, from %p", cr2, ip);
Error("Fault address: %p, instruction pointer: %p", fault_addr, ip);
} else {
Error("Signal address is %p, no instruction pointer", cr2);
Error("Fault address: %p, instruction pointer: unavailable", fault_addr);
}
}
#endif // ( HAVE_SIGINFO_T && HAVE_UCONTEXT_T )
@ -97,7 +147,51 @@ RETSIGTYPE zm_die_handler(int signal)
// Print backtrace if enabled and available
#if ( !defined(ZM_NO_CRASHTRACE) && HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS )
#if !defined(ZM_NO_CRASHTRACE)
#if HAVE_LIBUNWIND
// libunwind provides better backtraces, especially on ARM
unw_cursor_t cursor;
unw_context_t uc;
unw_word_t pc, offset;
char sym[256];
int frame = 0;
unw_getcontext(&uc);
unw_init_local(&cursor, &uc);
char cmd[1024] = "addr2line -Cfip -e ";
char *cmd_ptr = cmd + strlen(cmd);
cmd_ptr += snprintf(cmd_ptr, sizeof(cmd) - (cmd_ptr - cmd), "%s", self);
bool found_offset = false;
while (unw_step(&cursor) > 0 && frame < TRACE_SIZE) {
unw_get_reg(&cursor, UNW_REG_IP, &pc);
if (pc == 0)
break;
if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset) == 0) {
Error("Backtrace %d: %s+0x%lx [0x%lx]", frame, sym, (unsigned long)offset, (unsigned long)pc);
} else {
Error("Backtrace %d: [0x%lx]", frame, (unsigned long)pc);
}
// Collect addresses for addr2line
int rc = snprintf(cmd_ptr, sizeof(cmd) - (cmd_ptr - cmd), " 0x%lx", (unsigned long)pc);
if (rc > 0 && static_cast<size_t>(rc) < sizeof(cmd) - (cmd_ptr - cmd)) {
cmd_ptr += rc;
found_offset = true;
}
frame++;
}
if (found_offset) {
Error("Backtrace complete, please install debug symbols (typically zoneminder-dbg)");
Error("and execute the following command for more information:");
Error("%s", cmd);
}
#elif HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS
// Fallback to glibc backtrace (less reliable on ARM)
void *trace[TRACE_SIZE];
int trace_size = 0;
trace_size = backtrace(trace, TRACE_SIZE);
@ -113,7 +207,7 @@ RETSIGTYPE zm_die_handler(int signal)
// Print the full backtrace
for (int i = 0; i < trace_size; i++) {
Error("Backtrace %u: %s", i, messages[i]);
Error("Backtrace %d: %s", i, messages[i]);
if (strstr(messages[i], self) == nullptr)
continue;
ofs_ptr = strstr(messages[i], "(+0x");
@ -136,7 +230,8 @@ RETSIGTYPE zm_die_handler(int signal)
Error("and execute the following command for more information:");
Error("%s", cmd);
}
#endif // ( !defined(ZM_NO_CRASHTRACE) && HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS )
#endif // HAVE_LIBUNWIND / HAVE_DECL_BACKTRACE
#endif // !defined(ZM_NO_CRASHTRACE)
// Icon: Don't exit, setting zm_terminate should cause the exit to happen in a timely manner.
// The main reason not to here is to make valgrind traces quieter because logger gets free while other threads
// are still running and trying to log.

View File

@ -23,7 +23,10 @@
#include "zm_config.h"
#include <csignal>
#if HAVE_EXECINFO_H
#if HAVE_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#elif HAVE_EXECINFO_H
#include <execinfo.h>
#endif
#if HAVE_UCONTEXT_H

View File

@ -15,6 +15,7 @@
#cmakedefine HAVE_SENDFILE 1
#cmakedefine HAVE_DECL_BACKTRACE 1
#cmakedefine HAVE_DECL_BACKTRACE_SYMBOLS 1
#cmakedefine HAVE_LIBUNWIND 1
#cmakedefine HAVE_POSIX_MEMALIGN 1
#cmakedefine HAVE_SIGINFO_T 1
#cmakedefine HAVE_UCONTEXT_T 1