fix: flaky `service::tests::test_log_on_panic` (#7850)
* refactor: remove line number from test * refactor: clean up code * fix: flaky `service::tests::test_log_on_panic` This seems to be a really rare issue. So provoking it requires a lot of tests. This can be done (using `fish`): ```console $ while cargo with "parallel -n0 {bin} {args} ::: $(seq 10)" -- test -p service_grpc_influxrpc --lib -- --test-threads 20; echo; end ``` Fixes #7838. * fix: panic handler construction --------- Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>pull/24376/head
parent
98aa7df607
commit
c5ee3250f7
File diff suppressed because it is too large
Load Diff
|
@ -107,7 +107,7 @@ pub async fn main(
|
|||
// lifetime of the program - this is actually a good thing, as it prevents
|
||||
// the panic handler from being removed while unwinding a panic (which in
|
||||
// turn, causes a panic - see #548)
|
||||
let f = SendPanicsToTracing::new().with_metrics(&metrics);
|
||||
let f = SendPanicsToTracing::new_with_metrics(&metrics);
|
||||
std::mem::forget(f);
|
||||
|
||||
// Register jemalloc metrics
|
||||
|
|
|
@ -36,31 +36,31 @@ pub struct SendPanicsToTracing {
|
|||
|
||||
impl SendPanicsToTracing {
|
||||
pub fn new() -> Self {
|
||||
let current_panic_hook: PanicFunctionPtr = Arc::new(panic::take_hook());
|
||||
let old_panic_hook = Some(Arc::clone(¤t_panic_hook));
|
||||
panic::set_hook(Box::new(move |info| {
|
||||
let panic_type = PanicType::classify(info);
|
||||
tracing_panic_hook(¤t_panic_hook, info, panic_type)
|
||||
}));
|
||||
|
||||
Self { old_panic_hook }
|
||||
Self::new_inner(None)
|
||||
}
|
||||
|
||||
/// Configure this panic handler to emit a panic count metric.
|
||||
///
|
||||
/// The metric is named `thread_panic_count_total` and is incremented each
|
||||
/// time the panic handler is invoked.
|
||||
pub fn with_metrics(self, metrics: &metric::Registry) -> Self {
|
||||
let metric = Metrics::new(metrics);
|
||||
pub fn new_with_metrics(metrics: &metric::Registry) -> Self {
|
||||
let metrics = Metrics::new(metrics);
|
||||
Self::new_inner(Some(metrics))
|
||||
}
|
||||
|
||||
let old_hook = Arc::clone(self.old_panic_hook.as_ref().expect("no hook set"));
|
||||
fn new_inner(metrics: Option<Metrics>) -> Self {
|
||||
let current_panic_hook: PanicFunctionPtr = Arc::new(panic::take_hook());
|
||||
let old_panic_hook = Some(Arc::clone(¤t_panic_hook));
|
||||
panic::set_hook(Box::new(move |info| {
|
||||
let panic_type = PanicType::classify(info);
|
||||
metric.inc(panic_type);
|
||||
tracing_panic_hook(&old_hook, info, panic_type);
|
||||
if let Some(metrics) = &metrics {
|
||||
metrics.inc(panic_type);
|
||||
}
|
||||
error!(panic_type=panic_type.name(), panic_info=%info, "Thread panic");
|
||||
current_panic_hook(info);
|
||||
}));
|
||||
|
||||
self
|
||||
Self { old_panic_hook }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,19 +104,6 @@ impl Drop for SendPanicsToTracing {
|
|||
}
|
||||
}
|
||||
|
||||
fn tracing_panic_hook(
|
||||
other_hook: &PanicFunctionPtr,
|
||||
panic_info: &PanicInfo<'_>,
|
||||
panic_type: PanicType,
|
||||
) {
|
||||
// Attempt to replicate the standard format:
|
||||
error!(panic_type=panic_type.name(), panic_info=%panic_info, "Thread panic");
|
||||
|
||||
// Call into the previous panic function (typically the standard
|
||||
// panic function)
|
||||
other_hook(panic_info)
|
||||
}
|
||||
|
||||
/// Ensure panics are fatal events by exiting the process with an exit code of
|
||||
/// 1 after calling the existing panic handler, if any.
|
||||
pub fn make_panics_fatal() {
|
||||
|
@ -225,7 +212,7 @@ mod tests {
|
|||
|
||||
let metrics = metric::Registry::default();
|
||||
let capture = Arc::new(TracingCapture::new());
|
||||
let _guard = SendPanicsToTracing::new().with_metrics(&metrics);
|
||||
let guard = SendPanicsToTracing::new_with_metrics(&metrics);
|
||||
|
||||
assert_count(&metrics, "offset_overflow", 0);
|
||||
assert_count(&metrics, "unknown", 0);
|
||||
|
@ -255,14 +242,23 @@ mod tests {
|
|||
.join()
|
||||
.expect_err("wat");
|
||||
|
||||
drop(guard);
|
||||
let capture2 = Arc::clone(&capture);
|
||||
std::thread::spawn(move || {
|
||||
capture2.register_in_current_thread();
|
||||
panic!("no guard");
|
||||
})
|
||||
.join()
|
||||
.expect_err("wat");
|
||||
|
||||
assert_count(&metrics, "offset_overflow", 2);
|
||||
assert_count(&metrics, "unknown", 1);
|
||||
|
||||
assert_eq!(
|
||||
capture.to_string(),
|
||||
"level = ERROR; message = Thread panic; panic_type = \"unknown\"; panic_info = panicked at 'it's bananas', panic_logging/src/lib.rs:236:13; \
|
||||
\nlevel = ERROR; message = Thread panic; panic_type = \"offset_overflow\"; panic_info = panicked at 'offset', panic_logging/src/lib.rs:244:13; \
|
||||
\nlevel = ERROR; message = Thread panic; panic_type = \"offset_overflow\"; panic_info = panicked at 'offset overflow', panic_logging/src/lib.rs:253:13; ",
|
||||
"level = ERROR; message = Thread panic; panic_type = \"unknown\"; panic_info = panicked at 'it's bananas', panic_logging/src/lib.rs:223:13; \
|
||||
\nlevel = ERROR; message = Thread panic; panic_type = \"offset_overflow\"; panic_info = panicked at 'offset', panic_logging/src/lib.rs:231:13; \
|
||||
\nlevel = ERROR; message = Thread panic; panic_type = \"offset_overflow\"; panic_info = panicked at 'offset overflow', panic_logging/src/lib.rs:240:13; ",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ datafusion_util = { path = "../datafusion_util" }
|
|||
influxdb_storage_client = { path = "../influxdb_storage_client" }
|
||||
metric = { path = "../metric" }
|
||||
panic_logging = { path = "../panic_logging" }
|
||||
procspawn = { version = "0.10", default-features = false, features = ["test-support", "safe-shared-libraries"] }
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
trace_http = { path = "../trace_http" }
|
||||
service_grpc_testing = { path = "../service_grpc_testing" }
|
||||
|
|
|
@ -2649,8 +2649,29 @@ mod tests {
|
|||
grpc_request_metric_has_count(&fixture, "MeasurementTagValues", "server_error", 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_log_on_panic() {
|
||||
procspawn::enable_test_support!();
|
||||
|
||||
#[test]
|
||||
fn test_log_on_panic() {
|
||||
// libtest (i.e. the standard library test fixture) sets panic hooks. This will race w/ our own panic hooks. To
|
||||
// prevent that, we spawn a dedicated process with its own panic hooks that is isolated from the remaining
|
||||
// tests.
|
||||
|
||||
procspawn::spawn((), |_| {
|
||||
// do NOT write to stdout (default behavior)
|
||||
std::panic::set_hook(Box::new(|_| {}));
|
||||
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
rt.block_on(test_log_on_panic_inner());
|
||||
})
|
||||
.join()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn test_log_on_panic_inner() {
|
||||
// Send a message to a route that causes a panic and ensure:
|
||||
// 1. We don't use up all executors 2. The panic message
|
||||
// message ends up in the log system
|
||||
|
@ -2687,7 +2708,7 @@ mod tests {
|
|||
// Note we don't include the actual line / column in the
|
||||
// expected panic message to avoid needing to update the test
|
||||
// whenever the source code file changed.
|
||||
let expected_error = "'This is a test panic', service_grpc_testing/src/lib.rs:18:9";
|
||||
let expected_error = "'This is a test panic', service_grpc_testing/src/lib.rs:";
|
||||
assert_contains!(captured_logs, expected_error);
|
||||
|
||||
// Ensure that panics don't exhaust the tokio executor by
|
||||
|
|
|
@ -199,7 +199,7 @@ once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
|||
rustls = { version = "0.21", features = ["dangerous_configuration"] }
|
||||
scopeguard = { version = "1" }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "ioapiset", "knownfolders", "libloaderapi", "memoryapi", "minwinbase", "minwindef", "namedpipeapi", "ntsecapi", "ntstatus", "objbase", "processenv", "processthreadsapi", "psapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
|
||||
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
|
||||
|
||||
|
@ -207,7 +207,7 @@ windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", feat
|
|||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
scopeguard = { version = "1" }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "ioapiset", "knownfolders", "libloaderapi", "memoryapi", "minwinbase", "minwindef", "namedpipeapi", "ntsecapi", "ntstatus", "objbase", "processenv", "processthreadsapi", "psapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
|
||||
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
|
||||
|
||||
|
|
Loading…
Reference in New Issue