commit
fcb82b2c99
|
@ -3909,6 +3909,7 @@ dependencies = [
|
|||
name = "panic_logging"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"metric",
|
||||
"observability_deps",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
|
|
@ -370,7 +370,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
info!(?ingester_addresses, "starting querier");
|
||||
let querier = create_querier_server_type(
|
||||
&common_state,
|
||||
metrics,
|
||||
Arc::clone(&metrics),
|
||||
catalog,
|
||||
object_store,
|
||||
time_provider,
|
||||
|
@ -388,5 +388,5 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
Service::create_grpc_only(querier, &querier_run_config),
|
||||
];
|
||||
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(common_state, services, metrics).await?)
|
||||
}
|
||||
|
|
|
@ -91,7 +91,7 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
|
||||
let server_type = create_compactor_server_type(
|
||||
&common_state,
|
||||
metric_registry,
|
||||
Arc::clone(&metric_registry),
|
||||
catalog,
|
||||
object_store,
|
||||
exec,
|
||||
|
@ -103,5 +103,5 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
info!("starting compactor");
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(common_state, services, metric_registry).await?)
|
||||
}
|
||||
|
|
|
@ -130,5 +130,10 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
));
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(
|
||||
common_state,
|
||||
services,
|
||||
Arc::new(metric::Registry::default()),
|
||||
)
|
||||
.await?)
|
||||
}
|
||||
|
|
|
@ -89,7 +89,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
let exec = Arc::new(Executor::new(config.query_exec_thread_count));
|
||||
let server_type = create_ingester_server_type(
|
||||
&common_state,
|
||||
metric_registry,
|
||||
Arc::clone(&metric_registry),
|
||||
catalog,
|
||||
object_store,
|
||||
exec,
|
||||
|
@ -101,5 +101,5 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
info!("starting ingester");
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(common_state, services, metric_registry).await?)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use ioxd_common::Service;
|
||||
use ioxd_common::{grpc_listener, http_listener, serve, server_type::CommonServerState};
|
||||
use observability_deps::tracing::{error, info};
|
||||
|
@ -66,7 +68,11 @@ fn build_malloc_conf() -> String {
|
|||
///
|
||||
/// Due to its invasive nature (install global panic handling,
|
||||
/// logging, etc) this function should not be used during unit tests.
|
||||
pub async fn main(common_state: CommonServerState, services: Vec<Service>) -> Result<()> {
|
||||
pub async fn main(
|
||||
common_state: CommonServerState,
|
||||
services: Vec<Service>,
|
||||
metrics: Arc<metric::Registry>,
|
||||
) -> Result<()> {
|
||||
let git_hash = env!("GIT_HASH", "starting influxdb_iox server");
|
||||
let num_cpus = num_cpus::get();
|
||||
let build_malloc_conf = build_malloc_conf();
|
||||
|
@ -98,7 +104,7 @@ pub async fn main(common_state: CommonServerState, services: Vec<Service>) -> Re
|
|||
// lifetime of the program - this is actually a good thing, as it prevents
|
||||
// the panic handler from being removed while unwinding a panic (which in
|
||||
// turn, causes a panic - see #548)
|
||||
let f = SendPanicsToTracing::new();
|
||||
let f = SendPanicsToTracing::new().with_metrics(&*metrics);
|
||||
std::mem::forget(f);
|
||||
|
||||
// Register jemalloc metrics
|
||||
|
|
|
@ -89,7 +89,7 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
let exec = Arc::new(Executor::new(num_threads));
|
||||
let server_type = create_querier_server_type(
|
||||
&common_state,
|
||||
metric_registry,
|
||||
Arc::clone(&metric_registry),
|
||||
catalog,
|
||||
object_store,
|
||||
time_provider,
|
||||
|
@ -101,5 +101,5 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
info!("starting querier");
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(common_state, services, metric_registry).await?)
|
||||
}
|
||||
|
|
|
@ -159,5 +159,10 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
));
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(
|
||||
common_state,
|
||||
services,
|
||||
Arc::new(metric::Registry::default()),
|
||||
)
|
||||
.await?)
|
||||
}
|
||||
|
|
|
@ -96,5 +96,5 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
|
||||
info!("starting router2");
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(common_state, services, metrics).await?)
|
||||
}
|
||||
|
|
|
@ -61,5 +61,10 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
));
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services).await?)
|
||||
Ok(main::main(
|
||||
common_state,
|
||||
services,
|
||||
Arc::new(metric::Registry::default()),
|
||||
)
|
||||
.await?)
|
||||
}
|
||||
|
|
|
@ -5,5 +5,6 @@ authors = ["Paul Dix <paul@pauldix.net>"]
|
|||
edition = "2021"
|
||||
|
||||
[dependencies] # In alphabetical order
|
||||
metric = { path = "../metric" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
use std::{fmt, panic, sync::Arc};
|
||||
|
||||
use metric::U64Counter;
|
||||
use observability_deps::tracing::{error, warn};
|
||||
use panic::PanicInfo;
|
||||
|
||||
|
@ -24,6 +25,7 @@ type PanicFunctionPtr = Arc<Box<dyn Fn(&PanicInfo<'_>) + Sync + Send + 'static>>
|
|||
/// prior panic hook.
|
||||
///
|
||||
/// Upon drop, restores the pre-existing panic hook
|
||||
#[derive(Default)]
|
||||
pub struct SendPanicsToTracing {
|
||||
/// The previously installed panic hook -- Note it is wrapped in an
|
||||
/// `Option` so we can `.take` it during the call to `drop()`;
|
||||
|
@ -40,12 +42,23 @@ impl SendPanicsToTracing {
|
|||
|
||||
Self { old_panic_hook }
|
||||
}
|
||||
}
|
||||
|
||||
// recommended by clippy
|
||||
impl Default for SendPanicsToTracing {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
/// Configure this panic handler to emit a panic count metric.
|
||||
///
|
||||
/// The metric is named `thread_panic_count_total` and is incremented each
|
||||
/// time the panic handler is invoked.
|
||||
pub fn with_metrics(self, metrics: &metric::Registry) -> Self {
|
||||
let panic_count = metrics
|
||||
.register_metric::<U64Counter>("thread_panic_count", "number of thread panics observed")
|
||||
.recorder(&[]);
|
||||
|
||||
let old_hook = Arc::clone(self.old_panic_hook.as_ref().expect("no hook set"));
|
||||
panic::set_hook(Box::new(move |info| {
|
||||
panic_count.inc(1);
|
||||
tracing_panic_hook(&old_hook, info)
|
||||
}));
|
||||
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,3 +110,36 @@ fn tracing_panic_hook(other_hook: &PanicFunctionPtr, panic_info: &PanicInfo<'_>)
|
|||
// panic function)
|
||||
other_hook(panic_info)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use metric::{Attributes, Metric};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn assert_count(metrics: &metric::Registry, count: u64) {
|
||||
let got = metrics
|
||||
.get_instrument::<Metric<U64Counter>>("thread_panic_count")
|
||||
.expect("failed to read metric")
|
||||
.get_observer(&Attributes::from(&[]))
|
||||
.expect("failed to get observer")
|
||||
.fetch();
|
||||
assert_eq!(got, count);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_panic_counter() {
|
||||
let metrics = metric::Registry::default();
|
||||
let _guard = SendPanicsToTracing::new().with_metrics(&metrics);
|
||||
|
||||
assert_count(&metrics, 0);
|
||||
|
||||
std::thread::spawn(|| {
|
||||
panic!("it's bananas");
|
||||
})
|
||||
.join()
|
||||
.expect_err("wat");
|
||||
|
||||
assert_count(&metrics, 1);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue