feat: log cancelled HTTP/gRPC requests (#5980)
This will be helpful to see when the querier or router is too slow and we timeout. In contrast to the existing metrics, this also helps w/ log correlation (i.e. "when did we get stuck"). Closes #5975. Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>pull/24376/head
parent
d466a04ad8
commit
b438e4b18a
|
@ -283,6 +283,10 @@ impl RequestLogContext {
|
|||
pub(crate) fn new(ctx: SpanContext) -> Self {
|
||||
Self(ctx)
|
||||
}
|
||||
|
||||
pub fn ctx(&self) -> &SpanContext {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Format span context as Jaeger trace context.
|
||||
|
|
|
@ -20,14 +20,14 @@ use std::task::{Context, Poll};
|
|||
use futures::ready;
|
||||
use http::{HeaderValue, Request, Response};
|
||||
use http_body::SizeHint;
|
||||
use pin_project::pin_project;
|
||||
use pin_project::{pin_project, pinned_drop};
|
||||
use tower::{Layer, Service};
|
||||
|
||||
use observability_deps::tracing::error;
|
||||
use observability_deps::tracing::{error, warn};
|
||||
use trace::{span::SpanRecorder, TraceCollector};
|
||||
|
||||
use crate::classify::{classify_headers, classify_response, Classification};
|
||||
use crate::ctx::{RequestLogContext, TraceHeaderParser};
|
||||
use crate::ctx::{RequestLogContext, RequestLogContextExt, TraceHeaderParser};
|
||||
use crate::metrics::{MetricsCollection, MetricsRecorder};
|
||||
|
||||
/// `TraceLayer` implements `tower::Layer` and can be used to decorate a
|
||||
|
@ -99,23 +99,16 @@ where
|
|||
fn call(&mut self, mut request: Request<ReqBody>) -> Self::Future {
|
||||
let metrics_recorder = Some(self.metrics.recorder(&request));
|
||||
|
||||
let span = match self
|
||||
let request_ctx = match self
|
||||
.trace_header_parser
|
||||
.parse(self.collector.as_ref(), request.headers())
|
||||
{
|
||||
Ok(Some(ctx)) => {
|
||||
request
|
||||
.extensions_mut()
|
||||
.insert(RequestLogContext::new(ctx.clone()));
|
||||
let ctx = RequestLogContext::new(ctx);
|
||||
|
||||
(ctx.sampled && ctx.collector.is_some()).then(|| {
|
||||
let span = ctx.child("IOx");
|
||||
request.extensions_mut().insert(ctx.clone());
|
||||
|
||||
// Add context to request for use by service handlers
|
||||
request.extensions_mut().insert(span.ctx.clone());
|
||||
|
||||
span
|
||||
})
|
||||
Some(ctx)
|
||||
}
|
||||
Ok(None) => None,
|
||||
Err(e) => {
|
||||
|
@ -124,9 +117,24 @@ where
|
|||
}
|
||||
};
|
||||
|
||||
let span = request_ctx.as_ref().and_then(|ctx| {
|
||||
let ctx = ctx.ctx();
|
||||
|
||||
(ctx.sampled && ctx.collector.is_some()).then(|| {
|
||||
let span = ctx.child("IOx");
|
||||
|
||||
// Add context to request for use by service handlers
|
||||
request.extensions_mut().insert(span.ctx.clone());
|
||||
|
||||
span
|
||||
})
|
||||
});
|
||||
|
||||
TracedFuture {
|
||||
request_ctx,
|
||||
metrics_recorder,
|
||||
span_recorder: SpanRecorder::new(span),
|
||||
was_ready: false,
|
||||
inner: self.service.call(request),
|
||||
}
|
||||
}
|
||||
|
@ -134,15 +142,31 @@ where
|
|||
|
||||
/// `TracedFuture` wraps a future returned by a `tower::Service` and
|
||||
/// instruments the returned body if any
|
||||
#[pin_project]
|
||||
#[pin_project(PinnedDrop)]
|
||||
#[derive(Debug)]
|
||||
pub struct TracedFuture<F> {
|
||||
request_ctx: Option<RequestLogContext>,
|
||||
span_recorder: SpanRecorder,
|
||||
metrics_recorder: Option<MetricsRecorder>,
|
||||
was_ready: bool,
|
||||
#[pin]
|
||||
inner: F,
|
||||
}
|
||||
|
||||
#[pinned_drop]
|
||||
impl<F> PinnedDrop for TracedFuture<F> {
|
||||
fn drop(self: Pin<&mut Self>) {
|
||||
if !self.was_ready {
|
||||
let trace = self.request_ctx.format_jaeger();
|
||||
warn!(
|
||||
%trace,
|
||||
when="before returning headers",
|
||||
"request cancelled",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<F, ResBody, Error> Future for TracedFuture<F>
|
||||
where
|
||||
F: Future<Output = Result<Response<ResBody>, Error>>,
|
||||
|
@ -155,6 +179,7 @@ where
|
|||
ready!(self.as_mut().project().inner.poll(cx));
|
||||
|
||||
let projected = self.as_mut().project();
|
||||
*projected.was_ready = true;
|
||||
let span_recorder = projected.span_recorder;
|
||||
let mut metrics_recorder = projected.metrics_recorder.take().unwrap();
|
||||
match &result {
|
||||
|
@ -180,7 +205,9 @@ where
|
|||
match result {
|
||||
Ok(mut response) => {
|
||||
// add trace-id header to the response, if we have one
|
||||
let span_recorder = self.as_mut().project().span_recorder.take();
|
||||
let projected = self.as_mut().project();
|
||||
let request_ctx = projected.request_ctx.take();
|
||||
let span_recorder = projected.span_recorder.take();
|
||||
if let Some(trace_id) = span_recorder.span().map(|span| span.ctx.trace_id) {
|
||||
// format as hex
|
||||
let trace_id = HeaderValue::from_str(&format!("{:x}", trace_id.get())).unwrap();
|
||||
|
@ -188,7 +215,10 @@ where
|
|||
}
|
||||
|
||||
Poll::Ready(Ok(response.map(|body| TracedBody {
|
||||
request_ctx,
|
||||
span_recorder,
|
||||
was_done_data: false,
|
||||
was_ready_trailers: false,
|
||||
inner: body,
|
||||
metrics_recorder,
|
||||
})))
|
||||
|
@ -199,15 +229,39 @@ where
|
|||
}
|
||||
|
||||
/// `TracedBody` wraps a `http_body::Body` and instruments it
|
||||
#[pin_project]
|
||||
#[pin_project(PinnedDrop)]
|
||||
#[derive(Debug)]
|
||||
pub struct TracedBody<B> {
|
||||
request_ctx: Option<RequestLogContext>,
|
||||
span_recorder: SpanRecorder,
|
||||
metrics_recorder: MetricsRecorder,
|
||||
was_done_data: bool,
|
||||
was_ready_trailers: bool,
|
||||
#[pin]
|
||||
inner: B,
|
||||
}
|
||||
|
||||
#[pinned_drop]
|
||||
impl<B> PinnedDrop for TracedBody<B> {
|
||||
fn drop(self: Pin<&mut Self>) {
|
||||
if !self.was_done_data {
|
||||
let trace = self.request_ctx.format_jaeger();
|
||||
warn!(
|
||||
%trace,
|
||||
when="before fully returning body data",
|
||||
"request cancelled",
|
||||
);
|
||||
} else if !self.was_ready_trailers {
|
||||
let trace = self.request_ctx.format_jaeger();
|
||||
warn!(
|
||||
%trace,
|
||||
when="before returning trailers",
|
||||
"request cancelled",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: http_body::Body> http_body::Body for TracedBody<B> {
|
||||
type Data = B::Data;
|
||||
type Error = B::Error;
|
||||
|
@ -219,7 +273,10 @@ impl<B: http_body::Body> http_body::Body for TracedBody<B> {
|
|||
let maybe_result = ready!(self.as_mut().project().inner.poll_data(cx));
|
||||
let result = match maybe_result {
|
||||
Some(result) => result,
|
||||
None => return Poll::Ready(None),
|
||||
None => {
|
||||
*self.as_mut().project().was_done_data = true;
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
};
|
||||
|
||||
let projected = self.as_mut().project();
|
||||
|
@ -249,6 +306,7 @@ impl<B: http_body::Body> http_body::Body for TracedBody<B> {
|
|||
ready!(self.as_mut().project().inner.poll_trailers(cx));
|
||||
|
||||
let projected = self.as_mut().project();
|
||||
*projected.was_ready_trailers = true;
|
||||
let span_recorder = projected.span_recorder;
|
||||
let metrics_recorder = projected.metrics_recorder;
|
||||
match &result {
|
||||
|
|
Loading…
Reference in New Issue