feat: log cancelled HTTP/gRPC requests (#5980)

This will be helpful to see when the querier or router is too slow and
we timeout. In contrast to the existing metrics, this also helps w/ log
correlation (i.e. "when did we get stuck").

Closes #5975.

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Marco Neumann 2022-10-27 07:33:22 +00:00 committed by GitHub
parent d466a04ad8
commit b438e4b18a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 18 deletions

View File

@ -283,6 +283,10 @@ impl RequestLogContext {
pub(crate) fn new(ctx: SpanContext) -> Self {
Self(ctx)
}
pub fn ctx(&self) -> &SpanContext {
&self.0
}
}
/// Format span context as Jaeger trace context.

View File

@ -20,14 +20,14 @@ use std::task::{Context, Poll};
use futures::ready;
use http::{HeaderValue, Request, Response};
use http_body::SizeHint;
use pin_project::pin_project;
use pin_project::{pin_project, pinned_drop};
use tower::{Layer, Service};
use observability_deps::tracing::error;
use observability_deps::tracing::{error, warn};
use trace::{span::SpanRecorder, TraceCollector};
use crate::classify::{classify_headers, classify_response, Classification};
use crate::ctx::{RequestLogContext, TraceHeaderParser};
use crate::ctx::{RequestLogContext, RequestLogContextExt, TraceHeaderParser};
use crate::metrics::{MetricsCollection, MetricsRecorder};
/// `TraceLayer` implements `tower::Layer` and can be used to decorate a
@ -99,23 +99,16 @@ where
fn call(&mut self, mut request: Request<ReqBody>) -> Self::Future {
let metrics_recorder = Some(self.metrics.recorder(&request));
let span = match self
let request_ctx = match self
.trace_header_parser
.parse(self.collector.as_ref(), request.headers())
{
Ok(Some(ctx)) => {
request
.extensions_mut()
.insert(RequestLogContext::new(ctx.clone()));
let ctx = RequestLogContext::new(ctx);
(ctx.sampled && ctx.collector.is_some()).then(|| {
let span = ctx.child("IOx");
request.extensions_mut().insert(ctx.clone());
// Add context to request for use by service handlers
request.extensions_mut().insert(span.ctx.clone());
span
})
Some(ctx)
}
Ok(None) => None,
Err(e) => {
@ -124,9 +117,24 @@ where
}
};
let span = request_ctx.as_ref().and_then(|ctx| {
let ctx = ctx.ctx();
(ctx.sampled && ctx.collector.is_some()).then(|| {
let span = ctx.child("IOx");
// Add context to request for use by service handlers
request.extensions_mut().insert(span.ctx.clone());
span
})
});
TracedFuture {
request_ctx,
metrics_recorder,
span_recorder: SpanRecorder::new(span),
was_ready: false,
inner: self.service.call(request),
}
}
@ -134,15 +142,31 @@ where
/// `TracedFuture` wraps a future returned by a `tower::Service` and
/// instruments the returned body if any
#[pin_project]
#[pin_project(PinnedDrop)]
#[derive(Debug)]
pub struct TracedFuture<F> {
request_ctx: Option<RequestLogContext>,
span_recorder: SpanRecorder,
metrics_recorder: Option<MetricsRecorder>,
was_ready: bool,
#[pin]
inner: F,
}
#[pinned_drop]
impl<F> PinnedDrop for TracedFuture<F> {
fn drop(self: Pin<&mut Self>) {
if !self.was_ready {
let trace = self.request_ctx.format_jaeger();
warn!(
%trace,
when="before returning headers",
"request cancelled",
);
}
}
}
impl<F, ResBody, Error> Future for TracedFuture<F>
where
F: Future<Output = Result<Response<ResBody>, Error>>,
@ -155,6 +179,7 @@ where
ready!(self.as_mut().project().inner.poll(cx));
let projected = self.as_mut().project();
*projected.was_ready = true;
let span_recorder = projected.span_recorder;
let mut metrics_recorder = projected.metrics_recorder.take().unwrap();
match &result {
@ -180,7 +205,9 @@ where
match result {
Ok(mut response) => {
// add trace-id header to the response, if we have one
let span_recorder = self.as_mut().project().span_recorder.take();
let projected = self.as_mut().project();
let request_ctx = projected.request_ctx.take();
let span_recorder = projected.span_recorder.take();
if let Some(trace_id) = span_recorder.span().map(|span| span.ctx.trace_id) {
// format as hex
let trace_id = HeaderValue::from_str(&format!("{:x}", trace_id.get())).unwrap();
@ -188,7 +215,10 @@ where
}
Poll::Ready(Ok(response.map(|body| TracedBody {
request_ctx,
span_recorder,
was_done_data: false,
was_ready_trailers: false,
inner: body,
metrics_recorder,
})))
@ -199,15 +229,39 @@ where
}
/// `TracedBody` wraps a `http_body::Body` and instruments it
#[pin_project]
#[pin_project(PinnedDrop)]
#[derive(Debug)]
pub struct TracedBody<B> {
request_ctx: Option<RequestLogContext>,
span_recorder: SpanRecorder,
metrics_recorder: MetricsRecorder,
was_done_data: bool,
was_ready_trailers: bool,
#[pin]
inner: B,
}
#[pinned_drop]
impl<B> PinnedDrop for TracedBody<B> {
fn drop(self: Pin<&mut Self>) {
if !self.was_done_data {
let trace = self.request_ctx.format_jaeger();
warn!(
%trace,
when="before fully returning body data",
"request cancelled",
);
} else if !self.was_ready_trailers {
let trace = self.request_ctx.format_jaeger();
warn!(
%trace,
when="before returning trailers",
"request cancelled",
);
}
}
}
impl<B: http_body::Body> http_body::Body for TracedBody<B> {
type Data = B::Data;
type Error = B::Error;
@ -219,7 +273,10 @@ impl<B: http_body::Body> http_body::Body for TracedBody<B> {
let maybe_result = ready!(self.as_mut().project().inner.poll_data(cx));
let result = match maybe_result {
Some(result) => result,
None => return Poll::Ready(None),
None => {
*self.as_mut().project().was_done_data = true;
return Poll::Ready(None);
}
};
let projected = self.as_mut().project();
@ -249,6 +306,7 @@ impl<B: http_body::Body> http_body::Body for TracedBody<B> {
ready!(self.as_mut().project().inner.poll_trailers(cx));
let projected = self.as_mut().project();
*projected.was_ready_trailers = true;
let span_recorder = projected.span_recorder;
let metrics_recorder = projected.metrics_recorder;
match &result {