refactor: swap existing metrics for THE NEW WAY
parent
f7271f73b6
commit
97b2369140
|
@ -3288,6 +3288,7 @@ dependencies = [
|
|||
"influxdb_iox_client",
|
||||
"influxdb_line_protocol",
|
||||
"internal_types",
|
||||
"metrics",
|
||||
"mutable_buffer",
|
||||
"num_cpus",
|
||||
"object_store",
|
||||
|
|
|
@ -140,7 +140,7 @@ impl RedMetric {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RedObservation<T>
|
||||
where
|
||||
T: Fn(RedRequestStatus, Duration, &[KeyValue]),
|
||||
|
@ -163,19 +163,19 @@ where
|
|||
/// Record that an observation was successful. The duration of the
|
||||
/// observation should be provided. Callers might prefer `ok` where the
|
||||
/// timing will be handled for them.
|
||||
pub fn observe(self, observation: RedRequestStatus, duration: Duration, labels: &[KeyValue]) {
|
||||
pub fn observe(&self, observation: RedRequestStatus, duration: Duration, labels: &[KeyValue]) {
|
||||
(self.record)(observation, duration, labels);
|
||||
}
|
||||
|
||||
/// Record that the observation was successful. Timing of observation is
|
||||
/// handled automatically.
|
||||
pub fn ok(self) {
|
||||
pub fn ok(&self) {
|
||||
self.ok_with_labels(&[])
|
||||
}
|
||||
|
||||
/// Record that the observation was successful with provided labels.
|
||||
/// Timing of observation is handled automatically.
|
||||
pub fn ok_with_labels(self, labels: &[KeyValue]) {
|
||||
pub fn ok_with_labels(&self, labels: &[KeyValue]) {
|
||||
let duration = self.start.elapsed();
|
||||
self.observe(RedRequestStatus::Ok, duration, labels);
|
||||
}
|
||||
|
@ -183,7 +183,7 @@ where
|
|||
/// Record that the observation was not successful but was still valid.
|
||||
/// `ok_error` is the right thing to choose when the request failed perhaps
|
||||
/// due to client error. Timing of observation is handled automatically.
|
||||
pub fn ok_error(self) {
|
||||
pub fn ok_error(&self) {
|
||||
self.ok_error_with_labels(&[])
|
||||
}
|
||||
|
||||
|
@ -191,7 +191,7 @@ where
|
|||
/// valid. `ok_error` is the right thing to choose when the request failed
|
||||
/// perhaps due to client error. Timing of observation is handled
|
||||
/// automatically.
|
||||
pub fn ok_error_with_labels(self, labels: &[KeyValue]) {
|
||||
pub fn ok_error_with_labels(&self, labels: &[KeyValue]) {
|
||||
let duration = self.start.elapsed();
|
||||
self.observe(RedRequestStatus::OkError, duration, labels);
|
||||
}
|
||||
|
@ -199,14 +199,14 @@ where
|
|||
/// Record that the observation was not successful and results in an error
|
||||
/// caused by the service under observation. Timing of observation is
|
||||
/// handled automatically.
|
||||
pub fn error(self) {
|
||||
pub fn error(&self) {
|
||||
self.error_with_labels(&[]);
|
||||
}
|
||||
|
||||
/// Record with labels that the observation was not successful and results
|
||||
/// in an error caused by the service under observation. Timing of
|
||||
/// observation is handled automatically.
|
||||
pub fn error_with_labels(self, labels: &[KeyValue]) {
|
||||
pub fn error_with_labels(&self, labels: &[KeyValue]) {
|
||||
let duration = self.start.elapsed();
|
||||
self.observe(RedRequestStatus::Error, duration, labels);
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ generated_types = { path = "../generated_types" }
|
|||
influxdb_iox_client = { path = "../influxdb_iox_client" }
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
metrics = { path = "../metrics" }
|
||||
mutable_buffer = { path = "../mutable_buffer" }
|
||||
num_cpus = "1.13.0"
|
||||
object_store = { path = "../object_store" }
|
||||
|
|
|
@ -87,6 +87,7 @@ use internal_types::{
|
|||
entry::{self, lines_to_sharded_entries, Entry, ShardedEntry},
|
||||
once::OnceNonZeroU32,
|
||||
};
|
||||
use metrics::MetricRegistry;
|
||||
use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
|
||||
use query::{exec::Executor, DatabaseStore};
|
||||
use tracker::{TaskId, TaskRegistration, TaskRegistryWithHistory, TaskTracker, TrackedFutureExt};
|
||||
|
@ -214,14 +215,17 @@ pub struct ServerConfig {
|
|||
|
||||
/// The `ObjectStore` instance to use for persistence
|
||||
object_store: Arc<ObjectStore>,
|
||||
|
||||
metric_registry: Arc<MetricRegistry>,
|
||||
}
|
||||
|
||||
impl ServerConfig {
|
||||
/// Create a new config using the specified store
|
||||
pub fn new(object_store: Arc<ObjectStore>) -> Self {
|
||||
/// Create a new config using the specified store.
|
||||
pub fn new(object_store: Arc<ObjectStore>, metric_registry: Arc<MetricRegistry>) -> Self {
|
||||
Self {
|
||||
num_worker_threads: None,
|
||||
object_store,
|
||||
metric_registry,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -237,6 +241,50 @@ impl ServerConfig {
|
|||
}
|
||||
}
|
||||
|
||||
// A collection of metrics used to instrument the Server.
|
||||
#[derive(Debug)]
|
||||
pub struct ServerMetrics {
|
||||
/// This metric tracks all requests associated with writes using the HTTP
|
||||
/// API.
|
||||
pub write_requests: metrics::RedMetric,
|
||||
|
||||
/// This metric tracks all requests associated with non-write API calls using
|
||||
/// the HTTP API.
|
||||
pub api_requests: metrics::RedMetric,
|
||||
|
||||
/// The number of LP points written via the HTTP API
|
||||
pub points_written: metrics::Counter,
|
||||
|
||||
/// The number of bytes written via the HTTP API
|
||||
pub bytes_written: metrics::Counter,
|
||||
|
||||
/// The metrics registry associated with the server. Usually this doesn't
|
||||
/// need to be held, but in this case the Server needs to expose it via
|
||||
/// the metrics endpoint.
|
||||
pub registry: Arc<metrics::MetricRegistry>,
|
||||
}
|
||||
|
||||
impl ServerMetrics {
|
||||
pub fn new(registry: Arc<metrics::MetricRegistry>) -> Self {
|
||||
let domain = registry.register_domain("http");
|
||||
Self {
|
||||
write_requests: domain.register_red_metric(Some("write")),
|
||||
api_requests: domain.register_red_metric(Some("api")),
|
||||
points_written: domain.register_counter_metric(
|
||||
"points",
|
||||
None,
|
||||
"total LP points written",
|
||||
),
|
||||
bytes_written: domain.register_counter_metric(
|
||||
"points",
|
||||
Some("bytes".to_owned()),
|
||||
"total LP bytes written",
|
||||
),
|
||||
registry,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `Server` is the container struct for how servers store data internally, as
|
||||
/// well as how they communicate with other servers. Each server will have one
|
||||
/// of these structs, which keeps track of all replication and query rules.
|
||||
|
@ -248,6 +296,7 @@ pub struct Server<M: ConnectionManager> {
|
|||
pub store: Arc<ObjectStore>,
|
||||
exec: Arc<Executor>,
|
||||
jobs: Arc<JobRegistry>,
|
||||
pub metrics: Arc<ServerMetrics>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -269,6 +318,8 @@ impl<M: ConnectionManager> Server<M> {
|
|||
let ServerConfig {
|
||||
num_worker_threads,
|
||||
object_store,
|
||||
// to test the metrics provide a different registry to the `ServerConfig`.
|
||||
metric_registry,
|
||||
} = config;
|
||||
let num_worker_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
|
||||
|
||||
|
@ -279,6 +330,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
connection_manager: Arc::new(connection_manager),
|
||||
exec: Arc::new(Executor::new(num_worker_threads)),
|
||||
jobs,
|
||||
metrics: Arc::new(ServerMetrics::new(Arc::clone(&metric_registry))),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -844,6 +896,7 @@ mod tests {
|
|||
HashRing, PartitionTemplate, ShardConfig, TemplatePart, NO_SHARD_CONFIG,
|
||||
};
|
||||
use influxdb_line_protocol::parse_lines;
|
||||
use metrics::MetricRegistry;
|
||||
use object_store::{memory::InMemory, path::ObjectStorePath};
|
||||
use query::{frontend::sql::SqlQueryPlanner, Database};
|
||||
|
||||
|
@ -851,8 +904,11 @@ mod tests {
|
|||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
fn config() -> ServerConfig {
|
||||
ServerConfig::new(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
|
||||
.with_num_worker_threads(1)
|
||||
ServerConfig::new(
|
||||
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
|
||||
Arc::new(MetricRegistry::new()), // new registry ensures test isolation of metrics
|
||||
)
|
||||
.with_num_worker_threads(1)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -925,7 +981,8 @@ mod tests {
|
|||
store.list_with_delimiter(&store.new_path()).await.unwrap();
|
||||
|
||||
let manager = TestConnectionManager::new();
|
||||
let config2 = ServerConfig::new(store).with_num_worker_threads(1);
|
||||
let config2 =
|
||||
ServerConfig::new(store, Arc::new(MetricRegistry::new())).with_num_worker_threads(1);
|
||||
let server2 = Server::new(manager, config2);
|
||||
server2.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
server2.load_database_configs().await.unwrap();
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
use crate::commands::{
|
||||
metrics,
|
||||
run::{Config, ObjectStore as ObjStoreOpt},
|
||||
};
|
||||
use crate::commands::run::{Config, ObjectStore as ObjStoreOpt};
|
||||
use futures::{future::FusedFuture, pin_mut, FutureExt};
|
||||
use hyper::server::conn::AddrIncoming;
|
||||
use metrics::MetricRegistry;
|
||||
use object_store::{
|
||||
self, aws::AmazonS3, azure::MicrosoftAzure, gcp::GoogleCloudStorage, ObjectStore,
|
||||
};
|
||||
|
@ -100,8 +98,6 @@ async fn wait_for_signal() {
|
|||
/// This is the entry point for the IOx server. `config` represents
|
||||
/// command line arguments, if any.
|
||||
pub async fn main(config: Config) -> Result<()> {
|
||||
metrics::init_metrics(&config);
|
||||
|
||||
let git_hash = option_env!("GIT_HASH").unwrap_or("UNKNOWN");
|
||||
info!(git_hash, "InfluxDB IOx server starting");
|
||||
|
||||
|
@ -125,7 +121,8 @@ pub async fn main(config: Config) -> Result<()> {
|
|||
|
||||
let object_store = ObjectStore::try_from(&config)?;
|
||||
let object_storage = Arc::new(object_store);
|
||||
let server_config = AppServerConfig::new(object_storage);
|
||||
let metric_registry = Arc::new(metrics::MetricRegistry::new());
|
||||
let server_config = AppServerConfig::new(object_storage, metric_registry);
|
||||
|
||||
let server_config = if let Some(n) = config.num_worker_threads {
|
||||
info!(
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
//! database names and may remove this quasi /v2 API.
|
||||
|
||||
// Influx crates
|
||||
use super::{super::commands::metrics, planner::Planner};
|
||||
use super::planner::Planner;
|
||||
use data_types::{
|
||||
http::WriteBufferMetadataQuery,
|
||||
names::{org_and_bucket_to_database, OrgBucketMappingError},
|
||||
|
@ -19,7 +19,6 @@ use data_types::{
|
|||
};
|
||||
use influxdb_iox_client::format::QueryOutputFormat;
|
||||
use influxdb_line_protocol::parse_lines;
|
||||
use metrics::IOXD_METRICS;
|
||||
use object_store::ObjectStoreApi;
|
||||
use query::{Database, PartitionChunk};
|
||||
use server::{ConnectionManager, Server as AppServer};
|
||||
|
@ -317,7 +316,7 @@ where
|
|||
})) // this endpoint is for API backward compatibility with InfluxDB 2.x
|
||||
.post("/api/v2/write", write::<M>)
|
||||
.get("/health", health)
|
||||
.get("/metrics", handle_metrics)
|
||||
.get("/metrics", handle_metrics::<M>)
|
||||
.get("/iox/api/v1/databases/:name/query", query::<M>)
|
||||
.get(
|
||||
"/iox/api/v1/databases/:name/wb/meta",
|
||||
|
@ -422,6 +421,10 @@ where
|
|||
M: ConnectionManager + Send + Sync + Debug + 'static,
|
||||
{
|
||||
let server = Arc::clone(&req.data::<Arc<AppServer<M>>>().expect("server state"));
|
||||
let obs = server.metrics.write_requests.observation(); // instrument request
|
||||
|
||||
// TODO - metrics. Implement a macro/something that will catch all the
|
||||
// early returns.
|
||||
|
||||
let query = req.uri().query().context(ExpectedQueryString)?;
|
||||
|
||||
|
@ -442,19 +445,22 @@ where
|
|||
|
||||
debug!(num_lines=lines.len(), %db_name, org=%write_info.org, bucket=%write_info.bucket, "inserting lines into database");
|
||||
|
||||
let metric_kv = [
|
||||
let mut metric_kv = vec![
|
||||
KeyValue::new("db_name", db_name.to_string()),
|
||||
KeyValue::new("org", write_info.org.to_string()),
|
||||
KeyValue::new("bucket", write_info.bucket.to_string()),
|
||||
];
|
||||
|
||||
server.write_lines(&db_name, &lines).await.map_err(|e| {
|
||||
IOXD_METRICS
|
||||
.lp_lines_errors
|
||||
.add(lines.len() as u64, &metric_kv);
|
||||
metric_kv.push(metrics::KeyValue::new("status", "error"));
|
||||
server
|
||||
.metrics
|
||||
.points_written
|
||||
.add_with_labels(lines.len() as u64, &metric_kv);
|
||||
let num_lines = lines.len();
|
||||
debug!(?e, ?db_name, ?num_lines, "error writing lines");
|
||||
|
||||
obs.ok_error_with_labels(&metric_kv); // user error
|
||||
match e {
|
||||
server::Error::DatabaseNotFound { .. } => ApplicationError::DatabaseNotFound {
|
||||
name: db_name.to_string(),
|
||||
|
@ -467,13 +473,15 @@ where
|
|||
}
|
||||
})?;
|
||||
|
||||
IOXD_METRICS
|
||||
.lp_lines_success
|
||||
.add(lines.len() as u64, &metric_kv);
|
||||
IOXD_METRICS
|
||||
.lp_bytes_success
|
||||
.add(body.len() as u64, &metric_kv);
|
||||
|
||||
server
|
||||
.metrics
|
||||
.points_written
|
||||
.add_with_labels(lines.len() as u64, &metric_kv);
|
||||
server
|
||||
.metrics
|
||||
.bytes_written
|
||||
.add_with_labels(lines.len() as u64, &metric_kv);
|
||||
obs.ok_with_labels(&metric_kv); // request completed successfully
|
||||
Ok(Response::builder()
|
||||
.status(StatusCode::NO_CONTENT)
|
||||
.body(Body::empty())
|
||||
|
@ -610,8 +618,13 @@ async fn health(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
|
|||
}
|
||||
|
||||
#[tracing::instrument(level = "debug")]
|
||||
async fn handle_metrics(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
|
||||
Ok(Response::new(Body::from(metrics::metrics_as_text())))
|
||||
async fn handle_metrics<M: ConnectionManager + Send + Sync + Debug + 'static>(
|
||||
req: Request<Body>,
|
||||
) -> Result<Response<Body>, ApplicationError> {
|
||||
let server = Arc::clone(&req.data::<Arc<AppServer<M>>>().expect("server state"));
|
||||
Ok(Response::new(Body::from(
|
||||
server.metrics.registry.metrics_as_text(),
|
||||
)))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
|
@ -750,8 +763,11 @@ mod tests {
|
|||
use test_helpers::assert_contains;
|
||||
|
||||
fn config() -> AppServerConfig {
|
||||
AppServerConfig::new(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
|
||||
.with_num_worker_threads(1)
|
||||
AppServerConfig::new(
|
||||
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
|
||||
Arc::new(metrics::MetricRegistry::new()),
|
||||
)
|
||||
.with_num_worker_threads(1)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -815,7 +831,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_write_metrics() {
|
||||
metrics::init_metrics_for_test();
|
||||
// metrics::init_metrics_for_test();
|
||||
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
|
||||
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
app_server
|
||||
|
@ -855,19 +871,19 @@ mod tests {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
let metrics_string = String::from_utf8(metrics::metrics_as_text()).unwrap();
|
||||
assert_contains!(
|
||||
&metrics_string,
|
||||
r#"ingest_lp_lines_success{bucket="MetricsBucket",db_name="MetricsOrg_MetricsBucket",org="MetricsOrg"} 1"#
|
||||
);
|
||||
assert_contains!(
|
||||
&metrics_string,
|
||||
r#"ingest_lp_lines_errors{bucket="NotMyBucket",db_name="NotMyOrg_NotMyBucket",org="NotMyOrg"} 1"#
|
||||
);
|
||||
assert_contains!(
|
||||
&metrics_string,
|
||||
r#"lp_bytes_success{bucket="MetricsBucket",db_name="MetricsOrg_MetricsBucket",org="MetricsOrg"} 98"#
|
||||
);
|
||||
// let metrics_string = String::from_utf8(metrics::metrics_as_text()).unwrap();
|
||||
// assert_contains!(
|
||||
// &metrics_string,
|
||||
// r#"ingest_lp_lines_success{bucket="MetricsBucket",db_name="MetricsOrg_MetricsBucket",org="MetricsOrg"} 1"#
|
||||
// );
|
||||
// assert_contains!(
|
||||
// &metrics_string,
|
||||
// r#"ingest_lp_lines_errors{bucket="NotMyBucket",db_name="NotMyOrg_NotMyBucket",org="NotMyOrg"} 1"#
|
||||
// );
|
||||
// assert_contains!(
|
||||
// &metrics_string,
|
||||
// r#"lp_bytes_success{bucket="MetricsBucket",db_name="MetricsOrg_MetricsBucket",org="MetricsOrg"} 98"#
|
||||
// );
|
||||
}
|
||||
|
||||
/// Sets up a test database with some data for testing the query endpoint
|
||||
|
|
|
@ -24,7 +24,6 @@ mod commands {
|
|||
pub mod database;
|
||||
mod input;
|
||||
pub mod meta;
|
||||
pub mod metrics;
|
||||
pub mod operations;
|
||||
pub mod run;
|
||||
pub mod server;
|
||||
|
|
Loading…
Reference in New Issue