feat: report system stats in load generator (#24871)
* feat: report system stats in load generator

Added the mechanism to report system stats during load generation. The following stats are saved in a CSV file:

- cpu_usage
- disk_written_bytes
- disk_read_bytes
- memory
- virtual_memory

This only works when running the load generator against a local instance of influxdb3, i.e., one that is running on your machine.

Generating system stats is done by passing the --system-stats flag to the load generator.

pull/24876/head
parent
1b3d279d70
commit
2dde602995
|
@ -2555,12 +2555,14 @@ dependencies = [
|
||||||
"dotenvy",
|
"dotenvy",
|
||||||
"humantime",
|
"humantime",
|
||||||
"influxdb3_client",
|
"influxdb3_client",
|
||||||
|
"influxdb3_process",
|
||||||
"observability_deps",
|
"observability_deps",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
"rand",
|
"rand",
|
||||||
"secrecy",
|
"secrecy",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sysinfo",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
"trogging",
|
"trogging",
|
||||||
|
@ -4435,7 +4437,7 @@ checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"heck 0.4.1",
|
"heck 0.4.1",
|
||||||
"itertools 0.10.5",
|
"itertools 0.11.0",
|
||||||
"log",
|
"log",
|
||||||
"multimap",
|
"multimap",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
@ -4469,7 +4471,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
|
checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"itertools 0.10.5",
|
"itertools 0.11.0",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.53",
|
"syn 2.0.53",
|
||||||
|
@ -5739,9 +5741,9 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sysinfo"
|
name = "sysinfo"
|
||||||
version = "0.30.7"
|
version = "0.30.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0c385888ef380a852a16209afc8cfad22795dd8873d69c9a14d2e2088f118d18"
|
checksum = "4b1a378e48fb3ce3a5cf04359c456c9c98ff689bcf1c1bc6e6a31f247686f275"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"core-foundation-sys",
|
"core-foundation-sys",
|
||||||
|
|
|
@ -86,6 +86,7 @@ serde_urlencoded = "0.7.0"
|
||||||
sha2 = "0.10.8"
|
sha2 = "0.10.8"
|
||||||
snap = "1.0.0"
|
snap = "1.0.0"
|
||||||
sqlparser = "0.41.0"
|
sqlparser = "0.41.0"
|
||||||
|
sysinfo = "0.30.8"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
tokio = { version = "1.35", features = ["full"] }
|
tokio = { version = "1.35", features = ["full"] }
|
||||||
tokio-util = "0.7.9"
|
tokio-util = "0.7.9"
|
||||||
|
|
|
@ -5,8 +5,6 @@ authors.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
# Core Crates
|
# Core Crates
|
||||||
observability_deps.workspace = true
|
observability_deps.workspace = true
|
||||||
|
@ -14,6 +12,7 @@ trogging.workspace = true
|
||||||
|
|
||||||
# Local Deps
|
# Local Deps
|
||||||
influxdb3_client = { path = "../influxdb3_client" }
|
influxdb3_client = { path = "../influxdb3_client" }
|
||||||
|
influxdb3_process = { path = "../influxdb3_process", default-features = false }
|
||||||
|
|
||||||
# crates.io Dependencies
|
# crates.io Dependencies
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
@ -28,6 +27,7 @@ rand.workspace = true
|
||||||
secrecy.workspace = true
|
secrecy.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
|
sysinfo.workspace = true
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
thiserror.workspace = true
|
thiserror.workspace = true
|
||||||
url.workspace = true
|
url.workspace = true
|
||||||
|
|
|
@ -10,7 +10,7 @@ use tokio::time::Instant;
|
||||||
use crate::{
|
use crate::{
|
||||||
commands::common::LoadType,
|
commands::common::LoadType,
|
||||||
query_generator::{create_queriers, Format, Querier},
|
query_generator::{create_queriers, Format, Querier},
|
||||||
report::QueryReporter,
|
report::{QueryReporter, SystemStatsReporter},
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::common::InfluxDb3Config;
|
use super::common::InfluxDb3Config;
|
||||||
|
@ -62,12 +62,28 @@ pub(crate) async fn command(config: Config) -> Result<(), anyhow::Error> {
|
||||||
|
|
||||||
// set up a results reporter and spawn a thread to flush results
|
// set up a results reporter and spawn a thread to flush results
|
||||||
println!("generating results in: {results_file_path}");
|
println!("generating results in: {results_file_path}");
|
||||||
let query_reporter = Arc::new(QueryReporter::new(results_file)?);
|
let query_reporter = Arc::new(QueryReporter::new(results_file));
|
||||||
let reporter = Arc::clone(&query_reporter);
|
let reporter = Arc::clone(&query_reporter);
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
reporter.flush_reports();
|
reporter.flush_reports();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// spawn system stats collection
|
||||||
|
let stats_reporter = if let (Some(stats_file), Some(stats_file_path)) = (
|
||||||
|
load_config.system_stats_file,
|
||||||
|
load_config.system_stats_file_path,
|
||||||
|
) {
|
||||||
|
println!("generating system stats in: {stats_file_path}");
|
||||||
|
let stats_reporter = Arc::new(SystemStatsReporter::new(stats_file)?);
|
||||||
|
let s = Arc::clone(&stats_reporter);
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
s.report_stats();
|
||||||
|
});
|
||||||
|
Some((stats_file_path, stats_reporter))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
// create a InfluxDB Client and spawn tasks for each querier
|
// create a InfluxDB Client and spawn tasks for each querier
|
||||||
let mut tasks = Vec::new();
|
let mut tasks = Vec::new();
|
||||||
for querier in queriers {
|
for querier in queriers {
|
||||||
|
@ -92,6 +108,11 @@ pub(crate) async fn command(config: Config) -> Result<(), anyhow::Error> {
|
||||||
query_reporter.shutdown();
|
query_reporter.shutdown();
|
||||||
println!("results saved in: {results_file_path}");
|
println!("results saved in: {results_file_path}");
|
||||||
|
|
||||||
|
if let Some((stats_file_path, stats_reporter)) = stats_reporter {
|
||||||
|
println!("system stats saved in: {stats_file_path}");
|
||||||
|
stats_reporter.shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::commands::common::LoadType;
|
use crate::commands::common::LoadType;
|
||||||
use crate::line_protocol_generator::{create_generators, Generator};
|
use crate::line_protocol_generator::{create_generators, Generator};
|
||||||
use crate::report::WriteReporter;
|
use crate::report::{SystemStatsReporter, WriteReporter};
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use chrono::{DateTime, Local};
|
use chrono::{DateTime, Local};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
@ -112,7 +112,21 @@ pub(crate) async fn command(config: Config) -> Result<(), anyhow::Error> {
|
||||||
reporter.flush_reports();
|
reporter.flush_reports();
|
||||||
});
|
});
|
||||||
|
|
||||||
// TODO - spawn system stats collection
|
// spawn system stats collection
|
||||||
|
let stats_reporter = if let (Some(stats_file), Some(stats_file_path)) = (
|
||||||
|
load_config.system_stats_file,
|
||||||
|
load_config.system_stats_file_path,
|
||||||
|
) {
|
||||||
|
println!("generating system stats in: {stats_file_path}");
|
||||||
|
let stats_reporter = Arc::new(SystemStatsReporter::new(stats_file)?);
|
||||||
|
let s = Arc::clone(&stats_reporter);
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
s.report_stats();
|
||||||
|
});
|
||||||
|
Some((stats_file_path, stats_reporter))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
// spawn tokio tasks for each writer
|
// spawn tokio tasks for each writer
|
||||||
let mut tasks = Vec::new();
|
let mut tasks = Vec::new();
|
||||||
|
@ -140,6 +154,11 @@ pub(crate) async fn command(config: Config) -> Result<(), anyhow::Error> {
|
||||||
write_reporter.shutdown();
|
write_reporter.shutdown();
|
||||||
println!("results saved in: {results_file_path}");
|
println!("results saved in: {results_file_path}");
|
||||||
|
|
||||||
|
if let Some((stats_file_path, stats_reporter)) = stats_reporter {
|
||||||
|
println!("system stats saved in: {stats_file_path}");
|
||||||
|
stats_reporter.shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,15 @@
|
||||||
|
|
||||||
use crate::line_protocol_generator::{WriteSummary, WriterId};
|
use crate::line_protocol_generator::{WriteSummary, WriterId};
|
||||||
use crate::query_generator::QuerierId;
|
use crate::query_generator::QuerierId;
|
||||||
use anyhow::Context;
|
use anyhow::{bail, Context};
|
||||||
use chrono::{DateTime, Local};
|
use chrono::{DateTime, Local};
|
||||||
|
use influxdb3_process::INFLUXDB3_PROCESS_NAME;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
use sysinfo::{Pid, Process, ProcessRefreshKind, System};
|
||||||
// Logged reports will be flushed to the csv file on this interval
|
// Logged reports will be flushed to the csv file on this interval
|
||||||
const REPORT_FLUSH_INTERVAL: Duration = Duration::from_millis(100);
|
const REPORT_FLUSH_INTERVAL: Duration = Duration::from_millis(100);
|
||||||
|
|
||||||
|
@ -216,13 +218,13 @@ pub struct QueryReporter {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl QueryReporter {
|
impl QueryReporter {
|
||||||
pub fn new(csv_file: File) -> Result<Self, anyhow::Error> {
|
pub fn new(csv_file: File) -> Self {
|
||||||
let csv_writer = Mutex::new(csv::Writer::from_writer(csv_file));
|
let csv_writer = Mutex::new(csv::Writer::from_writer(csv_file));
|
||||||
Ok(Self {
|
Self {
|
||||||
state: Mutex::new(vec![]),
|
state: Mutex::new(vec![]),
|
||||||
csv_writer,
|
csv_writer,
|
||||||
shutdown: Mutex::new(false),
|
shutdown: Mutex::new(false),
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn report(
|
pub fn report(
|
||||||
|
@ -278,8 +280,8 @@ impl QueryReporter {
|
||||||
}
|
}
|
||||||
csv_writer.flush().expect("failed to flush csv reports");
|
csv_writer.flush().expect("failed to flush csv reports");
|
||||||
|
|
||||||
if console_stats.last_console_outptu_time.elapsed() > CONSOLE_REPORT_INTERVAL {
|
if console_stats.last_console_output_time.elapsed() > CONSOLE_REPORT_INTERVAL {
|
||||||
let elapsed_millis = console_stats.last_console_outptu_time.elapsed().as_millis();
|
let elapsed_millis = console_stats.last_console_output_time.elapsed().as_millis();
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"success: {:.0}/s, error: {:.0}/s, rows: {:.0}/s",
|
"success: {:.0}/s, error: {:.0}/s, rows: {:.0}/s",
|
||||||
|
@ -315,7 +317,7 @@ struct QueryRecord {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct QueryConsoleStats {
|
struct QueryConsoleStats {
|
||||||
last_console_outptu_time: Instant,
|
last_console_output_time: Instant,
|
||||||
success: usize,
|
success: usize,
|
||||||
error: usize,
|
error: usize,
|
||||||
rows: u64,
|
rows: u64,
|
||||||
|
@ -324,10 +326,103 @@ struct QueryConsoleStats {
|
||||||
impl QueryConsoleStats {
|
impl QueryConsoleStats {
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
last_console_outptu_time: Instant::now(),
|
last_console_output_time: Instant::now(),
|
||||||
success: 0,
|
success: 0,
|
||||||
error: 0,
|
error: 0,
|
||||||
rows: 0,
|
rows: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// System stats are sampled and written to the csv file on this interval, unless
// `sysinfo::MINIMUM_CPU_UPDATE_INTERVAL` is longer, in which case that is used instead
const SYSTEM_STATS_REPORT_INTERVAL: Duration = Duration::from_millis(500);
|
||||||
|
|
||||||
|
/// A single sample of process statistics, serialized as one row of the system
/// stats CSV file. Fields are serialized in declaration order, so reordering
/// them changes the CSV column order.
#[derive(Debug, Copy, Clone, Serialize)]
|
||||||
|
pub struct SystemStatsRecord {
|
||||||
|
/// Local wall-clock time at which the record was created.
wall_time: DateTime<Local>,
|
||||||
|
/// Milliseconds elapsed since `SystemStatsReporter::report_stats` started.
test_time_ms: u128,
|
||||||
|
/// CPU usage of the monitored process, as returned by sysinfo's
/// `Process::cpu_usage`.
cpu_usage: f32,
|
||||||
|
/// The `written_bytes` value of sysinfo's `Process::disk_usage`.
written_bytes: u64,
|
||||||
|
/// The `read_bytes` value of sysinfo's `Process::disk_usage`.
read_bytes: u64,
|
||||||
|
/// Memory of the monitored process, as returned by sysinfo's `Process::memory`.
memory_bytes: u64,
|
||||||
|
/// Virtual memory of the monitored process, as returned by sysinfo's
/// `Process::virtual_memory`.
virtual_memory_bytes: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Periodically samples statistics of a single process and writes them to a
/// CSV file. The sampling loop lives in `report_stats`; `shutdown` asks it to
/// stop.
#[derive(Debug)]
|
||||||
|
pub struct SystemStatsReporter {
|
||||||
|
/// Pid of the monitored process, resolved once in `new`.
pid: Pid,
|
||||||
|
/// sysinfo handle that is refreshed for `pid` before every sample.
system: Mutex<System>,
|
||||||
|
/// Destination for the serialized `SystemStatsRecord` rows.
csv_writer: Mutex<csv::Writer<File>>,
|
||||||
|
/// Set to `true` by `shutdown`; checked by `report_stats` after each record
/// is written.
shutdown: Mutex<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SystemStatsReporter {
|
||||||
|
pub fn new(csv_file: File) -> Result<Self, anyhow::Error> {
|
||||||
|
let csv_writer = Mutex::new(csv::Writer::from_writer(csv_file));
|
||||||
|
let mut system = System::new_all();
|
||||||
|
let mut processes = system
|
||||||
|
.processes_by_exact_name(INFLUXDB3_PROCESS_NAME)
|
||||||
|
.collect::<Vec<&Process>>();
|
||||||
|
if processes.is_empty() {
|
||||||
|
bail!("there is no '{}' process", INFLUXDB3_PROCESS_NAME);
|
||||||
|
}
|
||||||
|
if processes.len() > 1 {
|
||||||
|
bail!(
|
||||||
|
"ensure there is only one '{}' process running on your operating system",
|
||||||
|
INFLUXDB3_PROCESS_NAME
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let pid = processes.pop().unwrap().pid();
|
||||||
|
// refresh the system stats for the process to initialize the baseline:
|
||||||
|
system.refresh_pids(&[pid]);
|
||||||
|
Ok(Self {
|
||||||
|
pid,
|
||||||
|
system: Mutex::new(system),
|
||||||
|
csv_writer,
|
||||||
|
shutdown: Mutex::new(false),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn report_stats(&self) {
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let mut system = self.system.lock();
|
||||||
|
system.refresh_pids_specifics(
|
||||||
|
&[self.pid],
|
||||||
|
ProcessRefreshKind::new()
|
||||||
|
.with_cpu()
|
||||||
|
.with_memory()
|
||||||
|
.with_disk_usage(),
|
||||||
|
);
|
||||||
|
let process = system
|
||||||
|
.process(self.pid)
|
||||||
|
.unwrap_or_else(|| panic!("process with pid: {}", self.pid));
|
||||||
|
let mut csv_writer = self.csv_writer.lock();
|
||||||
|
let test_time_ms = Instant::now().duration_since(start_time).as_millis();
|
||||||
|
csv_writer
|
||||||
|
.serialize(SystemStatsRecord {
|
||||||
|
wall_time: Local::now(),
|
||||||
|
test_time_ms,
|
||||||
|
cpu_usage: process.cpu_usage(),
|
||||||
|
written_bytes: process.disk_usage().written_bytes,
|
||||||
|
read_bytes: process.disk_usage().read_bytes,
|
||||||
|
memory_bytes: process.memory(),
|
||||||
|
virtual_memory_bytes: process.virtual_memory(),
|
||||||
|
})
|
||||||
|
.expect("failed to write csv record for system stats");
|
||||||
|
csv_writer.flush().expect("flush system stats csv reports");
|
||||||
|
|
||||||
|
if *self.shutdown.lock() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::thread::sleep(
|
||||||
|
sysinfo::MINIMUM_CPU_UPDATE_INTERVAL.max(SYSTEM_STATS_REPORT_INTERVAL),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn shutdown(&self) {
|
||||||
|
*self.shutdown.lock() = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -4,6 +4,9 @@ use iox_time::{SystemProvider, Time, TimeProvider};
|
||||||
use metric::U64Gauge;
|
use metric::U64Gauge;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
/// The process name on the local OS running `influxdb3`
|
||||||
|
pub const INFLUXDB3_PROCESS_NAME: &str = "influxdb3";
|
||||||
|
|
||||||
#[cfg(all(not(feature = "heappy"), feature = "jemalloc_replacing_malloc"))]
|
#[cfg(all(not(feature = "heappy"), feature = "jemalloc_replacing_malloc"))]
|
||||||
pub mod jemalloc;
|
pub mod jemalloc;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue