Merge pull request #2357 from influxdata/pd/add-data-generator

refactor: move data generator to IOx repo and fix build
pull/24376/head
kodiakhq[bot] 2021-08-19 19:35:48 +00:00 committed by GitHub
commit 19bdc00d4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 5898 additions and 15 deletions

226
Cargo.lock generated
View File

@ -271,7 +271,7 @@ dependencies = [
"md5",
"oauth2",
"paste",
"quick-error",
"quick-error 1.2.3",
"reqwest",
"serde",
"serde-xml-rs",
@ -300,7 +300,7 @@ dependencies = [
"md5",
"mime",
"percent-encoding",
"quick-error",
"quick-error 1.2.3",
"ring",
"serde",
"serde-xml-rs",
@ -398,13 +398,34 @@ dependencies = [
"constant_time_eq",
]
[[package]]
name = "block-buffer"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
dependencies = [
"block-padding",
"byte-tools",
"byteorder",
"generic-array 0.12.4",
]
[[package]]
name = "block-buffer"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
dependencies = [
"generic-array",
"generic-array 0.14.4",
]
[[package]]
name = "block-padding"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
dependencies = [
"byte-tools",
]
[[package]]
@ -446,6 +467,12 @@ version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631"
[[package]]
name = "byte-tools"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
[[package]]
name = "bytemuck"
version = "1.7.2"
@ -525,6 +552,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "chrono-english"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0be5180df5f7c41fc2416bc038bc8d78d44db8136c415b94ccbc95f523dc38e9"
dependencies = [
"chrono",
"scanlex",
"time 0.1.43",
]
[[package]]
name = "clang-sys"
version = "1.2.0"
@ -784,7 +822,7 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714"
dependencies = [
"generic-array",
"generic-array 0.14.4",
"subtle",
]
@ -917,13 +955,22 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "digest"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
dependencies = [
"generic-array 0.12.4",
]
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array",
"generic-array 0.14.4",
]
[[package]]
@ -1095,6 +1142,12 @@ dependencies = [
"synstructure",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
[[package]]
name = "fd-lock"
version = "2.0.0"
@ -1320,6 +1373,15 @@ dependencies = [
"tonic-build 0.5.2",
]
[[package]]
name = "generic-array"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd"
dependencies = [
"typenum",
]
[[package]]
name = "generic-array"
version = "0.14.4"
@ -1433,6 +1495,20 @@ version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
[[package]]
name = "handlebars"
version = "3.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4498fc115fa7d34de968184e473529abb40eeb6be8bc5f7faba3d08c316cb3e3"
dependencies = [
"log",
"pest",
"pest_derive",
"quick-error 2.0.1",
"serde",
"serde_json",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
@ -1488,7 +1564,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b"
dependencies = [
"crypto-mac",
"digest",
"digest 0.9.0",
]
[[package]]
@ -1816,6 +1892,36 @@ dependencies = [
"tokio",
]
[[package]]
name = "iox_data_generator"
version = "0.1.0"
dependencies = [
"chrono",
"chrono-english",
"clap",
"criterion",
"data_types",
"futures",
"generated_types",
"handlebars",
"influxdb2_client",
"influxdb_iox_client",
"itertools 0.9.0",
"packers",
"rand 0.8.4",
"rand_core 0.6.3",
"rand_seeder",
"serde",
"snafu",
"test_helpers",
"tokio",
"toml",
"tracing",
"tracing-futures",
"tracing-subscriber",
"uuid",
]
[[package]]
name = "iox_object_store"
version = "0.1.0"
@ -2014,6 +2120,12 @@ dependencies = [
"libc",
]
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "matchers"
version = "0.0.1"
@ -2035,9 +2147,9 @@ version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15"
dependencies = [
"block-buffer",
"digest",
"opaque-debug",
"block-buffer 0.9.0",
"digest 0.9.0",
"opaque-debug 0.3.0",
]
[[package]]
@ -2484,6 +2596,12 @@ version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opaque-debug"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
[[package]]
name = "opaque-debug"
version = "0.3.0"
@ -2788,6 +2906,49 @@ dependencies = [
"test_helpers",
]
[[package]]
name = "pest"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
dependencies = [
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pest_meta"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d"
dependencies = [
"maplit",
"pest",
"sha-1",
]
[[package]]
name = "petgraph"
version = "0.5.1"
@ -3188,6 +3349,12 @@ version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quick-error"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "quick-xml"
version = "0.20.0"
@ -3313,6 +3480,15 @@ dependencies = [
"rand_core 0.6.3",
]
[[package]]
name = "rand_seeder"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "612dd698949d531335b4c29d1c64fb11942798decfc08abc218578942e66d7d0"
dependencies = [
"rand_core 0.6.3",
]
[[package]]
name = "rayon"
version = "1.5.1"
@ -3605,7 +3781,7 @@ dependencies = [
"base64 0.13.0",
"bytes",
"chrono",
"digest",
"digest 0.9.0",
"futures",
"hex",
"hmac",
@ -3728,6 +3904,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scanlex"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "088c5d71572124929ea7549a8ce98e1a6fd33d0a38367b09027b382e67c033db"
[[package]]
name = "schannel"
version = "0.1.19"
@ -3950,6 +4132,18 @@ dependencies = [
"tokio",
]
[[package]]
name = "sha-1"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
dependencies = [
"block-buffer 0.7.3",
"digest 0.8.1",
"fake-simd",
"opaque-debug 0.2.3",
]
[[package]]
name = "sha1"
version = "0.6.0"
@ -3962,11 +4156,11 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b362ae5752fd2137731f9fa25fd4d9058af34666ca1966fb969119cc35719f12"
dependencies = [
"block-buffer",
"block-buffer 0.9.0",
"cfg-if",
"cpufeatures",
"digest",
"opaque-debug",
"digest 0.9.0",
"opaque-debug 0.3.0",
]
[[package]]
@ -4892,6 +5086,12 @@ version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06"
[[package]]
name = "ucd-trie"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
[[package]]
name = "unicode-bidi"
version = "0.3.6"

View File

@ -25,6 +25,7 @@ members = [
"influxdb_line_protocol",
"influxdb_tsm",
"internal_types",
"iox_data_generator",
"iox_object_store",
"logfmt",
"lifecycle",

View File

@ -0,0 +1,38 @@
[package]
name = "iox_data_generator"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
default-run = "iox_data_generator"
[dependencies]
chrono = "0.4.13"
chrono-english = "0.1.4"
clap = "2.33.1"
futures = "0.3.5"
handlebars = "3.3.0"
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }
influxdb2_client = { path = "../influxdb2_client" }
influxdb_iox_client = { path = "../influxdb_iox_client" }
packers = { path = "../packers" }
itertools = "0.9.0"
rand = { version = "0.8.3", features = ["small_rng"] }
rand_core = "0.6.2"
rand_seeder = "0.2.1"
serde = { version = "1.0", features = ["derive"] }
snafu = "0.6.8"
tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] }
toml = "0.5.6"
tracing = "0.1"
tracing-futures = "0.2.4"
tracing-subscriber = "0.2.11"
uuid = { version = "0.8.1", default_features = false }
[dev-dependencies]
criterion = "0.3.3"
test_helpers = { path = "../test_helpers" }
[[bench]]
name = "point_generation"
harness = false

View File

@ -0,0 +1,106 @@
# `iox_data_generator`
The `iox_data_generator` tool creates random data points according to a specification and loads them
into an `iox` instance to simulate real data.
To build and run, [first install Rust](https://www.rust-lang.org/tools/install). Then from the root of the `influxdb_iox` repo run:
```
cargo build --release
```
And the built binary has command line help:
```
./target/release/iox_data_generator --help
```
For examples of specifications see the [schemas folder](schemas)
## Use with two IOx servers and Kafka
The data generator tool can be used to simulate data being written to IOx in various shapes. This
is how to set up a local experiment for profiling or debugging purposes using a database in two IOx
instances: one writing to Kafka and one reading from Kafka.
If you're profiling IOx, be sure you've compiled and are running a release build using either:
```
cargo build --release
./target/release/influxdb_iox run --server-id 1
```
or:
```
cargo run --release -- run --server-id 1
```
Server ID is the only required attribute for running IOx; see `influxdb_iox run --help` for all the
other configuration options for the server you may want to set for your experiment. Note that the
default HTTP API address is `127.0.0.1:8080` unless you set something different with `--api-bind`
and the default gRPC address is `127.0.0.1:8082` unless you set something different using
`--grpc-bind`.
For the Kafka setup, you'll need to start two IOx servers, so you'll need to set the bind addresses
for at least one of them. Here's an example of the two commands to run:
```
cargo run --release -- run --server-id 1
cargo run --release -- run --server-id 2 --api-bind 127.0.0.1:8084 --grpc-bind 127.0.0.1:8086
```
You'll also need to run a Kafka instance. There's a Docker compose script in the influxdb_iox
repo you can run with:
```
docker-compose -f docker/ci-kafka-docker-compose.yml up kafka
```
The Kafka instance will be accessible from `127.0.0.1:9093` if you run it with this script.
Once you have the two IOx servers and one Kafka instance running, create a database with a name in
the format `[orgname]_[bucketname]`. For example, create a database in IOx named `mlb_pirates`, and
the org you'll use in the data generator will be `mlb` and the bucket will be `pirates`. The
`DatabaseRules` defined in `src/bin/create_database.rs` will set up a database in the "writer" IOx
instance to write to Kafka and the database in the "reader" IOx instance to read from Kafka if
you run it with:
```
cargo run -p iox_data_generator --bin create_database -- --writer 127.0.0.1:8082 --reader 127.0.0.1:8086 mlb_pirates
```
This script adds 3 rows to a `writer_test` table because of [this issue with the Kafka Consumer
needing data before it can find partitions](https://github.com/influxdata/influxdb_iox/issues/2189).
Once the database is created, decide what kind of data you would like to send it. You can use an
existing data generation schema in the `schemas` directory or create a new one, perhaps starting
from an existing schema as a guide. In this example, we're going to use
`iox_data_generator/schemas/cap-write.toml`.
Next, run the data generation tool as follows:
```
cargo run -p iox_data_generator -- --spec iox_data_generator/schemas/cap-write.toml --continue --host 127.0.0.1:8080 --token arbitrary --org mlb --bucket pirates
```
- `--spec iox_data_generator/schemas/cap-write.toml` sets the schema you want to use to generate the data
- `--continue` means the data generation tool should generate data every `sampling_interval` (which
is set in the schema) until we stop it
- `--host 127.0.0.1:8080` means to write to the writer IOx server running at the default HTTP API address
of `127.0.0.1:8080` (note this is NOT the gRPC address used by the `create_database` command)
- `--token arbitrary` - the data generator requires a token value but IOx doesn't use it, so this
can be any value.
- `--org mlb` is the part of the database name you created before the `_`
- `--bucket pirates` is the part of the database name you created after the `_`
You should be able to use `influxdb_iox sql -h http://127.0.0.1:8086` to connect to the gRPC of the reader
then `use database mlb_pirates;` and query the tables to see that the data is being inserted. That
is,
```
# in your influxdb_iox checkout
cargo run -- sql -h http://127.0.0.1:8086
```
Connecting to the writer instance won't show any data.

View File

@ -0,0 +1,66 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use iox_data_generator::{
specification::{AgentSpec, DataSpec, FieldSpec, FieldValueSpec, MeasurementSpec},
write::PointsWriterBuilder,
};
/// Benchmarks point generation for the simplest possible configuration:
/// a single agent producing one measurement with one boolean field.
pub fn single_agent(c: &mut Criterion) {
    // Build the spec bottom-up: field -> measurement -> agent -> data spec.
    let field = FieldSpec {
        name: "field-1".into(),
        field_value_spec: FieldValueSpec::Bool(true),
        count: None,
    };
    let measurement = MeasurementSpec {
        name: "measurement-1".into(),
        count: None,
        tags: vec![],
        fields: vec![field],
    };
    let agent = AgentSpec {
        name: "agent-1".into(),
        count: None,
        sampling_interval: Some(1),
        name_tag_key: None,
        tags: vec![],
        measurements: vec![measurement],
    };
    let spec = DataSpec {
        base_seed: Some("faster faster faster".into()),
        name: "benchmark".into(),
        agents: vec![agent],
    };

    // Discard the generated points; we only measure generation throughput.
    let mut points_writer = PointsWriterBuilder::new_no_op(true);

    // Simulate one hour of data at the 1-second sampling interval,
    // with timestamps expressed in nanoseconds.
    let seconds_per_hour = 60 * 60;
    let nanos_per_second = 1_000_000_000;
    let start_datetime = Some(0);
    let end_datetime = Some(seconds_per_hour * nanos_per_second);
    // 3600 one-second intervals plus the initial point at t = 0.
    let expected_points = 3601;

    let mut group = c.benchmark_group("single_agent");
    group.throughput(Throughput::Elements(expected_points));
    group.bench_function("single agent with basic configuration", |b| {
        b.iter(|| {
            let result = block_on(iox_data_generator::generate::<rand::rngs::SmallRng>(
                &spec,
                &mut points_writer,
                start_datetime,
                end_datetime,
                0,
                false,
            ));
            let n_points = result.expect("Could not generate data");
            assert_eq!(n_points, expected_points as usize);
        })
    });
}
// Bridge from criterion's synchronous benchmark closures to the async
// `iox_data_generator::generate` future. Applying `#[tokio::main]` to a
// helper function turns each call into: start a Tokio runtime, drive the
// future to completion, return its output.
// NOTE(review): this constructs a fresh runtime on every invocation, so
// runtime startup cost is included in each measured iteration — confirm
// that's acceptable for this benchmark's purpose.
#[tokio::main]
async fn block_on<F: std::future::Future>(f: F) -> F::Output {
    f.await
}
// Register the benchmark group and generate the benchmark `main` entry point.
criterion_group!(benches, single_agent);
criterion_main!(benches);

View File

@ -0,0 +1,428 @@
# This config file aims to replicate the data produced by the capwrite tool:
# https://github.com/influxdata/idpe/tree/e493a8e9b6b773e9374a8542ddcab7d8174d320d/performance/capacity/write
name = "cap_write"
base_seed = "correct horse battery staple"
[[agents]]
name = "cap_write_{{agent_id}}"
count = 3
sampling_interval = 10
[[agents.measurements]]
name = "system"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "n_cpus"
i64_range = [8, 8]
[[agents.measurements.fields]]
name = "n_users"
i64_range = [2, 11]
[[agents.measurements.fields]]
name = "uptime"
uptime = "i64"
[[agents.measurements.fields]]
name = "uptime_format"
uptime = "telegraf"
[[agents.measurements.fields]]
name = "load1"
f64_range = [0.0, 8.0]
[[agents.measurements.fields]]
name = "load5"
f64_range = [0.0, 8.0]
[[agents.measurements.fields]]
name = "load15"
f64_range = [0.0, 8.0]
[[agents.measurements]]
name = "mem"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "active"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "available"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "buffered"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "cached"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "inactive"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "slab"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "used"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "avaiable_percent"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "used_percent"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "wired"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "commit_limit"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "committed_as"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "dirty"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "high_free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "high_total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "huge_page_size"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "huge_pages_free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "huge_pages_total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "low_free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "low_total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "mapped"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "page_tables"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "shared"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "swap_cached"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "swap_free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "swap_total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "vmalloc_chunk"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "vmalloc_total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "vmalloc_used"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "write_back"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "write_back_tmp"
i64_range = [0, 10000000]
[[agents.measurements]]
name = "disk"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "used"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "used_percent"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "inodes_free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "inodes_total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "inodes_used"
i64_range = [0, 10000000]
[[agents.measurements]]
name = "swap"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "free"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "total"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "used"
i64_range = [0, 1000000] # Note this is an order of magnitude less deliberately to match
# https://github.com/influxdata/idpe/blob/ffbceb04dd4b3aa0828d039135977a4f36f7b822/performance/capacity/write/swap.go#L17
# not sure if that value was intentional, perhaps it is to ensure used < total?
[[agents.measurements.fields]]
name = "used_percent"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "in"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "out"
i64_range = [0, 10000000]
[[agents.measurements]]
name = "cpu"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.tags]]
name = "cpu"
value = "cpu-total"
[[agents.measurements.fields]]
name = "usage_user"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_nice"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_system"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_idle"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_irq"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_softirq"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_steal"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_guest"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "usage_guest_nice"
f64_range = [0.0, 100.0]
[[agents.measurements]]
name = "processes"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "blocked"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "running"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "sleeping"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "stopped"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "total"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "zombie"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "dead"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "wait"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "idle"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "paging"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "total_threads"
i64_range = [0, 255]
[[agents.measurements.fields]]
name = "unknown"
i64_range = [0, 255]
[[agents.measurements]]
name = "net"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "bytes_recv"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "bytes_sent"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "packets_sent"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "packets_recv"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "err_in"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "err_out"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "drop_in"
i64_range = [0, 10000000]
[[agents.measurements.fields]]
name = "drop_out"
i64_range = [0, 10000000]
[[agents.measurements]]
name = "diskio"
[[agents.measurements.tags]]
name = "host"
value = "host-{{agent_id}}"
[[agents.measurements.fields]]
name = "reads"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "writes"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "read_bytes"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "write_bytes"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "read_time"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "write_time"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "io_time"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "weighted_io_time"
i64_range = [0, 1000000]
[[agents.measurements.fields]]
name = "iops_in_progress"
i64_range = [0, 1000000]

View File

@ -0,0 +1,39 @@
# Every feature demonstrated in this schema is fully supported in the current implementation.
# Other schemas may demonstrate future features.
# Every point generated by this schema will contain a tag `data_spec=[this_value]`.
name = "demo_schema"
# This seed can be any string and will be used to seed all random number generators. To change
# the randomness in the points generated by this schema, change this value to something else.
# To generate the same data in the same order as previous runs with this schema (except for any
# elements in this schema you have changed), keep this value the same.
base_seed = "this is a demo"
[[agents]]
name = "basic"
sampling_interval = 10 # in seconds. TODO: parse nice durations like "12m" and "30s"
[[agents.measurements]]
name = "cpu"
[[agents.measurements.fields]]
name = "temp"
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "location"
pattern = "{{city}}, {{country}}"
replacements = [
{replace = "city", with = ["San Jose", "San Antonio", "Santa Maria"]},
{replace = "country", with = ["United States", "Costa Rica", ["Argentina", 10]]},
]
[[agents.measurements.fields]]
name = "wave_height"
i64_range = [0, 10]
increment = true
reset_after = 20
[[agents.measurements.fields]]
name = "uptime"
uptime = "i64"

View File

@ -0,0 +1,141 @@
name = "demo_schema"
base_seed = "correct horse battery staple"
# the most basic spec with no auto generating of agents, measurements, tags or fields
[[agents]]
name = "demo"
sampling_interval = 10
[[agents.measurements]]
name = "some_measurement"
[[agents.measurements.tags]]
name = "foo"
value = "bar"
[[agents.measurements.fields]]
name = "field1"
# it's a boolean field, the true means to generate the boolean randomly with equal probability
bool = true
[[agents.measurements.fields]]
name = "field2"
# it's an i64 field, values will be generated using a pseudo random number generator
# with a set seed and values in the range [3, 200). Setting it to [3, 3] or [3, 4] will
# make the value always be 3
i64_range = [3, 200]
[[agents.measurements.fields]]
name = "field3"
# it's an i64 field, values will be generated using a pseudo random number generator
# with a set seed and values in the range [1000, 5000)
i64_range = [1000, 5000]
# The value after each sample will be incremented by the next random amount. This is
# useful when simulating a counter.
increment = true
[[agents.measurements.fields]]
name = "field4"
# it's an f64 field, values will be generated using a pseudo random number generator
# with a set seed with values in the range [0.0, 100.0). Setting both values to the same
# number will make every value that number.
f64_range = [0.0, 100.0]
[[agents.measurements.fields]]
name = "field5"
# this is a string field. Parts of the string will be replaced. {{agent_name}} will be replaced
# with the name of the agent, {{random 200}} will be replaced with a random alphanumeric string
# of the length specified. {{format-time "%Y-%m-%d %H:%M"}} will be replaced with the time for
# this line in the simulation (that is, the same value that this line will have for its
# timestamp) formatted using a strftime specifier. Other patterns will be looked for based on
# the keys in replacements.
pattern = "{{agent_name}} foo {{level}} {{format-time \"%Y-%m-%d %H:%M\"}} {{random 200}}"
# each key in string replacements will be replaced in the pattern with a value randomly
# selected from the array of strings. Specify a weight as an integer greater than 1 to change
# the probability that a given string will be selected.
replacements = [
{replace = "color", with = ["red", "blue", "green"]},
{replace = "level", with = [
["info", 800],
["warn", 195],
["error", 5]
]}
]
[[agents]]
name = "some-server-{{agent_id}}"
count = 10
sampling_interval = 22
# Optional: every measurement (row) this agent produces will include a tag with the agent_id filled
# in:
# agent_name=some-server-{{agent_id}}
name_tag_key = "agent_name"
# Optional: these values will be rotated through so that each agent that gets created will have one.
# e.g.: the first agent will always inject region=us-west and the second will be region=us-east, etc.
tags = [
{key = "region", values = ["us-west", "us-east", "dublin", "frankfurt"]},
{key = "foo", values = ["bar", "asdf"]},
]
[[agents.measurements]]
name = "few-tags-measurement-{{measurement_id}}"
count = 20
[[agents.measurements.tags]]
# {{measurement_id}} will be replaced with the id of the measurement this tag is for
name = "tag-1-{{measurement_id}}"
value = "value-1"
[[agents.measurements.tags]]
name = "tag-2"
# {{cardinality}} will be replaced with the cardinality counter
value = "value-{{cardinality}}"
# Optional: This means each collection on this agent will have 4 rows of this measurement with
# unique values for this tag. This could be for things like org_id as a tag or for
# something like cpu measurements in Telegraf where you have a separate line for each cpu:
# cpu,cpu=cpu-total,host=foo usage_user=23.2,usage_system=33.3
# cpu,cpu=cpu-0,host=foo usage_user=22.2,usage_system=34.5
# cpu,cpu=cpu-1,host=foo usage_user=11.2,usage_system=56.5
cardinality = 4
[[agents.measurements.tags]]
name = "tag-3"
# {{counter}} will be replaced with the increment counter
value = "value-{{counter}}"
# Optional: This means that {{counter}} will increase by 1 after every 10 samples that are
# pulled.
# This option simulates temporal tag values like process IDs or container IDs in tags
increment_every = 10
[[agents.measurements.tags]]
name = "tag-4"
# {{counter}} will be replaced with the increment counter and {{cardinality}} will be replaced
# with the cardinality counter
value = "value-{{counter}}-{{cardinality}}"
# Optional: This means that {{counter}} will increment by 1 after every 100 samples that are
# pulled.
# This option simulates temporal tag values like process IDs or container IDs in tags
increment_every = 100
# when paired with cardinality, this can simulate having many containers running on a single
# host
cardinality = 10
[[agents.measurements.fields]]
name = "field-2"
bool = true
# This example shows generating 10 different measurements that each have their own set of
# tags (10 of them) and each have their own set of fields (4 of them)
[[agents.measurements]]
name = "mid-tags-measurement-{{measurement_id}}"
count = 10
[[agents.measurements.tags]]
name = "tag-{{tag_id}}-{{measurement_id}}"
count = 10
value = "value-{{cardinality}}"
cardinality = 3
[[agents.measurements.fields]]
name = "field-1"
bool = true

View File

@ -0,0 +1,52 @@
name = "tracing_schema"
base_seed = "this is a demo"
[[agents]]
name = "trace-sender"
sampling_interval = 10 # in seconds. TODO: parse nice durations like "12m" and "30s"
[[agents.measurements]]
name = "traces"
[[agents.measurements.tags]]
name = "trace_id"
value = "{{guid}}"
[[agents.measurements.tags]]
name = "span_id"
value = "{{guid}}"
cardinality = 10
[[agents.measurements.tags]]
name = "host"
value = "{{host}}"
replacements = [
{replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]
resample_every_line = true
[[agents.measurements.tags]]
name = "region"
value = "{{region}}"
replacements = [
{replace = "region", with = ["us-west", "us-east"]},
]
resample_every_line = false
[[agents.measurements.tags]]
name = "service"
value = "{{service}}"
replacements = [
{replace = "service", with = ["nginx", "istio", "storage", "gateway", "redis", "mysql", "s3"]},
]
resample_every_line = true
[[agents.measurements.fields]]
name = "timing"
f64_range = [0.0, 500.0]
[[agents.measurements.fields]]
name = "depth"
i64_range = [0, 3]
increment = true
reset_after = 10

View File

@ -0,0 +1,557 @@
//! Agents responsible for generating points
use crate::{
measurement::MeasurementGeneratorSet, now_ns, specification, tag::Tag, write::PointsWriter,
DataGenRng, RandomNumberGenerator,
};
use influxdb2_client::models::DataPoint;
use snafu::{ResultExt, Snafu};
use std::{fmt, time::Duration};
use tracing::{debug, info};
/// Agent-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while creating points
// NOTE: the `Snafu` derive generates context selectors (e.g.
// `CouldNotWritePoints`) that are used with `.context(...)` in the
// `impl Agent` methods below.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when generating points from measurements
    #[snafu(display("{}", source))]
    CouldNotGeneratePoint {
        /// Underlying `measurement` module error that caused this problem
        source: crate::measurement::Error,
    },

    /// Error that may happen when creating measurement generator sets
    #[snafu(display("Could not create measurement generator sets, caused by:\n{}", source))]
    CouldNotCreateMeasurementGeneratorSets {
        /// Underlying `measurement` module error that caused this problem
        source: crate::measurement::Error,
    },

    /// Error that may happen when writing points
    #[snafu(display("Could not write points, caused by:\n{}", source))]
    CouldNotWritePoints {
        /// Underlying `write` module error that caused this problem
        source: crate::write::Error,
    },
}
/// Each `AgentSpec` informs the instantiation of an `Agent`, which coordinates
/// the generation of the measurements in their specification.
#[derive(Debug)]
pub struct Agent<T: DataGenRng> {
    /// Numeric id distinguishing multiple instances spawned from one spec
    agent_id: usize,
    /// Human-readable agent name, used in log messages and seed derivation
    name: String,
    // Held but currently unused; see the seeding note in `new` below
    #[allow(dead_code)]
    rng: RandomNumberGenerator<T>,
    /// Tags attached at the agent level (shared by all of its measurements)
    agent_tags: Vec<Tag>,
    /// One generator set per measurement spec in the agent's configuration
    measurement_generator_sets: Vec<MeasurementGeneratorSet<T>>,
    /// Time between samples, in nanoseconds (converted from the spec's
    /// seconds in `new`); `None` means generate exactly once then stop
    sampling_interval: Option<i64>,
    /// nanoseconds since the epoch, used as the timestamp for the next
    /// generated point
    current_datetime: i64,
    /// nanoseconds since the epoch, when current_datetime exceeds this, stop
    /// generating points
    end_datetime: i64,
    /// whether to continue generating points after reaching the current time
    continue_on: bool,
    /// whether this agent is done generating points or not
    finished: bool,
    /// Optional interval at which to re-run the agent if generating data in
    /// "continue" mode
    interval: Option<tokio::time::Interval>,
}
impl<T: DataGenRng> Agent<T> {
    /// Create a new agent that will generate data points according to these
    /// specs. Substitutions in `name` and `agent_tags` should be made
    /// before using them to instantiate an agent.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        agent_spec: &specification::AgentSpec,
        agent_name: impl Into<String>,
        agent_id: usize,
        parent_seed: impl fmt::Display,
        agent_tags: Vec<Tag>,
        start_datetime: Option<i64>, // in nanoseconds since the epoch, defaults to now
        end_datetime: Option<i64>,   // also in nanoseconds since the epoch, defaults to now
        execution_start_time: i64,
        continue_on: bool, // If true, run in "continue" mode after historical data is generated
    ) -> Result<Self> {
        let name = agent_name.into();
        // Will agents actually need rngs? Might just need seeds...
        // The seed is derived from the parent seed plus the agent name so each
        // agent gets a distinct but reproducible random stream.
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(&seed);

        // Build one generator set per measurement spec; any failure aborts
        // construction with `CouldNotCreateMeasurementGeneratorSets`.
        let measurement_generator_sets = agent_spec
            .measurements
            .iter()
            .map(|spec| {
                MeasurementGeneratorSet::new(
                    &name,
                    agent_id,
                    spec,
                    &seed,
                    &agent_tags,
                    execution_start_time,
                )
            })
            .collect::<crate::measurement::Result<_>>()
            .context(CouldNotCreateMeasurementGeneratorSets)?;

        let current_datetime = start_datetime.unwrap_or_else(now_ns);
        let end_datetime = end_datetime.unwrap_or_else(now_ns);

        // Convert to nanoseconds
        let sampling_interval = agent_spec
            .sampling_interval
            .map(|s| s as i64 * 1_000_000_000);

        Ok(Self {
            agent_id,
            name,
            rng,
            agent_tags,
            measurement_generator_sets,
            sampling_interval,
            current_datetime,
            end_datetime,
            continue_on,
            finished: false,
            interval: None,
        })
    }

    /// Generate and write points in batches until `generate` doesn't return any
    /// points. Meant to be called in a `tokio::task`.
    ///
    /// Returns the total number of points written across all batches.
    pub async fn generate_all(&mut self, mut points_writer: PointsWriter) -> Result<usize> {
        let mut total_points = 0;
        let mut points = self.generate().await?;
        while !points.is_empty() {
            info!("[agent {}] sending {} points", self.name, points.len());
            total_points += points.len();
            points_writer
                .write_points(points)
                .await
                .context(CouldNotWritePoints)?;
            points = self.generate().await?;
        }
        Ok(total_points)
    }

    /// Generate data points from the configuration in this agent, one point per
    /// measurement contained in this agent's configuration.
    ///
    /// Returns an empty `Vec` once the agent has finished: either it has no
    /// sampling interval (generate exactly once), or it passed `end_datetime`
    /// without `continue_on` set.
    pub async fn generate(&mut self) -> Result<Vec<DataPoint>> {
        let mut points = Vec::new();

        debug!(
            "[agent {}] generate more? {} current: {}, end: {}",
            self.name, self.finished, self.current_datetime, self.end_datetime
        );

        if !self.finished {
            // Save the current_datetime to use in the set of points that we're generating
            // because we might increment current_datetime to see if we're done
            // or not.
            let point_timestamp = self.current_datetime;

            if let Some(i) = &mut self.interval {
                // Already in "continue" mode: wait for the next real-time tick
                // and stamp subsequent points with the wall clock.
                i.tick().await;
                self.current_datetime = now_ns();
            } else if let Some(ns) = self.sampling_interval {
                // Historical mode: advance by one sampling interval per call.
                self.current_datetime += ns;

                if self.current_datetime > self.end_datetime {
                    if self.continue_on {
                        // Caught up to the end time: switch to real-time mode.
                        let mut i = tokio::time::interval(Duration::from_nanos(ns as u64));
                        i.tick().await; // first tick completes immediately
                        self.current_datetime = now_ns();
                        self.interval = Some(i);
                    } else {
                        // Past the end and not continuing; this call still
                        // emits points for `point_timestamp`, the next call
                        // will return an empty Vec.
                        self.finished = true;
                    }
                }
            } else {
                // No sampling interval: generate exactly once, then stop.
                self.finished = true;
            }

            for mgs in &mut self.measurement_generator_sets {
                for point in mgs
                    .generate(point_timestamp)
                    .context(CouldNotGeneratePoint)?
                {
                    points.push(point);
                }
            }
        }

        Ok(points)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::{now_ns, specification::*, ZeroRng};
    use influxdb2_client::models::WriteDataPoint;

    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    impl<T: DataGenRng> Agent<T> {
        /// Instantiate an agent only with the parameters we're interested in
        /// testing, keeping everything else constant across different
        /// tests.
        ///
        /// The fixed spec uses `count: Some(2)` measurements, so every
        /// successful `generate()` call yields exactly 2 points — that is what
        /// the `assert_eq!(points.len(), 2)` checks below rely on.
        fn test_instance(
            sampling_interval: Option<i64>,
            continue_on: bool,
            current_datetime: i64,
            end_datetime: i64,
        ) -> Self {
            let measurement_spec = MeasurementSpec {
                name: "measurement-{{agent_id}}-{{measurement_id}}".into(),
                count: Some(2),
                tags: vec![],
                fields: vec![FieldSpec {
                    name: "field-{{agent_id}}-{{measurement_id}}-{{field_id}}".into(),
                    field_value_spec: FieldValueSpec::I64 {
                        range: 0..60,
                        increment: false,
                        reset_after: None,
                    },
                    count: Some(2),
                }],
            };

            let measurement_generator_set =
                MeasurementGeneratorSet::new("test", 42, &measurement_spec, "spec-test", &[], 0)
                    .unwrap();

            Self {
                agent_id: 0,
                name: String::from("test"),
                rng: RandomNumberGenerator::<T>::new("spec-test"),
                agent_tags: vec![],
                measurement_generator_sets: vec![measurement_generator_set],
                finished: false,
                interval: None,
                sampling_interval,
                current_datetime,
                end_datetime,
                continue_on,
            }
        }
    }

    /// Extract the timestamps of the given points by serializing each one to
    /// line protocol and parsing the last whitespace-separated token.
    fn timestamps(points: &[influxdb2_client::models::DataPoint]) -> Result<Vec<i64>> {
        points
            .iter()
            .map(|point| {
                let mut v = Vec::new();
                point.write_data_point_to(&mut v)?;
                let line = String::from_utf8(v)?;

                Ok(line.split(' ').last().unwrap().trim().parse()?)
            })
            .collect()
    }

    #[rustfmt::skip]
    // # Summary: No Sampling Interval
    //
    // If there isn't a sampling interval, we don't know how often to run, so we can neither
    // generate historical data nor can we continue into the future. The only thing we'll do is
    // generate once then stop.
    //
    // | sampling_interval | continue | cmp(current_time, end_time) | expected outcome |
    // |-------------------+----------+-----------------------------+------------------|
    // | None              | false    | Less                        | gen 1x, stop     |
    // | None              | false    | Equal                       | gen 1x, stop     |
    // | None              | false    | Greater                     | gen 1x, stop     |
    // | None              | true     | Less                        | gen 1x, stop     |
    // | None              | true     | Equal                       | gen 1x, stop     |
    // | None              | true     | Greater                     | gen 1x, stop     |
    mod without_sampling_interval {
        use super::*;

        mod without_continue {
            use super::*;

            #[tokio::test]
            async fn current_time_less_than_end_time() -> Result<()> {
                let mut agent = Agent::<ZeroRng>::test_instance(None, false, 0, 10);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_equal_end_time() -> Result<()> {
                let mut agent = Agent::<ZeroRng>::test_instance(None, false, 10, 10);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_greater_than_end_time() -> Result<()> {
                let mut agent = Agent::<ZeroRng>::test_instance(None, false, 11, 10);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }
        }

        mod with_continue {
            use super::*;

            #[tokio::test]
            async fn current_time_less_than_end_time() -> Result<()> {
                let mut agent = Agent::<ZeroRng>::test_instance(None, true, 0, 10);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_equal_end_time() -> Result<()> {
                let mut agent = Agent::<ZeroRng>::test_instance(None, true, 10, 10);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_greater_than_end_time() -> Result<()> {
                let mut agent = Agent::<ZeroRng>::test_instance(None, true, 11, 10);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }
        }
    }

    mod with_sampling_interval {
        use super::*;

        // The tests take about 5 ms to run on my computer, so set the sampling interval
        // to 10 ms to be able to test that the delay is happening when
        // `continue` is true without making the tests too artificially slow.
        const TEST_SAMPLING_INTERVAL: i64 = 10_000_000;

        #[rustfmt::skip]
        // # Summary: Not continuing
        //
        // If there is a sampling interval but we're not continuing, we should generate points at
        // least once but if the current time is greater than the ending time (which might be set
        // to `now`), we've generated everything we need to and should stop.
        //
        // | sampling_interval | continue | cmp(current_time, end_time) | expected outcome |
        // |-------------------+----------+-----------------------------+------------------|
        // | Some(_)           | false    | Less                        | gen & increment  |
        // | Some(_)           | false    | Equal                       | gen 1x, stop     |
        // | Some(_)           | false    | Greater                     | gen 1x, stop     |
        mod without_continue {
            use super::*;

            #[tokio::test]
            async fn current_time_less_than_end_time() -> Result<()> {
                let current = 0;
                let end = TEST_SAMPLING_INTERVAL;

                let mut agent =
                    Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), false, current, end);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_equal_end_time() -> Result<()> {
                let current = TEST_SAMPLING_INTERVAL;
                let end = current;

                let mut agent =
                    Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), false, current, end);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_greater_than_end_time() -> Result<()> {
                let current = 2 * TEST_SAMPLING_INTERVAL;
                let end = TEST_SAMPLING_INTERVAL;

                let mut agent =
                    Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), false, current, end);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let points = agent.generate().await?;
                assert!(points.is_empty(), "expected no points, got {:?}", points);

                Ok(())
            }
        }

        #[rustfmt::skip]
        // # Summary: After generating historical data, continue sampling in "real time"
        //
        // If there is a sampling interval and we are continuing, generate points as fast as
        // possible (but with timestamps separated by sampling_interval amounts) until we catch up
        // to `now`. Then add pauses of the sampling_interval's duration, generating points with
        // their timestamps set to the current time to simulate "real" point generation.
        //
        // | sampling_interval | continue | cmp(current_time, end_time) | expected outcome |
        // |-------------------+----------+-----------------------------+------------------|
        // | Some(_)           | true     | Less                        | gen, no delay    |
        // | Some(_)           | true     | Equal                       | gen, delay       |
        // | Some(_)           | true     | Greater                     | gen, delay       |
        mod with_continue {
            use super::*;

            #[tokio::test]
            async fn current_time_less_than_end_time() -> Result<()> {
                let end = now_ns();
                let current = end - TEST_SAMPLING_INTERVAL;

                let mut agent =
                    Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), true, current, end);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);
                let times = timestamps(&points).unwrap();
                assert_eq!(vec![current, current], times);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);
                let times = timestamps(&points).unwrap();
                assert_eq!(vec![end, end], times);

                Ok(())
            }

            #[tokio::test]
            async fn current_time_equal_end_time() -> Result<()> {
                let end = now_ns();
                let current = end;

                let mut agent =
                    Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), true, current, end);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);
                let times = timestamps(&points).unwrap();
                assert_eq!(vec![end, end], times);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                // In "continue" mode the second batch is stamped with the wall
                // clock, so we can only bound it, not pin an exact value.
                let real_now = now_ns();
                let times = timestamps(&points).unwrap();
                for time in times {
                    assert!(
                        time <= real_now,
                        "expected timestamp {} to be generated before now ({}); \
                        was {} nanoseconds greater",
                        time,
                        real_now,
                        time - real_now
                    );
                }

                Ok(())
            }

            #[tokio::test]
            async fn current_time_greater_than_end_time() -> Result<()> {
                let end = now_ns();
                let current = end + TEST_SAMPLING_INTERVAL;

                let mut agent =
                    Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), true, current, end);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);
                let times = timestamps(&points).unwrap();
                assert_eq!(vec![current, current], times);

                let points = agent.generate().await?;
                assert_eq!(points.len(), 2);

                let real_now = now_ns();
                let times = timestamps(&points).unwrap();
                for time in times {
                    assert!(
                        time <= real_now,
                        "expected timestamp {} to be generated before now ({}); \
                        was {} nanoseconds greater",
                        time,
                        real_now,
                        time - real_now
                    );
                }

                Ok(())
            }
        }
    }
}

View File

@ -0,0 +1,157 @@
#![deny(rust_2018_idioms)]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
clippy::explicit_iter_loop,
clippy::use_self
)]
use clap::{App, Arg};
use generated_types::influxdata::iox::management::v1::{
self as management, database_rules::*, lifecycle_rules::*, *,
};
/// One-shot setup script: creates a Kafka-backed writer/reader database pair
/// on two IOx servers and smoke-tests the writer with a few points.
#[tokio::main]
async fn main() {
    // NOTE(review): this text is passed to `App::new`, which clap treats as
    // the application *name*; confirm that is intentional rather than
    // `.long_about`.
    let help = r#"IOx database creator
Examples:
# Create a database named `foo_bar` with the IOx server listening at the default gRPC address:
create_database foo_bar
# Create a database named `myorg_mybucket` with the IOx server listening at
# 127.0.0.1:9000:
create_database --grpc-bind 127.0.0.1:9000 myorg_mybucket
"#;

    let matches = App::new(help)
        .about("IOx Database creation script")
        .arg(
            Arg::with_name("DATABASE_NAME")
                .help("Name of the database to create")
                .takes_value(true)
                .required(true),
        )
        .arg(
            Arg::with_name("WRITER")
                .long("writer")
                .help("The gRPC host and port of the IOx server that should write to Kafka")
                .takes_value(true)
                .required(true),
        )
        .arg(
            Arg::with_name("READER")
                .long("reader")
                .help("The gRPC host and port of the IOx server that should read from Kafka")
                .takes_value(true)
                .required(true),
        )
        .arg(
            Arg::with_name("KAFKA")
                .long("kafka")
                .help("The connection address of the Kafka instance")
                .takes_value(true)
                .default_value("127.0.0.1:9093"),
        )
        .get_matches();

    // `expect` is safe here: clap has already enforced `required(true)` /
    // the default value before `get_matches` returns.
    let db_name = matches
        .value_of("DATABASE_NAME")
        .expect("DATABASE_NAME is required")
        .to_string();
    let writer = matches.value_of("WRITER").expect("WRITER is required");
    let reader = matches.value_of("READER").expect("READER is required");
    let kafka = matches
        .value_of("KAFKA")
        .expect("KAFKA has a default value");

    // Edit these to whatever DatabaseRules you want to use
    //
    // Writer side: immutable lifecycle (nothing is buffered locally), all
    // writes are routed to the Kafka sink.
    let writer_database_rules = DatabaseRules {
        name: db_name.clone(),
        partition_template: Some(PartitionTemplate {
            parts: vec![partition_template::Part {
                part: Some(partition_template::part::Part::Time(
                    "%Y-%m-%d %H:00:00".into(),
                )),
            }],
        }),
        lifecycle_rules: Some(LifecycleRules {
            immutable: true,
            ..Default::default()
        }),
        worker_cleanup_avg_sleep: None,
        routing_rules: Some(RoutingRules::RoutingConfig(RoutingConfig {
            sink: Some(management::Sink {
                sink: Some(management::sink::Sink::Kafka(KafkaProducer {})),
            }),
        })),
        write_buffer_connection: Some(WriteBufferConnection::Writing(kafka.to_string())),
    };

    // Reader side: consumes from Kafka (`Reading`), buffers up to 1 GiB
    // soft / 2 GiB hard, and persists once 10M rows accumulate.
    let reader_database_rules = DatabaseRules {
        name: db_name.clone(),
        partition_template: Some(PartitionTemplate {
            parts: vec![partition_template::Part {
                part: Some(partition_template::part::Part::Time(
                    "%Y-%m-%d %H:00:00".into(),
                )),
            }],
        }),
        lifecycle_rules: Some(LifecycleRules {
            buffer_size_soft: 1024 * 1024 * 1024,
            buffer_size_hard: 1024 * 1024 * 1024 * 2,
            worker_backoff_millis: 100,
            max_active_compactions_cfg: Some(MaxActiveCompactionsCfg::MaxActiveCompactions(1)),
            persist: true,
            persist_row_threshold: 10 * 1000 * 1000,
            ..Default::default()
        }),
        worker_cleanup_avg_sleep: None,
        routing_rules: Some(RoutingRules::RoutingConfig(RoutingConfig {
            sink: Some(management::Sink {
                sink: Some(management::sink::Sink::Kafka(KafkaProducer {})),
            }),
        })),
        write_buffer_connection: Some(WriteBufferConnection::Reading(kafka.to_string())),
    };

    // Create the writer db
    let writer_grpc_bind_addr = format!("http://{}", writer);
    let writer_grpc_channel = influxdb_iox_client::connection::Builder::default()
        .build(writer_grpc_bind_addr)
        .await
        .unwrap();
    let mut writer_management_client =
        influxdb_iox_client::management::Client::new(writer_grpc_channel.clone());
    writer_management_client
        .create_database(writer_database_rules)
        .await
        .expect("create writer database failed");

    // Write a few points
    let mut write_client = influxdb_iox_client::write::Client::new(writer_grpc_channel);
    let lp_lines = [
        "write_test,region=west user=23.2 100",
        "write_test,region=west user=21.0 150",
        "write_test,region=east bytes=99i 200",
    ];
    let num_lines_written = write_client
        .write(&db_name, lp_lines.join("\n"))
        .await
        .expect("cannot write");
    assert_eq!(num_lines_written, 3);

    // Create the reader db
    let reader_grpc_bind_addr = format!("http://{}", reader);
    let reader_grpc_channel = influxdb_iox_client::connection::Builder::default()
        .build(reader_grpc_bind_addr)
        .await
        .unwrap();
    let mut reader_management_client =
        influxdb_iox_client::management::Client::new(reader_grpc_channel.clone());
    reader_management_client
        .create_database(reader_database_rules)
        .await
        .expect("create reader database failed");

    println!("Created database {}", db_name);
}

View File

@ -0,0 +1,777 @@
//! Generating a set of field keys and values given a specification
use crate::{
now_ns, specification,
substitution::{pick_from_replacements, Substitute},
DataGenRng, RandomNumberGenerator,
};
use influxdb2_client::models::FieldValue;
use rand::Rng;
use serde::Serialize;
use snafu::{ResultExt, Snafu};
use std::{collections::BTreeMap, fmt, ops::Range, time::Duration};
/// Field-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while creating fields
// Both variants wrap `substitution` errors: one for a one-shot name
// substitution, one for compiling a reusable string template.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when substituting placeholder values
    #[snafu(display("Could not create field name, caused by:\n{}", source))]
    CouldNotCreateFieldName {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },

    /// Error that may happen when substituting placeholder values
    #[snafu(display("Could not compile field name template, caused by:\n{}", source))]
    CouldNotCompileStringTemplate {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },
}
/// A generated field value that will be used in a generated data point.
#[derive(Debug, PartialEq)]
pub struct Field {
    /// The key for the field
    pub key: String,
    /// The value for the field
    pub value: FieldValue,
}

impl Field {
    /// Create a new field with the given key and value.
    ///
    /// Accepts anything convertible into a `String` key and an
    /// `influxdb2_client` `FieldValue` (bool/i64/f64/String all convert).
    pub fn new(key: impl Into<String>, value: impl Into<FieldValue>) -> Self {
        Self {
            key: key.into(),
            value: value.into(),
        }
    }
}
/// A set of `count` fields that have the same configuration but different
/// `field_id`s.
pub struct FieldGeneratorSet {
    // One boxed generator per `field_id`; trait objects because each field
    // kind (bool/i64/f64/string/uptime) is a different generator type.
    field_generators: Vec<Box<dyn FieldGenerator + Send>>,
}

// field_generators doesn't implement Debug
impl fmt::Debug for FieldGeneratorSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("FieldGeneratorSet")
            .field("field_generators", &"(dynamic)")
            .finish()
    }
}

impl FieldGeneratorSet {
    /// Create a new set of field generators for a particular agent,
    /// measurement, and field specification.
    ///
    /// If the spec omits `count`, a single generator (field_id 0) is created.
    pub fn new<T: DataGenRng>(
        agent_name: &str,
        agent_id: usize,
        measurement_id: usize,
        spec: &specification::FieldSpec,
        parent_seed: &str,
        execution_start_time: i64,
    ) -> Result<Self> {
        let count = spec.count.unwrap_or(1);

        let field_generators = (0..count)
            .map(|field_id| {
                field_spec_to_generator::<T>(
                    agent_name,
                    agent_id,
                    measurement_id,
                    field_id,
                    spec,
                    parent_seed,
                    execution_start_time,
                )
            })
            .collect::<Result<_>>()?;

        Ok(Self { field_generators })
    }

    /// Create one set of fields: one `Field` per generator, all stamped with
    /// the same point `timestamp`.
    pub fn generate(&mut self, timestamp: i64) -> Vec<Field> {
        self.field_generators
            .iter_mut()
            .map(|fg| fg.generate(timestamp))
            .collect()
    }
}
/// Module-private abstraction over the per-kind field generators: produce one
/// `Field` for the given point timestamp. Most implementations ignore the
/// timestamp; `StringFieldGenerator` exposes it to templates.
trait FieldGenerator {
    fn generate(&mut self, timestamp: i64) -> Field;
}
/// Generate boolean field names and values.
#[derive(Debug)]
pub struct BooleanFieldGenerator<T: DataGenRng> {
    name: String,
    rng: RandomNumberGenerator<T>,
}

impl<T: DataGenRng> BooleanFieldGenerator<T> {
    /// Create a new boolean field generator that will always use the specified
    /// name.
    pub fn new(name: &str, parent_seed: &str) -> Self {
        let name = name.into();
        // Seed derived from parent seed + field name for reproducibility
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(seed);

        Self { name, rng }
    }
}

impl<T: DataGenRng> FieldGenerator for BooleanFieldGenerator<T> {
    fn generate(&mut self, _timestamp: i64) -> Field {
        let b: bool = self.rng.gen();
        Field::new(&self.name, b)
    }
}
/// Generate integer field names and values.
#[derive(Debug)]
pub struct I64FieldGenerator<T: DataGenRng> {
    name: String,
    /// Half-open range values are sampled from; if `start == end` the start
    /// value is used directly (avoids sampling an empty range)
    range: Range<i64>,
    /// When true, each generated value is added to a running total and the
    /// total is returned (a monotonically increasing series)
    increment: bool,
    rng: RandomNumberGenerator<T>,
    /// Running total used when `increment` is true
    previous_value: i64,
    /// After this many incrementing samples, the running total resets to 0
    reset_after: Option<usize>,
    /// Samples taken since the last reset
    current_tick: usize,
}

impl<T: DataGenRng> I64FieldGenerator<T> {
    /// Create a new integer field generator that will always use the specified
    /// name.
    pub fn new(
        name: impl Into<String>,
        range: &Range<i64>,
        increment: bool,
        reset_after: Option<usize>,
        parent_seed: impl fmt::Display,
    ) -> Self {
        let name = name.into();
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(seed);

        Self {
            name,
            range: range.to_owned(),
            increment,
            rng,
            previous_value: 0,
            reset_after,
            current_tick: 0,
        }
    }
}

impl<T: DataGenRng> FieldGenerator for I64FieldGenerator<T> {
    fn generate(&mut self, _timestamp: i64) -> Field {
        // Degenerate range (start == end) would be empty for gen_range, so
        // return the start value directly.
        let mut value = if self.range.start == self.range.end {
            self.range.start
        } else {
            self.rng.gen_range(self.range.clone())
        };

        if self.increment {
            // wrapping_add: deliberately wrap around i64::MAX instead of
            // panicking in debug builds (see the `incrementing_i64_wraps` test)
            self.previous_value = self.previous_value.wrapping_add(value);
            value = self.previous_value;

            // `reset_after` only applies in increment mode
            if let Some(reset) = self.reset_after {
                self.current_tick += 1;
                if self.current_tick >= reset {
                    self.previous_value = 0;
                    self.current_tick = 0;
                }
            }
        }

        Field::new(&self.name, value)
    }
}
/// Generate floating point field names and values.
#[derive(Debug)]
pub struct F64FieldGenerator<T: DataGenRng> {
    name: String,
    /// Half-open range values are sampled from; (effectively) equal endpoints
    /// mean the start value is always returned
    range: Range<f64>,
    rng: RandomNumberGenerator<T>,
}

impl<T: DataGenRng> F64FieldGenerator<T> {
    /// Create a new floating point field generator that will always use the
    /// specified name.
    pub fn new(
        name: impl Into<String>,
        range: &Range<f64>,
        parent_seed: impl fmt::Display,
    ) -> Self {
        let name = name.into();
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(seed);

        Self {
            name,
            range: range.to_owned(),
            rng,
        }
    }
}

impl<T: DataGenRng> FieldGenerator for F64FieldGenerator<T> {
    fn generate(&mut self, _timestamp: i64) -> Field {
        // Epsilon comparison: endpoints that are equal (within float
        // tolerance) would make gen_range's range empty, so short-circuit.
        let value = if (self.range.start - self.range.end).abs() < f64::EPSILON {
            self.range.start
        } else {
            self.rng.gen_range(self.range.clone())
        };

        Field::new(&self.name, value)
    }
}
/// Generate string field names and values.
#[derive(Debug)]
pub struct StringFieldGenerator<T: DataGenRng> {
    /// Exposed to templates as `agent_name`
    agent_name: String,
    name: String,
    /// Pre-compiled value template (compiled once in `new`)
    substitute: Substitute,
    rng: RandomNumberGenerator<T>,
    /// Replacement word lists sampled on every `generate` call
    replacements: Vec<specification::Replacement>,
}

impl<T: DataGenRng> StringFieldGenerator<T> {
    /// Create a new string field generator.
    ///
    /// # Errors
    ///
    /// Returns `CouldNotCompileStringTemplate` if `pattern` is not a valid
    /// template.
    pub fn new(
        agent_name: impl Into<String>,
        name: impl Into<String>,
        pattern: impl Into<String>,
        parent_seed: impl fmt::Display,
        replacements: Vec<specification::Replacement>,
    ) -> Result<Self> {
        let name = name.into();
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(seed);
        // The template gets its own RNG built from the same seed
        let substitute = Substitute::new(pattern, RandomNumberGenerator::<T>::new(&rng.seed))
            .context(CouldNotCompileStringTemplate {})?;

        Ok(Self {
            agent_name: agent_name.into(),
            name,
            substitute,
            rng,
            replacements,
        })
    }
}

impl<T: DataGenRng> FieldGenerator for StringFieldGenerator<T> {
    // NOTE: unlike the other generators this one actually uses `timestamp`,
    // exposing it to the value template. Panics (via `expect`) if evaluating
    // the already-compiled template fails at runtime.
    fn generate(&mut self, timestamp: i64) -> Field {
        /// Values available for substitution in the string template
        #[derive(Serialize)]
        struct Values<'a> {
            #[serde(flatten)]
            replacements: BTreeMap<&'a str, &'a str>,
            agent_name: &'a str,
            timestamp: i64,
        }

        let values = Values {
            replacements: pick_from_replacements(&mut self.rng, &self.replacements),
            agent_name: &self.agent_name,
            timestamp,
        };

        let value = self
            .substitute
            .evaluate(&values)
            .expect("Unable to substitute string field value");

        Field::new(&self.name, value)
    }
}
/// Generate an i64 field that has the name `uptime` and the value of the number
/// of seconds since the data generator started running
#[derive(Debug)]
pub struct UptimeFieldGenerator {
    name: String,
    /// Wall-clock start of this data generation run, nanoseconds since epoch
    execution_start_time: i64,
    /// Output flavor: raw seconds (`I64`) or Telegraf's "N day(s), HH:MM"
    kind: specification::UptimeKind,
}

impl UptimeFieldGenerator {
    fn new(
        name: impl Into<String>,
        kind: &specification::UptimeKind,
        execution_start_time: i64,
    ) -> Self {
        Self {
            execution_start_time,
            kind: *kind,
            name: name.into(),
        }
    }
}

impl FieldGenerator for UptimeFieldGenerator {
    fn generate(&mut self, _timestamp: i64) -> Field {
        use specification::UptimeKind::*;

        // Uptime is measured against the wall clock, not the point timestamp.
        let elapsed_ns = (now_ns() - self.execution_start_time) as u64;
        let seconds_up = Duration::from_nanos(elapsed_ns).as_secs();

        match self.kind {
            I64 => Field::new(&self.name, seconds_up as i64),
            Telegraf => {
                // Mimic Telegraf's human-readable uptime string.
                let days = seconds_up / 86_400;
                let days_plural = if days == 1 { "" } else { "s" };

                let total_minutes = seconds_up / 60;
                let hours = (total_minutes / 60) % 24;
                let minutes = total_minutes % 60;

                let rendered =
                    format!("{} day{}, {:02}:{:02}", days, days_plural, hours, minutes);
                Field::new(&self.name, rendered)
            }
        }
    }
}
/// Dispatch a `FieldSpec` to the concrete generator type for its value kind,
/// after substituting `{{agent_id}}`/`{{measurement_id}}`/`{{field_id}}`
/// placeholders in the field name.
fn field_spec_to_generator<T: DataGenRng>(
    agent_name: &str,
    agent_id: usize,
    measurement_id: usize,
    field_id: usize,
    spec: &specification::FieldSpec,
    parent_seed: &str,
    execution_start_time: i64,
) -> Result<Box<dyn FieldGenerator + Send>> {
    use specification::FieldValueSpec::*;

    let spec_name = Substitute::once(
        &spec.name,
        &[
            ("agent_id", &agent_id.to_string()),
            ("measurement_id", &measurement_id.to_string()),
            ("field_id", &field_id.to_string()),
        ],
    )
    .context(CouldNotCreateFieldName)?;

    Ok(match &spec.field_value_spec {
        Bool(true) => Box::new(BooleanFieldGenerator::<T>::new(&spec_name, parent_seed)),
        // `bool = false` in a spec has no defined meaning yet; fail loudly
        Bool(false) => unimplemented!("Not sure what false means for bool fields yet"),
        I64 {
            range,
            increment,
            reset_after,
        } => Box::new(I64FieldGenerator::<T>::new(
            &spec_name,
            range,
            *increment,
            *reset_after,
            parent_seed,
        )),
        F64 { range } => Box::new(F64FieldGenerator::<T>::new(&spec_name, range, parent_seed)),
        String {
            pattern,
            replacements,
        } => Box::new(StringFieldGenerator::<T>::new(
            agent_name,
            &spec_name,
            pattern,
            parent_seed,
            replacements.to_vec(),
        )?),
        Uptime { kind } => Box::new(UptimeFieldGenerator::new(
            &spec_name,
            kind,
            execution_start_time,
        )),
    })
}
#[cfg(test)]
mod test {
use super::*;
use crate::{DynamicRng, ZeroRng, TEST_SEED};
use test_helpers::approximately_equal;
type Error = Box<dyn std::error::Error>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
// Shortcut functions that panic for getting values out of fields for test convenience
impl Field {
fn i64(&self) -> i64 {
match self.value {
FieldValue::I64(v) => v,
ref other => panic!("expected i64, got {:?}", other),
}
}
fn f64(&self) -> f64 {
match self.value {
FieldValue::F64(v) => v,
ref other => panic!("expected f64, got {:?}", other),
}
}
fn bool(&self) -> bool {
match self.value {
FieldValue::Bool(v) => v,
ref other => panic!("expected bool, got {:?}", other),
}
}
fn string(&self) -> String {
match &self.value {
FieldValue::String(v) => v.clone(),
ref other => panic!("expected String, got {:?}", other),
}
}
}
#[test]
fn generate_boolean_field() {
let mut bfg = BooleanFieldGenerator::<ZeroRng>::new("bfg", TEST_SEED);
assert!(!bfg.generate(1234).bool());
}
#[test]
fn generate_i64_field_always_the_same() {
// If the specification has the same number for the start and end of the
// range...
let mut i64fg =
I64FieldGenerator::<DynamicRng>::new("i64fg", &(3..3), false, None, TEST_SEED);
let i64_fields: Vec<_> = (0..10).map(|_| i64fg.generate(1234).i64()).collect();
let expected = i64_fields[0];
// All the values generated will always be the same.
assert!(
i64_fields.iter().all(|f| *f == expected),
"{:?}",
i64_fields
);
// If the specification has n for the start and n+1 for the end of the range...
let mut i64fg =
I64FieldGenerator::<DynamicRng>::new("i64fg", &(4..5), false, None, TEST_SEED);
let i64_fields: Vec<_> = (0..10).map(|_| i64fg.generate(1234).i64()).collect();
// We know what the value will be even though we're using a real random number generator
let expected = 4;
// All the values generated will also always be the same, because the end of the
// range is exclusive.
assert!(
i64_fields.iter().all(|f| *f == expected),
"{:?}",
i64_fields
);
}
// A non-increment generator must draw values inside the configured range.
#[test]
fn generate_i64_field_within_a_range() {
    let range = 3..1000;
    let mut i64fg =
        I64FieldGenerator::<DynamicRng>::new("i64fg", &range, false, None, TEST_SEED);
    let val = i64fg.generate(1234).i64();
    assert!(range.contains(&val), "`{}` was not in the range", val);
}
// With `increment = true` and a positive range, successive draws must be
// strictly increasing.
#[test]
fn generate_incrementing_i64_field() {
    let mut i64fg =
        I64FieldGenerator::<DynamicRng>::new("i64fg", &(3..10), true, None, TEST_SEED);
    let val1 = i64fg.generate(1234).i64();
    let val2 = i64fg.generate(1234).i64();
    let val3 = i64fg.generate(1234).i64();
    let val4 = i64fg.generate(1234).i64();
    assert!(val1 < val2, "`{}` < `{}` was false", val1, val2);
    assert!(val2 < val3, "`{}` < `{}` was false", val2, val3);
    assert!(val3 < val4, "`{}` < `{}` was false", val3, val4);
}
// An incrementing generator at i64::MAX must wrap (via `wrapping_add`)
// rather than panic on overflow.
#[test]
fn incrementing_i64_wraps() {
    let rng = RandomNumberGenerator::<DynamicRng>::new(TEST_SEED);
    let range = 3..10;
    let previous_value = i64::MAX;
    // Construct by hand to set the previous value at the end of i64's range
    let mut i64fg = I64FieldGenerator {
        name: "i64fg".into(),
        range: range.clone(),
        increment: true,
        reset_after: None,
        rng,
        previous_value,
        current_tick: 0,
    };
    // The next draw lands in the configured range shifted by the (wrapped)
    // previous value.
    let resulting_range =
        range.start.wrapping_add(previous_value)..range.end.wrapping_add(previous_value);
    let val = i64fg.generate(1234).i64();
    assert!(
        resulting_range.contains(&val),
        "`{}` was not in the range",
        val
    );
}
// With `reset_after = Some(3)`, the counter restarts after three draws, so
// the fourth value drops back below the third.
#[test]
fn incrementing_i64_that_resets() {
    let reset_after = Some(3);
    let mut i64fg =
        I64FieldGenerator::<DynamicRng>::new("i64fg", &(3..10), true, reset_after, TEST_SEED);
    let val1 = i64fg.generate(1234).i64();
    let val2 = i64fg.generate(1234).i64();
    let val3 = i64fg.generate(1234).i64();
    let val4 = i64fg.generate(1234).i64();
    assert!(val1 < val2, "`{}` < `{}` was false", val1, val2);
    assert!(val2 < val3, "`{}` < `{}` was false", val2, val3);
    assert!(val4 < val3, "`{}` < `{}` was false", val4, val3);
}
// A zero-width float range must collapse to the single endpoint value
// (compared with `approximately_equal` since these are f64s).
#[test]
fn generate_f64_field_always_the_same() {
    // If the specification has the same number for the start and end of the
    // range...
    let start_and_end = 3.0;
    let range = start_and_end..start_and_end;
    let mut f64fg = F64FieldGenerator::<DynamicRng>::new("f64fg", &range, TEST_SEED);
    let f64_fields: Vec<_> = (0..10).map(|_| f64fg.generate(1234).f64()).collect();
    // All the values generated will always be the same known value.
    assert!(
        f64_fields
            .iter()
            .all(|f| approximately_equal(*f, start_and_end)),
        "{:?}",
        f64_fields
    );
}
// Float draws must stay inside the configured half-open range.
#[test]
fn generate_f64_field_within_a_range() {
    let range = 3.0..1000.0;
    let mut f64fg = F64FieldGenerator::<DynamicRng>::new("f64fg", &range, TEST_SEED);
    let val = f64fg.generate(1234).f64();
    assert!(range.contains(&val), "`{}` was not in the range", val);
}
// A pattern with no `{{...}}` placeholders is emitted verbatim.
#[test]
fn generate_string_field_without_replacements() {
    let fake_now = 11111;
    let mut stringfg = StringFieldGenerator::<DynamicRng>::new(
        "agent_name",
        "stringfg",
        "my value",
        TEST_SEED,
        vec![],
    )
    .unwrap();
    assert_eq!("my value", stringfg.generate(fake_now).string());
}
// Built-in substitutions: `{{agent_name}}` expands to the agent name,
// `{{format-time "%s%f"}}` to the passed-in timestamp, and `{{random 16}}`
// to a fresh random middle segment on every generation.
#[test]
fn generate_string_field_with_provided_replacements() {
    let fake_now = 5555555555;
    let mut stringfg = StringFieldGenerator::<DynamicRng>::new(
        "double-oh-seven",
        "stringfg",
        r#"{{agent_name}}---{{random 16}}---{{format-time "%s%f"}}"#,
        TEST_SEED,
        vec![],
    )
    .unwrap();
    let string_val1 = stringfg.generate(fake_now).string();
    let string_val2 = stringfg.generate(fake_now).string();
    assert!(
        string_val1.starts_with("double-oh-seven---"),
        "`{}` did not start with `double-oh-seven---`",
        string_val1
    );
    assert!(
        string_val1.ends_with("---5555555555"),
        "`{}` did not end with `---5555555555`",
        string_val1
    );
    assert!(
        string_val2.starts_with("double-oh-seven---"),
        "`{}` did not start with `double-oh-seven---`",
        string_val2
    );
    assert!(
        string_val2.ends_with("---5555555555"),
        "`{}` did not end with `---5555555555`",
        string_val2
    );
    assert_ne!(string_val1, string_val2, "random value should change");
}
// An unresolvable `{{unknown}}` placeholder surfaces as a panic at
// generation time (the substitution error is unwrapped inside `generate`).
#[test]
#[should_panic(expected = "Unable to substitute string field value")]
fn unknown_replacement_errors() {
    let fake_now = 55555;
    let mut stringfg = StringFieldGenerator::<DynamicRng>::new(
        "arbitrary",
        "stringfg",
        "static-{{unknown}}",
        TEST_SEED,
        vec![],
    )
    .unwrap();
    stringfg.generate(fake_now);
}
// Unweighted replacement lists: ZeroRng always picks the first option.
#[test]
fn replacements_no_weights() -> Result<()> {
    let fake_now = 55555;
    let toml: specification::FieldSpec = toml::from_str(
        r#"
name = "sf"
pattern = "foo {{level}}"
replacements = [
    {replace = "level", with = ["info", "warn", "error"]}
]"#,
    )
    .unwrap();
    let mut stringfg =
        field_spec_to_generator::<ZeroRng>("agent_name", 0, 0, 0, &toml, TEST_SEED, fake_now)?;
    assert_eq!("foo info", stringfg.generate(fake_now).string());
    Ok(())
}
// Weighted replacement lists: with ZeroRng the draw lands on the first
// (heavily weighted) option.
#[test]
fn replacements_with_weights() -> Result<()> {
    let fake_now = 55555;
    let toml: specification::FieldSpec = toml::from_str(
        r#"
name = "sf"
pattern = "foo {{level}}"
replacements = [
    {replace = "level", with = [["info", 1000000], ["warn", 1], ["error", 0]]}
]"#,
    )
    .unwrap();
    let mut stringfg =
        field_spec_to_generator::<ZeroRng>("agent_name", 0, 0, 0, &toml, TEST_SEED, fake_now)?;
    assert_eq!("foo info", stringfg.generate(fake_now).string());
    Ok(())
}
// `uptime = "i64"` reports whole seconds since the (faked) process start.
// NOTE(review): relies on the wall clock via `now_ns()`; could be flaky if a
// second boundary is crossed mid-test.
#[test]
fn uptime_i64() -> Result<()> {
    let fake_now = 55555;
    // Pretend data generator started running 10 seconds ago
    let seconds_ago = 10;
    let fake_start_execution_time = now_ns() - seconds_ago * 1_000_000_000;
    let toml: specification::FieldSpec = toml::from_str(
        r#"
name = "arbitrary" # field name doesn't have to be uptime
uptime = "i64""#,
    )
    .unwrap();
    let mut uptimefg = field_spec_to_generator::<DynamicRng>(
        "agent_name",
        0,
        0,
        0,
        &toml,
        TEST_SEED,
        fake_start_execution_time,
    )?;
    assert_eq!(seconds_ago, uptimefg.generate(fake_now).i64());
    Ok(())
}
// `uptime = "telegraf"` formats elapsed time as "<n> day(s), HH:MM",
// matching Telegraf's human-readable uptime string (singular/plural "day"
// and zero-padded hours/minutes are both exercised here).
#[test]
fn uptime_telegraf() -> Result<()> {
    let fake_now = 55555;
    // Pretend data generator started running 10 days, 2 hours, and 33 minutes ago
    let seconds_ago = 10 * 24 * 60 * 60 + 2 * 60 * 60 + 33 * 60;
    let fake_start_execution_time = now_ns() - seconds_ago * 1_000_000_000;
    let toml: specification::FieldSpec = toml::from_str(
        r#"
name = "arbitrary" # field name doesn't have to be uptime
uptime = "telegraf""#,
    )
    .unwrap();
    let mut uptimefg = field_spec_to_generator::<DynamicRng>(
        "agent_name",
        0,
        0,
        0,
        &toml,
        TEST_SEED,
        fake_start_execution_time,
    )?;
    assert_eq!("10 days, 02:33", uptimefg.generate(fake_now).string());
    // Pretend data generator started running 1 day, 14 hours, and 5 minutes ago
    // to exercise different formatting
    let seconds_in_1_day = 24 * 60 * 60;
    let seconds_in_14_hours = 14 * 60 * 60;
    let seconds_in_5_minutes = 5 * 60;
    let seconds_ago = seconds_in_1_day + seconds_in_14_hours + seconds_in_5_minutes;
    let fake_start_execution_time = now_ns() - seconds_ago * 1_000_000_000;
    let mut uptimefg = field_spec_to_generator::<DynamicRng>(
        "agent_name",
        0,
        0,
        0,
        &toml,
        TEST_SEED,
        fake_start_execution_time,
    )?;
    assert_eq!("1 day, 14:05", uptimefg.generate(fake_now).string());
    Ok(())
}
}

View File

@ -0,0 +1,357 @@
//! This crate contains structures and generators for specifying how to generate
//! historical and real-time test data for IOx. The rules for how to
//! generate data and what shape it should take can be specified in a TOML file.
//!
//! Generators can output in line protocol, Parquet, or can be used to generate
//! real-time load on a server that implements the [InfluxDB 2.0 write
//! path][write-api].
//!
//! [write-api]: https://v2.docs.influxdata.com/v2.0/api/#tag/Write
//!
//! While this generator could be compared to [the Go based one that creates TSM
//! data][go-gen], its purpose is meant to be more far reaching. In addition to
//! generating historical data, it should be useful for generating data in a
//! sequence as you would expect it to arrive in a production environment. That
//! means many agents sending data with their different tags and timestamps.
//!
//! [go-gen]: https://github.com/influxdata/influxdb/pull/12710
#![deny(rust_2018_idioms)]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
clippy::explicit_iter_loop,
clippy::use_self
)]
use crate::substitution::Substitute;
use rand::Rng;
use rand_seeder::Seeder;
use snafu::{ResultExt, Snafu};
use std::{
convert::TryFrom,
time::{SystemTime, UNIX_EPOCH},
};
pub mod agent;
pub mod field;
pub mod measurement;
pub mod specification;
pub mod substitution;
pub mod tag;
pub mod write;
/// Errors that may happen while generating points.
///
/// Each variant wraps the lower-level error as `source`; call sites attach
/// context via the `snafu`-derived selectors (e.g. `CouldNotCreateAgent`).
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when waiting on a tokio task
    #[snafu(display("Could not join tokio task: {}", source))]
    TokioError {
        /// Underlying tokio error that caused this problem
        source: tokio::task::JoinError,
    },

    /// Error that may happen when constructing an agent name
    #[snafu(display("Could not create agent name, caused by:\n{}", source))]
    CouldNotCreateAgentName {
        /// Underlying `substitution` module error that caused this problem
        source: substitution::Error,
    },

    /// Error that may happen when an agent generates points
    #[snafu(display("Agent could not generate points, caused by:\n{}", source))]
    AgentCouldNotGeneratePoints {
        /// Underlying `agent` module error that caused this problem
        source: agent::Error,
    },

    /// Error that may happen when creating agents
    #[snafu(display("Could not create agent `{}`, caused by:\n{}", name, source))]
    CouldNotCreateAgent {
        /// The name of the relevant agent
        name: String,

        /// Underlying `agent` module error that caused this problem
        source: agent::Error,
    },
}
// Crate-private shorthand: `Result<T>` defaults the error type to this
// module's `Error`.
type Result<T, E = Error> = std::result::Result<T, E>;
/// Generate data from the configuration in the spec.
///
/// Provide a writer that the line protocol should be written to.
///
/// If `start_datetime` or `end_datetime` are `None`, the current datetime will
/// be used.
///
/// Spawns one tokio task per agent instance, waits for all of them, and
/// returns the total number of points written, or the first error from
/// agent creation or point generation.
pub async fn generate<T: DataGenRng>(
    spec: &specification::DataSpec,
    points_writer_builder: &mut write::PointsWriterBuilder,
    start_datetime: Option<i64>,
    end_datetime: Option<i64>,
    execution_start_time: i64,
    continue_on: bool,
) -> Result<usize> {
    // Fall back to a random 4-digit seed when the spec doesn't pin one.
    let seed = spec.base_seed.to_owned().unwrap_or_else(|| {
        let mut rng = rand::thread_rng();
        format!("{:04}", rng.gen_range(0..10000))
    });
    let mut handles = vec![];
    // for each agent specification
    for agent_spec in &spec.agents {
        // create iterators to `cycle` through for `agent_spec.tags`
        let tag_set_iterator = tag::AgentTagIterator::new(&agent_spec.tags);
        // create `count` number of agent instances, or 1 agent if no count is specified
        let n_agents = agent_spec.count.unwrap_or(1);
        for (agent_id, mut agent_tags) in tag_set_iterator.take(n_agents).enumerate() {
            // Expand `{{agent_id}}` in the configured agent name.
            let agent_name =
                Substitute::once(&agent_spec.name, &[("agent_id", &agent_id.to_string())])
                    .context(CouldNotCreateAgentName)?;
            agent_tags.push(tag::Tag::new("data_spec", &spec.name));
            if let Some(name_tag_key) = &agent_spec.name_tag_key {
                agent_tags.push(tag::Tag::new(name_tag_key, &agent_name));
            }
            let mut agent = agent::Agent::<T>::new(
                agent_spec,
                &agent_name,
                agent_id,
                &seed,
                agent_tags,
                start_datetime,
                end_datetime,
                execution_start_time,
                continue_on,
            )
            .context(CouldNotCreateAgent { name: &agent_name })?;
            let agent_points_writer = points_writer_builder.build_for_agent(&agent_name);
            // Each agent generates and writes concurrently on its own task.
            handles.push(tokio::task::spawn(async move {
                agent.generate_all(agent_points_writer).await
            }));
        }
    }
    // Join every agent task and sum the point counts; a join failure or an
    // agent error aborts with the corresponding error.
    let mut total_points = 0;
    for handle in handles {
        total_points += handle
            .await
            .context(TokioError)?
            .context(AgentCouldNotGeneratePoints)?;
    }
    Ok(total_points)
}
/// Shorthand trait for the functionality this crate needs a random number generator to have
/// (`Send + 'static` so generators can move into spawned tokio tasks).
pub trait DataGenRng: rand::Rng + rand::SeedableRng + Send + 'static {}

// Blanket impl: any RNG meeting the bounds is automatically a `DataGenRng`.
impl<T: rand::Rng + rand::SeedableRng + Send + 'static> DataGenRng for T {}
/// Encapsulating the creation of an optionally-seedable random number generator
/// to make this easy to change. Uses a 4-digit number expressed as a `String`
/// as the seed type to enable easy creation of another instance using the same
/// seed.
#[derive(Debug)]
pub struct RandomNumberGenerator<T: DataGenRng> {
    // The wrapped RNG; all `RngCore` calls delegate to it.
    rng: T,

    /// The seed used for this instance.
    pub seed: String,
}
impl<T: DataGenRng> Default for RandomNumberGenerator<T> {
    /// Seed a fresh generator from a random, zero-padded 4-digit number.
    fn default() -> Self {
        let seed = format!("{:04}", rand::thread_rng().gen_range(0..10000));
        Self::new(seed)
    }
}
impl<T: DataGenRng> RandomNumberGenerator<T> {
    /// Create a new instance using the specified seed.
    ///
    /// The seed string is hashed through `rand_seeder::Seeder`, so the same
    /// string always yields the same RNG state.
    pub fn new(seed: impl Into<String>) -> Self {
        let seed = seed.into();
        Self {
            rng: Seeder::from(&seed).make_rng(),
            seed,
        }
    }

    /// Generate a random GUID
    ///
    /// Draws 16 random bytes and stamps them as an RFC 4122 version-4 UUID.
    pub fn guid(&mut self) -> uuid::Uuid {
        let mut bytes = [0u8; 16];
        self.rng.fill_bytes(&mut bytes);
        uuid::Builder::from_bytes(bytes)
            .set_variant(uuid::Variant::RFC4122)
            .set_version(uuid::Version::Random)
            .build()
    }
}
// Pure delegation to the wrapped RNG so a `RandomNumberGenerator` can be
// used anywhere a `rand::RngCore` is expected.
impl<T: DataGenRng> rand::RngCore for RandomNumberGenerator<T> {
    fn next_u32(&mut self) -> u32 {
        self.rng.next_u32()
    }

    fn next_u64(&mut self) -> u64 {
        self.rng.next_u64()
    }

    fn fill_bytes(&mut self, dest: &mut [u8]) {
        self.rng.fill_bytes(dest);
    }

    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> std::result::Result<(), rand::Error> {
        self.rng.try_fill_bytes(dest)
    }
}
/// Gets the current time in nanoseconds since the epoch
///
/// # Panics
///
/// Panics if the system clock is before the Unix epoch, or if the nanosecond
/// count no longer fits in an `i64` (i.e. after the year 2262).
pub fn now_ns() -> i64 {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("Time went backwards")
        .as_nanos();
    i64::try_from(nanos).expect("Time does not fit")
}
// Always returns 0.
// Used in tests that need fully deterministic generator output.
#[cfg(test)]
#[derive(Default)]
struct ZeroRng;
#[cfg(test)]
impl rand::RngCore for ZeroRng {
    // The constant source: every 64-bit draw is zero.
    fn next_u64(&mut self) -> u64 {
        0
    }

    // Derived from `next_u64`, so always zero as well.
    fn next_u32(&mut self) -> u32 {
        self.next_u64() as u32
    }

    // Fill the buffer from successive `next_u64` draws (all zero bytes).
    fn fill_bytes(&mut self, dest: &mut [u8]) {
        rand_core::impls::fill_bytes_via_next(self, dest)
    }

    // Filling with zeros cannot fail.
    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> std::result::Result<(), rand::Error> {
        self.fill_bytes(dest);
        Ok(())
    }
}
// `SeedableRng` is required by the `DataGenRng` bound; the seed has no
// effect on this constant RNG.
#[cfg(test)]
impl rand::SeedableRng for ZeroRng {
    type Seed = Vec<u8>;

    // Ignore the seed value
    fn from_seed(_seed: Self::Seed) -> Self {
        Self
    }
}
// The test rng ignores the seed anyway, so the seed specified doesn't matter.
#[cfg(test)]
const TEST_SEED: &str = "";

// Convenience constructor for the deterministic test RNG.
// NOTE(review): not referenced in the tests visible here — confirm callers
// exist elsewhere before removing.
#[cfg(test)]
fn test_rng() -> RandomNumberGenerator<ZeroRng> {
    RandomNumberGenerator::<ZeroRng>::new(TEST_SEED)
}
// A random number type that does *not* have a predictable sequence of values for use in tests
// that assert on properties rather than exact values. Aliased for convenience in changing to
// a different Rng type.
#[cfg(test)]
type DynamicRng = rand::rngs::SmallRng;
#[cfg(test)]
mod test {
    use super::*;
    use crate::specification::*;
    use influxdb2_client::models::WriteDataPoint;
    use std::str::FromStr;

    // Test-local boxed-error `Result` so `?` works on mixed error types.
    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // An agent with a 10s sampling interval generating over [0, 15s) should
    // emit exactly one point at t=0, one at t=10s, then nothing.
    #[tokio::test]
    async fn historical_data_sampling_interval() -> Result<()> {
        let toml = r#"
name = "demo_schema"
[[agents]]
name = "basic"
sampling_interval = 10 # seconds
[[agents.measurements]]
name = "cpu"
[[agents.measurements.fields]]
name = "up"
bool = true"#;
        let data_spec = DataSpec::from_str(toml).unwrap();
        let agent_id = 0;
        let agent_spec = &data_spec.agents[0];
        // Take agent_tags out of the equation for the purposes of this test
        let agent_tags = vec![];
        let execution_start_time = now_ns();
        // imagine we've specified at the command line that we want to generate metrics
        // for 1970
        let start_datetime = Some(0);
        // for the first 15 seconds of the year
        let end_datetime = Some(15 * 1_000_000_000);
        let mut agent = agent::Agent::<ZeroRng>::new(
            agent_spec,
            &agent_spec.name,
            agent_id,
            TEST_SEED,
            agent_tags,
            start_datetime,
            end_datetime,
            execution_start_time,
            false,
        )?;
        let data_points = agent.generate().await?;
        let mut v = Vec::new();
        for data_point in data_points {
            data_point.write_data_point_to(&mut v).unwrap();
        }
        let line_protocol = String::from_utf8(v).unwrap();
        // Get a point for time 0
        let expected_line_protocol = "cpu up=f 0\n";
        assert_eq!(line_protocol, expected_line_protocol);
        let data_points = agent.generate().await?;
        let mut v = Vec::new();
        for data_point in data_points {
            data_point.write_data_point_to(&mut v).unwrap();
        }
        let line_protocol = String::from_utf8(v).unwrap();
        // Get a point for time 10s
        let expected_line_protocol = "cpu up=f 10000000000\n";
        assert_eq!(line_protocol, expected_line_protocol);
        // Don't get any points anymore because we're past the ending datetime
        let data_points = agent.generate().await?;
        assert!(
            data_points.is_empty(),
            "expected no data points, got {:?}",
            data_points
        );
        Ok(())
    }
}

View File

@ -0,0 +1,265 @@
#![deny(rust_2018_idioms)]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
clippy::explicit_iter_loop,
clippy::use_self
)]
use chrono::prelude::*;
use chrono_english::{parse_date_string, Dialect};
use clap::{crate_authors, crate_version, App, Arg};
use iox_data_generator::{specification::DataSpec, write::PointsWriterBuilder};
use tracing::info;
/// Entry point for the IOx data point generator CLI.
///
/// Parses command-line arguments, builds a points writer (line-protocol file
/// via `--output` or HTTP API via `--host`), then drives
/// `iox_data_generator::generate` over the TOML data specification.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    tracing_subscriber::fmt::init();
    let help = r#"IOx data point generator
Examples:
# Generate data points using the specification in `spec.toml` and save in the `lp` directory
iox_data_generator -s spec.toml -o lp
# Generate data points and write to the server running at localhost:8080 with the provided org,
# bucket and authorization token, creating the bucket
iox_data_generator -s spec.toml -h localhost:8080 --org myorg --org_id 0000111100001111 \
--bucket mybucket --token mytoken --create
# Generate data points for the 24 hours between midnight 2020-01-01 and 2020-01-02
iox_data_generator -s spec.toml -o lp --start 2020-01-01 --end 2020-01-02
# Generate data points starting from an hour ago until now, generating the historical data as
# fast as possible. Then generate data according to the sampling interval until terminated.
iox_data_generator -s spec.toml -o lp --start "1 hr ago" --continue
Logging:
Use the RUST_LOG environment variable to configure the desired logging level.
For example:
# Enable INFO level logging for all of iox_data_generator
RUST_LOG=iox_data_generator=info iox_data_generator -s spec.toml -o lp
"#;
    // NOTE(review): the full help text is passed as the clap App *name*, so
    // clap prints it at the top of `--help` output — confirm this is the
    // intended way to surface the examples rather than `after_help`.
    let matches = App::new(help)
        .version(crate_version!())
        .author(crate_authors!())
        .about("IOx data point generator")
        .arg(
            Arg::with_name("SPECIFICATION")
                .short("s")
                .long("spec")
                .help("Path to the specification TOML file describing the data generation")
                .takes_value(true)
                .required(true),
        )
        .arg(
            Arg::with_name("OUTPUT")
                .short("o")
                .long("output")
                .help("The filename to write line protocol")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("HOST")
                .short("h")
                .long("host")
                .help("The host name part of the API endpoint to write to")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("ORG")
                .long("org")
                .help("The organization name to write to")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("ORG_ID")
                .long("org_id")
                .help("The 16-digit hex ID of the organization. Only needed if passing `--create`.")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("BUCKET")
                .long("bucket")
                .help("The bucket name to write to")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("TOKEN")
                .long("token")
                .help("The API authorization token used for all requests")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("START")
                .long("start")
                .help(
                    "The date and time at which to start the timestamps of the generated data. \
                     Can be an exact datetime like `2020-01-01T01:23:45-05:00` or a fuzzy \
                     specification like `1 hour ago`. If not specified, defaults to now.",
                )
                .takes_value(true),
        )
        .arg(
            Arg::with_name("END")
                .long("end")
                .help(
                    "The date and time at which to stop the timestamps of the generated data. \
                     Can be an exact datetime like `2020-01-01T01:23:45-05:00` or a fuzzy \
                     specification like `1 hour ago`. If not specified, defaults to now.",
                )
                .takes_value(true),
        )
        .arg(
            Arg::with_name("create")
                .long("create")
                .help("Create the bucket specified before sending points. Requires `--org_id`"),
        )
        .arg(Arg::with_name("continue").long("continue").help(
            "Generate live data using the intervals from the spec after generating historical \
             data. This option has no effect if you specify an end time.",
        ))
        .get_matches();
    let spec_filename = matches
        .value_of("SPECIFICATION")
        // This should never fail if clap is working properly
        .expect("SPECIFICATION is a required argument");
    let execution_start_time = Local::now();
    // Resolve the optional --start/--end into epoch nanoseconds relative to
    // the moment the program started.
    let start_datetime = datetime_nanoseconds(matches.value_of("START"), execution_start_time);
    let end_datetime = datetime_nanoseconds(matches.value_of("END"), execution_start_time);
    let start_display = start_datetime.unwrap_or_else(|| execution_start_time.timestamp_nanos());
    let end_display = end_datetime.unwrap_or_else(|| execution_start_time.timestamp_nanos());
    let continue_on = matches.is_present("continue");
    info!(
        "Starting at {}, ending at {} ({}){}",
        start_display,
        end_display,
        // Span of the run in whole seconds
        (end_display - start_display) / 1_000_000_000,
        if continue_on { " then continuing" } else { "" },
    );
    let data_spec = DataSpec::from_file(spec_filename)?;
    // TODO: parquet output
    let mut points_writer_builder = if let Some(line_protocol_filename) = matches.value_of("OUTPUT")
    {
        PointsWriterBuilder::new_file(line_protocol_filename)?
    } else if let Some(host) = matches.value_of("HOST") {
        let (host, org, bucket, token, create_bucket, org_id) = validate_api_arguments(
            host,
            matches.value_of("ORG"),
            matches.value_of("BUCKET"),
            matches.value_of("TOKEN"),
            matches.is_present("create"),
            matches.value_of("ORG_ID"),
        );
        PointsWriterBuilder::new_api(host, org, bucket, token, create_bucket, org_id).await?
    } else {
        panic!("One of --output or --host must be provided.");
    };
    let result = iox_data_generator::generate::<rand::rngs::SmallRng>(
        &data_spec,
        &mut points_writer_builder,
        start_datetime,
        end_datetime,
        execution_start_time.timestamp_nanos(),
        continue_on,
    )
    .await;
    match result {
        Ok(total_points) => eprintln!("Submitted {} total points", total_points),
        Err(e) => panic!("Execution failed: \n{}", e),
    }
    Ok(())
}
/// Parse an optional CLI datetime string (exact or fuzzy, e.g. "1 hour ago")
/// into epoch nanoseconds, resolved relative to `now`.
///
/// Returns `None` when no argument was given; panics on an unparseable input.
fn datetime_nanoseconds(arg: Option<&str>, now: DateTime<Local>) -> Option<i64> {
    let raw = arg?;
    let parsed = parse_date_string(raw, now, Dialect::Us).expect("Could not parse time");
    Some(parsed.timestamp_nanos())
}
/// Validate the combination of CLI flags needed for API output.
///
/// When `--host` is used, `--org`, `--bucket`, and `--token` are mandatory,
/// and `--create` additionally requires `--org_id`. On success, unwraps the
/// options into a tuple ready for `PointsWriterBuilder::new_api`.
///
/// # Panics
///
/// Panics with a message naming every missing flag when validation fails.
fn validate_api_arguments<'a>(
    host: &'a str,
    org: Option<&'a str>,
    bucket: Option<&'a str>,
    token: Option<&'a str>,
    create_bucket: bool,
    org_id: Option<&'a str>,
) -> (&'a str, &'a str, &'a str, &'a str, bool, Option<&'a str>) {
    if create_bucket && org_id.is_none() {
        panic!("When `--create` is specified, `--org_id` is required, but it was missing.");
    }
    // Collect a message for each required flag that is absent.
    let errors: Vec<_> = [
        (org.is_none(), "`--org` is missing"),
        (bucket.is_none(), "`--bucket` is missing"),
        (token.is_none(), "`--token` is missing"),
    ]
    .iter()
    .filter(|(absent, _)| *absent)
    .map(|(_, msg)| *msg)
    .collect();
    if !errors.is_empty() {
        panic!(
            "When `--host` is specified, `--org`, `--bucket`, and `--token` are required, \
             but {}",
            errors.join(", ")
        );
    }
    // These `unwrap`s are safe because otherwise `errors` wouldn't be empty
    (
        host,
        org.unwrap(),
        bucket.unwrap(),
        token.unwrap(),
        create_bucket,
        org_id,
    )
}
#[cfg(test)]
mod test {
    use super::*;

    // A missing CLI argument must pass through as `None` rather than
    // defaulting to "now".
    #[test]
    fn none_datetime_is_none_nanoseconds() {
        let ns = datetime_nanoseconds(None, Local::now());
        assert!(ns.is_none());
    }

    #[test]
    #[ignore] // TODO: I think chrono-english isn't handling timezones the way I'd expect
    fn rfc3339() {
        let ns = datetime_nanoseconds(Some("2020-01-01T01:23:45-05:00"), Local::now());
        assert_eq!(ns, Some(1577859825000000000));
    }

    // Fuzzy relative specs like "1hr ago" resolve against the provided `now`.
    #[test]
    fn relative() {
        let fixed_now = Local::now();
        let ns = datetime_nanoseconds(Some("1hr ago"), fixed_now);
        let expected = (fixed_now - chrono::Duration::hours(1)).timestamp_nanos();
        assert_eq!(ns, Some(expected));
    }
}

View File

@ -0,0 +1,960 @@
//! Generating a set of points for one measurement configuration
use crate::{
field::FieldGeneratorSet,
specification,
substitution::Substitute,
tag::{Tag, TagGeneratorSet},
DataGenRng, RandomNumberGenerator,
};
use influxdb2_client::models::DataPoint;
use itertools::Itertools;
use snafu::{ResultExt, Snafu};
use std::fmt;
/// Measurement-specific Results, defaulting the error type to this module's
/// [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Errors that may happen while creating measurements
///
/// Each variant records the measurement name where relevant and wraps the
/// lower-level error as `source` (snafu context selectors are used at the
/// call sites in this module).
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when building a data point with the Influx DB
    /// client
    #[snafu(display(
        "Could not build data point for measurement `{}` with Influx Client, caused by:\n{}",
        name,
        source
    ))]
    InfluxDataPointError {
        /// The name of the relevant measurement
        name: String,

        /// Underlying Influx Client error that caused this problem
        source: influxdb2_client::models::data_point::DataPointError,
    },

    /// Error that may happen when substituting placeholder values
    #[snafu(display("Could not create measurement name, caused by:\n{}", source))]
    CouldNotCreateMeasurementName {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },

    /// Error that may happen when creating tag generator sets
    #[snafu(display(
        "Could not create tag generator sets for measurement `{}`, caused by:\n{}",
        name,
        source
    ))]
    CouldNotCreateTagGeneratorSets {
        /// The name of the relevant measurement
        name: String,

        /// Underlying `tag` module error that caused this problem
        source: crate::tag::Error,
    },

    /// Error that may happen when creating field generator sets
    #[snafu(display(
        "Could not create field generator sets for measurement `{}`, caused by:\n{}",
        name,
        source
    ))]
    CouldNotCreateFieldGeneratorSets {
        /// The name of the relevant measurement
        name: String,

        /// Underlying `field` module error that caused this problem
        source: crate::field::Error,
    },

    /// Error that may happen when generating a particular set of tags
    #[snafu(display(
        "Could not generate tags for measurement `{}`, caused by:\n{}",
        name,
        source
    ))]
    CouldNotGenerateTags {
        /// The name of the relevant measurement
        name: String,

        /// Underlying `tag` module error that caused this problem
        source: crate::tag::Error,
    },
}
/// A set of `count` measurements that have the same configuration but different
/// `measurement_id`s. The `generate` method on a `MeasurementGeneratorSet` will
/// always return `count` points.
#[derive(Debug)]
pub struct MeasurementGeneratorSet<T: DataGenRng> {
    // One generator per `measurement_id` in 0..count.
    measurement_generators: Vec<MeasurementGenerator<T>>,
}
impl<T: DataGenRng> MeasurementGeneratorSet<T> {
    /// Create a new set of measurement generators for a particular agent and
    /// measurement specification.
    ///
    /// Builds `spec.count` generators (default 1), each with a distinct
    /// `measurement_id` but sharing the spec, parent seed, static tags, and
    /// execution start time. Fails with the first generator-construction
    /// error.
    pub fn new(
        agent_name: &str,
        agent_id: usize,
        spec: &specification::MeasurementSpec,
        parent_seed: impl fmt::Display,
        static_tags: &[Tag],
        execution_start_time: i64,
    ) -> Result<Self> {
        let count = spec.count.unwrap_or(1);
        let measurement_generators = (0..count)
            .map(|measurement_id| {
                MeasurementGenerator::new(
                    agent_name,
                    agent_id,
                    measurement_id,
                    spec,
                    &parent_seed,
                    static_tags,
                    execution_start_time,
                )
            })
            .collect::<Result<_>>()?;
        Ok(Self {
            measurement_generators,
        })
    }

    /// Create one set of points
    ///
    /// Flattens the points produced by every generator in the set,
    /// short-circuiting on the first generation error.
    pub fn generate(&mut self, timestamp: i64) -> Result<Vec<DataPoint>> {
        let generate_results = self
            .measurement_generators
            .iter_mut()
            .map(|mg| mg.generate(timestamp));
        itertools::process_results(generate_results, |points| points.flatten().collect())
    }
}
/// Generate measurements
///
/// One instance corresponds to one (agent, measurement_id) pairing of a
/// measurement spec; it owns the tag and field generator sets derived from
/// that spec.
#[derive(Debug)]
pub struct MeasurementGenerator<T: DataGenRng> {
    // Kept so the seeded RNG's lifetime matches the generator's; not read
    // directly after construction.
    #[allow(dead_code)]
    rng: RandomNumberGenerator<T>,
    // Measurement name after placeholder substitution.
    name: String,
    // Tags attached verbatim to every generated point.
    static_tags: Vec<Tag>,
    // One entry per tag configuration in the spec.
    tag_generator_sets: Vec<TagGeneratorSet<T>>,
    // Product of the cardinalities of all tag generator sets; the number of
    // rows `generate` produces per timestamp.
    total_tag_cardinality: usize,
    // One entry per field configuration in the spec.
    field_generator_sets: Vec<FieldGeneratorSet>,
    // The spec's `count` (default 1).
    count: usize,
}
impl<T: DataGenRng> MeasurementGenerator<T> {
    /// Create a new way to generate measurements from a specification
    ///
    /// Substitutes `{{agent_id}}`, `{{agent_name}}`, and `{{measurement_id}}`
    /// into the configured measurement name, derives a per-measurement seed
    /// from the parent seed plus that name, and builds the tag and field
    /// generator sets from the spec.
    pub fn new(
        agent_name: impl Into<String>,
        agent_id: usize,
        measurement_id: usize,
        spec: &specification::MeasurementSpec,
        parent_seed: impl fmt::Display,
        static_tags: &[Tag],
        execution_start_time: i64,
    ) -> Result<Self> {
        let agent_name = agent_name.into();
        let spec_name = Substitute::once(
            &spec.name,
            &[
                ("agent_id", &agent_id.to_string()),
                ("agent_name", &agent_name),
                ("measurement_id", &measurement_id.to_string()),
            ],
        )
        .context(CouldNotCreateMeasurementName)?;
        // Chain the seeds so each measurement gets a deterministic but
        // distinct RNG stream.
        let seed = format!("{}-{}", parent_seed, spec_name);
        let rng = RandomNumberGenerator::<T>::new(seed);
        let tag_generator_sets: Vec<TagGeneratorSet<T>> = spec
            .tags
            .iter()
            .map(|tag_spec| TagGeneratorSet::new(agent_id, measurement_id, tag_spec, &rng.seed))
            .collect::<crate::tag::Result<_>>()
            .context(CouldNotCreateTagGeneratorSets { name: &spec_name })?;
        // The number of rows per generation is the product of every tag
        // set's cardinality.
        let total_tag_cardinality = tag_generator_sets
            .iter()
            .map(|tgs| tgs.tag_cardinality())
            .product();
        let field_generator_sets = spec
            .fields
            .iter()
            .map(|field_spec| {
                FieldGeneratorSet::new::<T>(
                    &agent_name,
                    agent_id,
                    measurement_id,
                    field_spec,
                    &rng.seed,
                    execution_start_time,
                )
            })
            .collect::<crate::field::Result<_>>()
            .context(CouldNotCreateFieldGeneratorSets { name: &spec_name })?;
        Ok(Self {
            rng,
            name: spec_name,
            static_tags: static_tags.to_vec(),
            tag_generator_sets,
            total_tag_cardinality,
            field_generator_sets,
            count: spec.count.unwrap_or(1),
        })
    }
}
impl<T: DataGenRng> MeasurementGenerator<T> {
    // Produce all data points for one timestamp: the cartesian product of the
    // multi-cardinality tags, each row augmented with the cardinality-1 tags,
    // then rendered into `DataPoint`s via `one`.
    fn generate(&mut self, timestamp: i64) -> Result<Vec<DataPoint>> {
        // Split out the tags that we want all combinations of. Perhaps these should be
        // a different type?
        let mut tags_with_cardinality: Vec<_> = itertools::process_results(
            self.tag_generator_sets
                .iter_mut()
                .filter(|tgs| tgs.tag_cardinality() > 1)
                .map(TagGeneratorSet::generate),
            |tags| {
                tags.multi_cartesian_product()
                    .map(|tag_set| tag_set.into_iter().flatten().collect())
                    .collect()
            },
        )
        .context(CouldNotGenerateTags { name: &self.name })?;
        // Ensure we generate something even when there are no tags.
        if tags_with_cardinality.is_empty() {
            tags_with_cardinality.push(Vec::new());
        }
        let total_tag_cardinality = self.total_tag_cardinality;
        assert_eq!(tags_with_cardinality.len(), total_tag_cardinality);
        // Split out the tags that we don't want to include when we're generating all
        // possible combinations above. Perhaps these should be a different
        // type? Leaving the type annotation here because it's terrible and
        // confusing otherwise.
        //
        // This type is made up of:
        //
        // - `Vec<Tag>` comes from one call to `TagGenerator::generate`. Tag
        //   configurations with a `count` value > 1 generate multiple tags with
        //   different keys but the same value for each generation. The length of this
        //   vector is the tag configuration's `count`.
        // - `Vec<Vec<Tag>>` comes from one call to `TagGenerator::generate_to_zip` and
        //   is a list of either cloned or resampled tags from this TagGenerator. The
        //   length of this vector is `total_tag_cardinality`.
        // - `Vec<Vec<Vec<Tag>>>` comes from collecting all these lists from each
        //   `TagGeneratorSet` that has a cardinality of 1 (the default). Each
        //   `TagGeneratorSet` corresponds to one tag configuration.
        let tags_without_cardinality_columns = self
            .tag_generator_sets
            .iter_mut()
            .filter(|tgs| tgs.tag_cardinality() == 1)
            .map(|tgs| tgs.generate_to_zip(total_tag_cardinality).unwrap());
        // This is doing a zip over an arbitrary number of iterators... itertools has
        // something that produces tuples but I want it to produce Vectors
        let mut tags_without_cardinality_column_iters: Vec<_> = tags_without_cardinality_columns
            .map(|column| column.into_iter())
            .collect();
        // For each group of tags that will become one row,
        for v in &mut tags_with_cardinality {
            // Get the rest of the tags that belong with this row that were either cloned or
            // resampled according to their configuration
            let tag_row: Vec<Vec<Tag>> = tags_without_cardinality_column_iters
                .iter_mut()
                .map(|column_iter| {
                    column_iter.next().expect(
                        "Should have generated `total_tag_cardinality` items, \
                         which should match the length of `tags_with_cardinality`",
                    )
                })
                .collect();
            // If count can't be combined with replacements, this `for` loop wouldn't be
            // needed
            for mut tags in tag_row {
                v.append(&mut tags);
            }
        }
        tags_with_cardinality
            .iter()
            .map(|tags| self.one(&tags[..], timestamp))
            .collect()
    }

    // Build a single `DataPoint`: static tags, then the row's tags, then one
    // generated value per field, stamped with `timestamp`.
    fn one(&mut self, tags: &[Tag], timestamp: i64) -> Result<DataPoint> {
        let mut point = DataPoint::builder(&self.name);
        point = self
            .static_tags
            .iter()
            .fold(point, |point, tag| point.tag(&tag.key, &tag.value));
        point = tags
            .iter()
            .fold(point, |point, tag| point.tag(&tag.key, &tag.value));
        for fgs in &mut self.field_generator_sets {
            for field in fgs.generate(timestamp) {
                point = point.field(&field.key, field.value);
            }
        }
        point = point.timestamp(timestamp);
        point
            .build()
            .context(InfluxDataPointError { name: &self.name })
    }
}
#[cfg(test)]
mod test {
use super::*;
use crate::{specification::*, DynamicRng, ZeroRng, TEST_SEED};
use influxdb2_client::models::WriteDataPoint;
use std::str;
type Error = Box<dyn std::error::Error>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
impl<T: DataGenRng> MeasurementGenerator<T> {
    /// Test convenience: generate one round and return only the first line
    /// of line protocol.
    fn generate_string(&mut self, timestamp: i64) -> Result<String> {
        let mut strings = self.generate_strings(timestamp)?;
        Ok(strings.swap_remove(0))
    }

    /// Test convenience: generate one round and render every point as a
    /// line protocol string.
    fn generate_strings(&mut self, timestamp: i64) -> Result<Vec<String>> {
        let mut strings = Vec::new();
        for point in self.generate(timestamp)? {
            let mut buf = Vec::new();
            point.write_data_point_to(&mut buf)?;
            strings.push(String::from_utf8(buf)?);
        }
        Ok(strings)
    }
}
#[test]
fn generate_measurement() -> Result {
    // Simplest case: one measurement, no tags, one i64 field. With
    // `ZeroRng` the "random" value drawn from `0..60` is always 0.
    let fake_now = 1234;

    let measurement_spec = MeasurementSpec {
        name: "cpu".into(),
        count: None,
        tags: vec![],
        fields: vec![FieldSpec {
            name: "response_time".into(),
            field_value_spec: FieldValueSpec::I64 {
                range: 0..60,
                increment: false,
                reset_after: None,
            },
            count: None,
        }],
    };

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol = measurement_generator.generate_string(fake_now)?;
    assert_eq!(
        line_protocol,
        format!("cpu response_time=0i {}\n", fake_now)
    );

    Ok(())
}
#[test]
fn generate_measurement_stable_rngs() -> Result {
    let fake_now = 5678;

    // Two dynamically generated fields, each with its own RNG stream.
    // Generating twice (at different timestamps) should produce different
    // random `response_time` values, showing the streams keep advancing
    // between generations rather than resetting.
    let measurement_spec = MeasurementSpec {
        name: "cpu".into(),
        count: Some(2),
        tags: vec![],
        fields: vec![
            FieldSpec {
                name: "load".into(),
                field_value_spec: FieldValueSpec::F64 { range: 0.0..100.0 },
                count: None,
            },
            FieldSpec {
                name: "response_time".into(),
                field_value_spec: FieldValueSpec::I64 {
                    range: 0..60_000,
                    increment: false,
                    reset_after: None,
                },
                count: None,
            },
        ],
    };

    let mut measurement_generator = MeasurementGenerator::<DynamicRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol = vec![measurement_generator.generate_string(fake_now)?];
    let response_times = extract_field_values("response_time", &line_protocol);

    let next_line_protocol = vec![measurement_generator.generate_string(fake_now + 1)?];
    let next_response_times = extract_field_values("response_time", &next_line_protocol);

    // Each line should have a different response time unless we get really, really unlucky
    assert_ne!(response_times, next_response_times);

    Ok(())
}
#[test]
fn generate_measurement_always_including_some_tags() -> Result {
    // Tags passed to `MeasurementGenerator::new` (as opposed to tags from
    // the spec) must appear on every generated line.
    let fake_now = 678;

    let measurement_spec = MeasurementSpec {
        name: "cpu".into(),
        count: None,
        tags: vec![],
        fields: vec![FieldSpec {
            name: "response_time".into(),
            field_value_spec: FieldValueSpec::I64 {
                range: 0..60,
                increment: false,
                reset_after: None,
            },
            count: None,
        }],
    };

    let always_tags = vec![Tag::new("my_tag", "my_val")];

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &always_tags,
        fake_now,
    )
    .unwrap();

    let line_protocol = measurement_generator.generate_string(fake_now)?;
    assert_eq!(
        line_protocol,
        format!("cpu,my_tag=my_val response_time=0i {}\n", fake_now),
    );

    Ok(())
}
#[test]
fn generate_measurement_with_basic_tags() -> Result {
    // Two plain tags from the spec; note the expected output has the tag
    // keys in lexicographic order (`some_name` before `tag_name`).
    let fake_now = 678;

    let measurement_spec = MeasurementSpec {
        name: "measurement".into(),
        tags: vec![
            TagSpec {
                name: "tag_name".into(),
                value: "tag_value".into(),
                ..Default::default()
            },
            TagSpec {
                name: "some_name".into(),
                value: "some_value".into(),
                ..Default::default()
            },
        ],
        fields: vec![FieldSpec {
            name: "field_name".into(),
            ..FieldSpec::default()
        }],
        ..Default::default()
    };

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol = measurement_generator.generate_string(fake_now)?;
    assert_eq!(
        line_protocol,
        format!(
            "measurement,some_name=some_value,tag_name=tag_value field_name=f {}\n",
            fake_now
        )
    );

    Ok(())
}
#[test]
fn generate_measurement_with_tags_with_count() -> Result {
    // `count = 2` on a tag spec produces two tags on the same line; the
    // `{{agent_id}}`, `{{measurement_id}}`, and `{{tag_id}}` placeholders
    // in the tag name are substituted (here 42, 99, and 0/1).
    let fake_now = 678;

    let measurement_spec = MeasurementSpec {
        name: "measurement".into(),
        tags: vec![TagSpec {
            name: "{{agent_id}}--{{measurement_id}}--tag_name--{{tag_id}}".into(),
            value: "tag_value".into(),
            count: Some(2),
            ..Default::default()
        }],
        fields: vec![FieldSpec {
            name: "field_name".into(),
            ..FieldSpec::default()
        }],
        ..Default::default()
    };

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        42,
        99,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol = measurement_generator.generate_string(fake_now)?;
    assert_eq!(
        line_protocol,
        format!("measurement,42--99--tag_name--0=tag_value,42--99--tag_name--1=tag_value field_name=f {}\n", fake_now),
    );

    Ok(())
}
#[test]
fn generate_measurement_with_tags_with_cardinality() -> Result {
    // `cardinality = 2` produces two *lines* per generation, one for each
    // substituted `{{cardinality}}` value.
    let fake_now = 678;

    let measurement_spec = MeasurementSpec {
        name: "measurement".into(),
        tags: vec![TagSpec {
            name: "tag_name".into(),
            value: "tag_value--{{cardinality}}".into(),
            cardinality: Some(2),
            ..Default::default()
        }],
        fields: vec![FieldSpec {
            name: "field_name".into(),
            ..FieldSpec::default()
        }],
        ..Default::default()
    };

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol = measurement_generator.generate_strings(fake_now)?;
    assert_eq!(
        line_protocol[0],
        format!(
            "measurement,tag_name=tag_value--0 field_name=f {}\n",
            fake_now
        )
    );
    assert_eq!(
        line_protocol[1],
        format!(
            "measurement,tag_name=tag_value--1 field_name=f {}\n",
            fake_now
        )
    );

    Ok(())
}
#[test]
fn generate_measurement_with_tags_with_multiple_cardinality() -> Result {
    // Two tags with cardinality 2 each produce the cross product:
    // 2 x 2 = 4 lines covering every (alpha, beta) combination.
    let fake_now = 678;

    let measurement_spec = MeasurementSpec {
        name: "measurement".into(),
        tags: vec![
            TagSpec {
                name: "alpha".into(),
                value: "alpha--{{cardinality}}".into(),
                cardinality: Some(2),
                ..Default::default()
            },
            TagSpec {
                name: "beta".into(),
                value: "beta--{{cardinality}}".into(),
                cardinality: Some(2),
                ..Default::default()
            },
        ],
        fields: vec![FieldSpec {
            name: "field_name".into(),
            ..FieldSpec::default()
        }],
        ..Default::default()
    };

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol = measurement_generator.generate_strings(fake_now)?;
    // `beta` varies fastest: (0,0), (0,1), (1,0), (1,1).
    assert_eq!(
        line_protocol[0],
        format!(
            "measurement,alpha=alpha--0,beta=beta--0 field_name=f {}\n",
            fake_now
        )
    );
    assert_eq!(
        line_protocol[1],
        format!(
            "measurement,alpha=alpha--0,beta=beta--1 field_name=f {}\n",
            fake_now
        )
    );
    assert_eq!(
        line_protocol[2],
        format!(
            "measurement,alpha=alpha--1,beta=beta--0 field_name=f {}\n",
            fake_now
        )
    );
    assert_eq!(
        line_protocol[3],
        format!(
            "measurement,alpha=alpha--1,beta=beta--1 field_name=f {}\n",
            fake_now
        )
    );

    Ok(())
}
#[test]
fn generate_measurement_with_tags_with_increment_every() -> Result {
    // `increment_every = 2` means `{{counter}}` stays at 0 for the first
    // two generations and ticks to 1 on the third.
    let fake_now = 678;

    let measurement_spec = MeasurementSpec {
        name: "measurement".into(),
        tags: vec![TagSpec {
            name: "tag_name".into(),
            value: "tag_value--{{counter}}".into(),
            increment_every: Some(2),
            ..Default::default()
        }],
        fields: vec![FieldSpec {
            name: "field_name".into(),
            ..FieldSpec::default()
        }],
        ..Default::default()
    };

    let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
        "agent_name",
        0,
        0,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let line_protocol_1 = measurement_generator.generate_string(fake_now)?;
    let line_protocol_2 = measurement_generator.generate_string(fake_now)?;
    let line_protocol_3 = measurement_generator.generate_string(fake_now)?;

    assert_eq!(
        line_protocol_1,
        format!(
            "measurement,tag_name=tag_value--0 field_name=f {}\n",
            fake_now,
        ),
    );
    assert_eq!(
        line_protocol_2,
        format!(
            "measurement,tag_name=tag_value--0 field_name=f {}\n",
            fake_now,
        ),
    );
    assert_eq!(
        line_protocol_3,
        format!(
            "measurement,tag_name=tag_value--1 field_name=f {}\n",
            fake_now,
        ),
    );

    Ok(())
}
#[test]
fn generate_measurement_with_replacement() -> Result {
    // `count = 2` on both the measurement and the field, with
    // `{{measurement_id}}` / `{{field_id}}` placeholders, should yield two
    // uniquely named measurements each carrying two uniquely named fields.
    let fake_now = 91011;

    let measurement_spec = MeasurementSpec {
        name: "measurement-{{agent_id}}-{{measurement_id}}".into(),
        count: Some(2),
        tags: vec![],
        fields: vec![FieldSpec {
            name: "field-{{agent_id}}-{{measurement_id}}-{{field_id}}".into(),
            field_value_spec: FieldValueSpec::I64 {
                range: 0..60,
                increment: false,
                reset_after: None,
            },
            count: Some(2),
        }],
    };

    let mut measurement_generator_set = MeasurementGeneratorSet::<ZeroRng>::new(
        "agent_name",
        42,
        &measurement_spec,
        TEST_SEED,
        &[],
        fake_now,
    )
    .unwrap();

    let points = measurement_generator_set.generate(fake_now).unwrap();
    let mut v = Vec::new();
    for point in points {
        point.write_data_point_to(&mut v)?;
    }
    let line_protocol = str::from_utf8(&v)?;

    assert_eq!(
        line_protocol,
        format!(
            "measurement-42-0 field-42-0-0=0i,field-42-0-1=0i {}
measurement-42-1 field-42-1-0=0i,field-42-1-1=0i {}
",
            fake_now, fake_now
        )
    );

    Ok(())
}
#[test]
fn guid_and_guid_with_cardinality() -> Result<()> {
    // `trace_id` has the default cardinality of 1, so its `{{guid}}` is
    // sampled once per generation and shared by both lines; `span_id` has
    // cardinality 2, so each of the two lines gets its own guid. New guids
    // should be drawn on every generation.
    let fake_now = 678;

    let spec: specification::MeasurementSpec = toml::from_str(
        r#"
name = "traces"
[[tags]]
name = "trace_id"
value = "value-{{guid}}"
[[tags]]
name = "span_id"
value = "value-{{guid}}"
cardinality = 2
[[fields]]
name = "timing"
i64_range = [5, 100]"#,
    )
    .unwrap();

    let mut measurement_generator = MeasurementGenerator::<DynamicRng>::new(
        "agent_name",
        0,
        0,
        &spec,
        TEST_SEED,
        &[],
        fake_now,
    )?;

    let line_protocol = measurement_generator.generate_strings(fake_now)?;

    let mut trace_ids = extract_tag_values("trace_id", &line_protocol);
    trace_ids.sort_unstable();
    trace_ids.dedup();
    // Both lines should have the same trace ID
    assert_eq!(trace_ids.len(), 1);

    let mut span_ids = extract_tag_values("span_id", &line_protocol);
    span_ids.sort_unstable();
    span_ids.dedup();
    // Each line should have a different span ID
    assert_eq!(span_ids.len(), 2);

    let next_line_protocol = measurement_generator.generate_strings(fake_now)?;

    let mut next_trace_ids = extract_tag_values("trace_id", &next_line_protocol);
    next_trace_ids.sort_unstable();
    next_trace_ids.dedup();
    // Both lines should have the same trace ID
    assert_eq!(next_trace_ids.len(), 1);

    // On each generation, there should be a new trace id
    assert_ne!(trace_ids, next_trace_ids);

    let mut next_span_ids = extract_tag_values("span_id", &next_line_protocol);
    next_span_ids.sort_unstable();
    next_span_ids.dedup();
    // Each line should have a different span ID
    assert_eq!(next_span_ids.len(), 2);

    // On each generation, there should be new span IDs too
    assert_ne!(span_ids, next_span_ids);

    Ok(())
}
#[test]
fn tag_replacements_with_resampling_true() -> Result<()> {
    // `resample_every_line = true` should pick a fresh replacement per line.
    resampling_test("resample_every_line = true", true)
}
#[test]
fn tag_replacements_with_resampling_false() -> Result<()> {
    // `resample_every_line = false` should sample once per generation.
    resampling_test("resample_every_line = false", false)
}
#[test]
fn tag_replacements_with_default_resampling_false() -> Result<()> {
    // Omitting the option entirely should behave like `false` (the default).
    resampling_test("", false)
}
/// Shared driver for the resampling tests: builds a spec with a
/// cardinality-10 tag (so each generation emits 10 lines) and a `host` tag
/// drawn from replacements, injecting `resampling_toml` into the `host`
/// tag's configuration. Asserts whether the 10 lines show one host value or
/// several, depending on `expect_different`.
fn resampling_test(resampling_toml: &str, expect_different: bool) -> Result<()> {
    let fake_now = 678;

    // Doubled braces (`{{{{...}}}}`) survive `format!` as the literal
    // `{{...}}` placeholders the spec language expects.
    let spec: specification::MeasurementSpec = toml::from_str(&format!(
        r#"
name = "resampling"
[[tags]]
name = "tag-1"
value = "value-{{{{cardinality}}}}"
cardinality = 10
[[tags]]
name = "host"
value = "{{{{host}}}}"
replacements = [
{{replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]}},
]
{}
[[fields]]
name = "timing"
i64_range = [5, 100]"#,
        resampling_toml
    ))
    .unwrap();

    let mut measurement_generator = MeasurementGenerator::<DynamicRng>::new(
        "agent_name",
        0,
        0,
        &spec,
        TEST_SEED,
        &[],
        fake_now,
    )?;

    let lines = measurement_generator.generate_strings(fake_now)?;

    let mut host_values = extract_tag_values("host", &lines);
    host_values.sort_unstable();
    host_values.dedup();

    if expect_different {
        // With 10 lines and 4 candidate hosts, at least two distinct
        // values is overwhelmingly likely when resampling per line.
        assert!(host_values.len() > 1);
    } else {
        assert_eq!(host_values.len(), 1);
    }

    Ok(())
}
// Hacktacular extracting of values from line protocol without pulling in another crate
fn extract_tag_values<'a>(tag_name: &str, lines: &'a [String]) -> Vec<&'a str> {
    // The tag must appear as `,<tag_name>=`, i.e. it can't be the
    // measurement name itself.
    let prefix = format!(",{}=", tag_name);

    lines
        .iter()
        .map(|line| {
            // Tags live in the first space-separated section of the line.
            let tag_section = line.splitn(2, ' ').next().unwrap();
            // Take everything after the last occurrence of the prefix...
            let value_onwards = tag_section.rsplitn(2, &prefix).next().unwrap();
            // ...and cut it off at the next tag, if any.
            value_onwards.splitn(2, ',').next().unwrap()
        })
        .collect()
}
/// Extract the value of the named field from each line of line protocol.
///
/// Like `extract_tag_values`, this is a quick-and-dirty parser for tests
/// only: the field must be preceded by a comma (`,<field_name>=`), so the
/// first field of a line is not found by this helper.
fn extract_field_values<'a>(field_name: &str, lines: &'a [String]) -> Vec<&'a str> {
    lines
        .iter()
        .map(|line| {
            // Skip the measurement/tags section; fields are in the second
            // space-separated section of the line.
            let mut split = line.splitn(2, ' ');
            split.next();
            let after_space = split.next().unwrap();
            let prefix = format!(",{}=", field_name);
            let after = after_space.rsplitn(2, &prefix).next().unwrap();
            // Cut at the next field (comma) AND at the trailing timestamp
            // (space). Previously the space split was missing, so for the
            // last field the timestamp leaked into the extracted value,
            // making inequality assertions on values generated at different
            // timestamps trivially (and wrongly) true.
            let value = after.splitn(2, ',').next().unwrap();
            value.splitn(2, ' ').next().unwrap()
        })
        .collect()
}
}

View File

@ -0,0 +1,616 @@
//! Reading and interpreting data generation specifications.
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use std::{fs, ops::Range, path::Path, str::FromStr};
/// Errors that may happen while reading a TOML specification.
// The `Snafu` derive also generates the `ReadFile` and `Parse` context
// selectors used with `.context(...)` below.
#[derive(Snafu, Debug)]
pub enum Error {
    /// File-related error that may happen while reading a specification
    #[snafu(display(r#"Error reading data spec from TOML file: {}"#, source))]
    ReadFile {
        /// Underlying I/O error that caused this problem
        source: std::io::Error,
    },

    /// TOML parsing error that may happen while interpreting a specification
    #[snafu(display(r#"Error parsing data spec from TOML: {}"#, source))]
    Parse {
        /// Underlying TOML error that caused this problem
        source: toml::de::Error,
    },
}

// Module-local alias: results default to this module's `Error`.
type Result<T, E = Error> = std::result::Result<T, E>;
/// The full specification for the generation of a data set.
// `deny_unknown_fields` makes typos in user-written TOML a hard parse error
// rather than silently ignored configuration.
#[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct DataSpec {
    /// Every point generated from this configuration will contain a tag
    /// `data_spec=[this value]` to identify what generated that data. This
    /// name can also be used in string replacements by using the
    /// placeholder `{{data_spec}}`.
    pub name: String,

    /// A string to be used as the seed to the random number generators.
    ///
    /// When specified, this is used as a base seed propagated through all
    /// measurements, tags, and fields, which will each have their own
    /// random number generator seeded by this seed plus their name. This
    /// has the effect of keeping each value sequence generated per measurement,
    /// tag, or field stable even if the configurations in other parts of the
    /// schema are changed. That is, if you have a field named `temp` and on
    /// the first run with base seed `foo` generates the values `[10, 50,
    /// 72, 3]`, and then you add another field named `weight` to the schema
    /// and run with base seed `foo` again, the values generated for `temp`
    /// should again be `[10, 50, 72, 3]`. This enables incremental
    /// development of a schema without churn, if that is undesired.
    ///
    /// When this is not specified, the base seed will be randomly generated. It
    /// will be printed to stdout so that the value used can be specified in
    /// future configurations if reproducing a particular set of sequences
    /// is desired.
    pub base_seed: Option<String>,

    /// The specification for the data-generating agents in this data set.
    pub agents: Vec<AgentSpec>,
}
impl DataSpec {
/// Given a filename, read the file and parse the specification.
pub fn from_file(file_name: &str) -> Result<Self> {
let spec_toml = fs::read_to_string(file_name).context(ReadFile)?;
Self::from_str(&spec_toml)
}
}
impl FromStr for DataSpec {
    type Err = Error;

    /// Parse a specification from its TOML text representation, mapping any
    /// TOML error into this module's `Error::Parse`.
    fn from_str(spec_toml: &str) -> std::result::Result<Self, <Self as FromStr>::Err> {
        toml::from_str(spec_toml).context(Parse)
    }
}
/// The specification of the behavior of an agent, the entity responsible for
/// generating a number of data points according to its configuration.
// `Default` is derived only for tests so specs can be built with
// `..Default::default()`.
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(Default))]
#[serde(deny_unknown_fields)]
pub struct AgentSpec {
    /// Used as the value for the `name` tag if `name_tag_key` is `Some`; has no
    /// effect if `name_tag_key` is not specified.
    ///
    /// Can be a plain string or a string with placeholders for:
    ///
    /// - `{{agent_id}}` - the agent ID
    pub name: String,

    /// Specifies the number of agents that should be created with this spec.
    /// Default value is 1.
    pub count: Option<usize>,

    /// How often this agent should generate samples, in number of seconds. If
    /// not specified, this agent will only generate one sample.
    pub sampling_interval: Option<usize>,

    /// If specified, every measurement generated by this agent will include a
    /// tag with this `String` as its key, and with the `AgentSpec`'s `name`
    /// as the value (with any substitutions in the `name` performed)
    pub name_tag_key: Option<String>,

    /// If specified, the values of the tags will be cycled through per `Agent`
    /// instance such that all measurements generated by that agent will
    /// contain tags with the specified name and that agent's `name` field
    /// (with replacements made) as the value.
    // `serde(default)` lets the TOML omit `tags` entirely.
    #[serde(default)]
    pub tags: Vec<AgentTag>,

    /// The specifications for the measurements for the agent to generate.
    pub measurements: Vec<MeasurementSpec>,
}
/// Tags that are associated to all measurements that a particular agent
/// generates. The values are rotated through so that each agent gets one of the
/// specified values for this key.
#[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct AgentTag {
    /// The tag key to use when adding this tag to all measurements for an agent
    pub key: String,

    /// The values to cycle through for each agent for this tag key
    pub values: Vec<String>,
}
/// The specification of how to generate data points for a particular
/// measurement.
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(Default))]
#[serde(deny_unknown_fields)]
pub struct MeasurementSpec {
    /// Name of the measurement. Can be a plain string or a string with
    /// placeholders for:
    ///
    /// - `{{agent_id}}` - the agent ID
    /// - `{{measurement_id}}` - the measurement's ID, which must be used if
    ///   `count` > 1 so that unique measurement names are created
    pub name: String,

    /// The number of measurements with this configuration that should be
    /// created. Default value is 1. If specified, use `{{measurement_id}}`
    /// in this measurement's `name` to create unique measurements.
    pub count: Option<usize>,

    /// Specification of the tags for this measurement
    // Optional in the TOML; omitting it means "no tags".
    #[serde(default)]
    pub tags: Vec<TagSpec>,

    /// Specification of the fields for this measurement. At least one field is
    /// required.
    pub fields: Vec<FieldSpec>,
}
/// The specification of how to generate tag keys and values for a particular
/// measurement.
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(Default))]
#[serde(deny_unknown_fields)]
pub struct TagSpec {
    /// Key/name for this tag. Can be a plain string or a string with
    /// placeholders for:
    ///
    /// - `{{agent_id}}` - the agent ID
    /// - `{{measurement_id}}` - the measurement ID
    /// - `{{tag_id}}` - the tag ID, which must be used if `count` > 1 so that
    ///   unique tag names are created
    pub name: String,

    /// Value for this tag. Can be a plain string or a string with placeholders
    /// for:
    ///
    /// - `{{agent_id}}` - the agent ID
    /// - `{{measurement_id}}` - the measurement ID
    /// - `{{cardinality}}` - the cardinality counter value. Must use this or
    ///   `{{guid}}` if `cardinality` > 1 so that unique tag values are created
    /// - `{{counter}}` - the increment counter value. Only useful if
    ///   `increment_every` is set.
    /// - `{{guid}}` - a randomly generated unique string. If `cardinality` > 1,
    ///   each tag will have a different GUID.
    pub value: String,

    /// The number of tags with this configuration that should be created.
    /// Default value is 1. If specified, use `{{tag_id}}` in this tag's
    /// `name` to create unique tags.
    pub count: Option<usize>,

    /// A number that controls how many values are generated, which impacts how
    /// many rows are created for each agent generation. Default value is 1.
    /// If specified, use `{{cardinality}}` or `{{guid}}` in this tag's
    /// `value` to create unique values.
    // NOTE(review): this is `u32` while `count` is `usize` — presumably
    // intentional, but worth confirming for consistency.
    pub cardinality: Option<u32>,

    /// How often to increment the `{{counter}}` value. For example, if
    /// `increment_every` is set to 10, `{{counter}}` will increase by 1
    /// after every 10 agent generations. This simulates temporal tag values
    /// like process IDs or container IDs in tags. If not specified, the value
    /// of `{{counter}}` will always be 0.
    pub increment_every: Option<usize>,

    /// A list of replacement placeholders and the values to replace them with.
    /// The values can optionally have weights associated with them to
    /// change the probabilities that its value will be used.
    #[serde(default)]
    pub replacements: Vec<Replacement>,

    /// When there are replacements specified and other tags in this measurement
    /// with cardinality greater than 1, this option controls whether this
    /// tag will get a new replacement value on every line in a generation
    /// (`true`) or whether it will be sampled once and have the same value
    /// on every line in a generation (`false`). If there are no replacements on
    /// this tag or any other tags with a cardinality greater than one, this
    /// has no effect.
    // Defaults to `false` when omitted from the TOML.
    #[serde(default)]
    pub resample_every_line: bool,
}
/// The specification of how to generate field keys and values for a particular
/// measurement.
// Deserialized via `FieldSpecIntermediate` (see `From` impl below), which
// matches the flat TOML layout and resolves the mutually exclusive
// value-type options into a `FieldValueSpec`.
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(Default))]
#[serde(from = "FieldSpecIntermediate")]
pub struct FieldSpec {
    /// Key/name for this field. Can be a plain string or a string with
    /// placeholders for:
    ///
    /// - `{{agent_id}}` - the agent ID
    /// - `{{measurement_id}}` - the measurement ID
    /// - `{{field_id}}` - the field ID, which must be used if `count` > 1 so
    ///   that unique field names are created
    pub name: String,

    /// Specification for the value for this field.
    pub field_value_spec: FieldValueSpec,

    /// How many fields with this configuration should be created
    pub count: Option<usize>,
}
impl From<FieldSpecIntermediate> for FieldSpec {
    /// Resolve the mutually exclusive TOML options of a
    /// `FieldSpecIntermediate` into the matching `FieldValueSpec` variant.
    ///
    /// The options are checked in a fixed order (`bool`, `i64_range`,
    /// `f64_range`, `pattern`, `uptime`); the first one present wins.
    ///
    /// # Panics
    ///
    /// Panics if none of the value-type options is specified, because the
    /// field's type can't be determined. (`From` can't report errors; if
    /// this should become recoverable, switch to `TryFrom`.)
    fn from(value: FieldSpecIntermediate) -> Self {
        let field_value_spec = if let Some(b) = value.bool {
            FieldValueSpec::Bool(b)
        } else if let Some((start, end)) = value.i64_range {
            FieldValueSpec::I64 {
                range: (start..end),
                // `increment: Some(false)` and `None` are equivalent.
                increment: value.increment.unwrap_or(false),
                reset_after: value.reset_after,
            }
        } else if let Some((start, end)) = value.f64_range {
            FieldValueSpec::F64 {
                range: (start..end),
            }
        } else if let Some(pattern) = value.pattern {
            FieldValueSpec::String {
                pattern,
                replacements: value.replacements,
            }
        } else if let Some(kind) = value.uptime {
            FieldValueSpec::Uptime { kind }
        } else {
            // Fixed: the debug placeholder was missing its closing backtick.
            panic!(
                "Can't tell what type of field value you're trying to specify with this \
                 configuration: `{:?}`",
                value
            );
        };

        Self {
            name: value.name,
            field_value_spec,
            count: value.count,
        }
    }
}
/// The specification of a field value of a particular type. Instances should be
/// created by converting a `FieldSpecIntermediate`, which more closely matches
/// the TOML structure.
// `PartialEq` is needed so tests can assert on parsed specs.
#[derive(Debug, PartialEq)]
pub enum FieldValueSpec {
    /// Configuration of a boolean field.
    Bool(bool),

    /// Configuration of an integer field.
    I64 {
        /// The `Range` in which random integer values will be generated. If the
        /// range only contains one value, all instances of this field
        /// will have the same value.
        range: Range<i64>,

        /// When set to true, after an initial random value in the range is
        /// generated, a random increment in the range will be generated
        /// and added to the initial value. That means the
        /// value for this field will always be increasing. When the value
        /// reaches the max value of i64, the value will wrap around to
        /// the min value of i64 and increment again.
        increment: bool,

        /// If `increment` is true, after this many samples, reset the value to
        /// start the increasing value over. If this is `None`, the
        /// value won't restart until reaching the max value of i64. If
        /// `increment` is false, this has no effect.
        reset_after: Option<usize>,
    },

    /// Configuration of a floating point field.
    F64 {
        /// The `Range` in which random floating point values will be generated.
        /// If start == end, all instances of this field will have the
        /// same value.
        range: Range<f64>,
    },

    /// Configuration of a string field.
    String {
        /// Pattern containing placeholders that specifies how to generate the
        /// string values.
        ///
        /// Valid placeholders include:
        ///
        /// - `{{agent_name}}` - the agent spec's name, with any replacements
        ///   done
        /// - `{{time}}` - the current time in nanoseconds since the epoch.
        ///   TODO: support specifying a strftime
        /// - any other placeholders as specified in `replacements`. If a
        ///   placeholder has no value specified in `replacements`, it will end
        ///   up as-is in the field value.
        pattern: String,

        /// A list of replacement placeholders and the values to replace them
        /// with. The values can optionally have weights associated with
        /// them to change the probabilities that its value
        /// will be used.
        replacements: Vec<Replacement>,
    },

    /// Configuration of a field with the value of the number of seconds the
    /// data generation tool has been running.
    Uptime {
        /// Format of the uptime value in this field
        kind: UptimeKind,
    },
}
/// The kind of field value to create using the data generation tool's uptime
// The `serde(rename)`s mean the TOML spells these as `uptime = "i64"` or
// `uptime = "telegraf"`.
#[derive(Debug, PartialEq, Copy, Clone, Deserialize)]
pub enum UptimeKind {
    /// Number of seconds since the tool started running as an i64 field
    #[serde(rename = "i64")]
    I64,

    /// Number of seconds since the tool started running, formatted as a string
    /// field containing the value in the format "x day(s), HH:MM"
    #[serde(rename = "telegraf")]
    Telegraf,
}
// Test-only default so `FieldSpec`s can be built with `..Default::default()`;
// an evenly distributed boolean field is the simplest spec.
#[cfg(test)]
impl Default for FieldValueSpec {
    fn default() -> Self {
        Self::Bool(true)
    }
}
/// An intermediate representation of the field specification that more directly
/// corresponds to the way field configurations are expressed in TOML. This
/// structure is transformed into the `FieldValueSpec` enum that ensures the
/// options for the different field value types are mutually exclusive.
// Mutual exclusivity is enforced in `From<FieldSpecIntermediate> for
// FieldSpec`, which panics when no value-type option is set.
#[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)]
struct FieldSpecIntermediate {
    /// Key/name for this field. Can be a plain string or a string with
    /// placeholders for:
    ///
    /// - `{{agent_id}}` - the agent ID
    /// - `{{measurement_id}}` - the measurement ID
    /// - `{{field_id}}` - the field ID, which must be used if `count` > 1 so
    ///   that unique field names are created
    name: String,

    /// The number of fields with this configuration that should be created.
    /// Default value is 1. If specified, use `{{field_id}}` in this field's
    /// `name` to create unique fields.
    count: Option<usize>,

    /// Specify `bool` to make a field that has the Boolean type. `true` means
    /// to generate the boolean randomly with equal probability. `false`
    /// means...? Specifying any other optional fields along with this one
    /// is invalid.
    bool: Option<bool>,

    /// Specify `i64_range` to make an integer field. The values will be
    /// randomly generated within the specified range with equal
    /// probability. If the range only contains one element, all occurrences
    /// of this field will have the same value. Can be combined with
    /// `increment`; specifying any other optional fields is invalid.
    i64_range: Option<(i64, i64)>,

    /// Specify `f64_range` to make a floating point field. The values will be
    /// randomly generated within the specified range. If start == end, all
    /// occurrences of this field will have that value.
    /// Can this be combined with `increment`?
    f64_range: Option<(f64, f64)>,

    /// When set to true with an `i64_range` (is this valid with any other
    /// type?), after an initial random value is generated, a random
    /// increment will be generated and added to the initial value. That
    /// means the value for this field will always be increasing. When the value
    /// reaches the end of the range...? The end of the range will be repeated
    /// forever? The series will restart at the start of the range?
    /// Something else? Setting this to `Some(false)` has the same effect as
    /// `None`.
    increment: Option<bool>,

    /// If `increment` is true, after this many samples, reset the value to
    /// start the increasing value over. If this is `None`, the value won't
    /// restart until reaching the max value of i64. If `increment` is
    /// false, this has no effect.
    reset_after: Option<usize>,

    /// Set `pattern` to make a field with the string type. If this doesn't
    /// include any placeholders, all occurrences of this field will have
    /// this value.
    ///
    /// Valid placeholders include:
    ///
    /// - `{{agent_name}}` - the agent spec's name, with any replacements done
    /// - `{{time}}` - the current time in nanoseconds since the epoch. TODO:
    ///   support specifying a strftime
    /// - any other placeholders as specified in `replacements`. If a
    ///   placeholder has no value specified in `replacements`, it will end up
    ///   as-is in the field value.
    pattern: Option<String>,

    /// A list of replacement placeholders and the values to replace them with.
    /// If a placeholder specified here is not used in `pattern`, it will
    /// have no effect. The values may optionally have a probability weight
    /// specified with them; if not specified, the value will have weight 1.
    /// If no weights are specified, the values will be generated with equal
    /// probability.
    #[serde(default)]
    replacements: Vec<Replacement>,

    /// The kind of uptime that should be used for this field. If specified, no
    /// other options are valid. If not specified, this is not an uptime
    /// field.
    uptime: Option<UptimeKind>,
}
/// The specification of what values to substitute in for placeholders specified
/// in `String` field values.
#[derive(Deserialize, Debug, PartialEq, Clone)]
#[serde(deny_unknown_fields)]
pub struct Replacement {
    /// A placeholder key that can be used in field `pattern`s.
    pub replace: String,

    /// The possible values to use instead of the placeholder key in `pattern`.
    /// Values may optionally have a weight specified. If no weights are
    /// specified, the values will be randomly generated with equal
    /// probability. The weights are passed to [`rand`'s `choose_weighted`
    /// method][choose_weighted] and are a relative likelihood such that the
    /// probability of each item being selected is its weight divided by the sum
    /// of all weights in this group.
    ///
    /// [choose_weighted]: https://docs.rs/rand/0.7.3/rand/seq/trait.SliceRandom.html#tymethod.choose_weighted
    pub with: Vec<ReplacementValue>,
}
// `untagged` means serde tries the variants in declaration order: a plain
// TOML string becomes `String`, a `[value, weight]` pair becomes `Weighted`.
#[derive(Debug, Deserialize, PartialEq, Clone)]
#[serde(untagged, deny_unknown_fields)]
/// A possible value to use instead of a placeholder key, optionally with an
/// associated weight. If no weight is specified, the weight used will be 1.
pub enum ReplacementValue {
    /// Just a value without a weight
    String(String),

    /// A value with a specified relative likelihood weight that gets passed on
    /// to [`rand`'s `choose_weighted` method][choose_weighted]. The
    /// probability of each item being selected is its weight divided by the
    /// sum of all weights in the `Replacement` group.
    ///
    /// [choose_weighted]: https://docs.rs/rand/0.7.3/rand/seq/trait.SliceRandom.html#tymethod.choose_weighted
    Weighted(String, u32),
}
impl ReplacementValue {
/// The associated replacement value
pub fn value(&self) -> &str {
use ReplacementValue::*;
match self {
String(s) => s,
Weighted(s, ..) => s,
}
}
/// The associated weight value specified; defaults to 1.
pub fn weight(&self) -> u32 {
use ReplacementValue::*;
match self {
String(..) => 1,
Weighted(.., w) => *w,
}
}
}
#[cfg(test)]
mod test {
use super::*;
type Error = Box<dyn std::error::Error>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
static TELEGRAF_TOML: &str = include_str!("../schemas/telegraf.toml");
#[test]
fn parse_spec() -> Result {
    // End-to-end parse of the bundled telegraf schema, spot-checking the
    // first agent's single measurement and each of its five fields (one
    // per field value type except uptime).
    let spec = DataSpec::from_str(TELEGRAF_TOML)?;

    assert_eq!(spec.name, "demo_schema");
    assert_eq!(spec.agents.len(), 2);

    let agent0 = &spec.agents[0];
    assert_eq!(agent0.name, "demo");

    let agent0_measurements = &agent0.measurements;
    assert_eq!(agent0_measurements.len(), 1);

    let a0m0 = &agent0_measurements[0];
    assert_eq!(a0m0.name, "some_measurement");

    let a0m0_fields = &a0m0.fields;
    assert_eq!(a0m0_fields.len(), 5);

    // Boolean field
    let a0m0f0 = &a0m0_fields[0];
    assert_eq!(a0m0f0.name, "field1");
    assert_eq!(a0m0f0.field_value_spec, FieldValueSpec::Bool(true));

    // Plain i64 range
    let a0m0f1 = &a0m0_fields[1];
    assert_eq!(a0m0f1.name, "field2");
    assert_eq!(
        a0m0f1.field_value_spec,
        FieldValueSpec::I64 {
            range: 3..200,
            increment: false,
            reset_after: None,
        }
    );

    // Incrementing i64 range
    let a0m0f2 = &a0m0_fields[2];
    assert_eq!(a0m0f2.name, "field3");
    assert_eq!(
        a0m0f2.field_value_spec,
        FieldValueSpec::I64 {
            range: 1000..5000,
            increment: true,
            reset_after: None,
        }
    );

    // f64 range
    let a0m0f3 = &a0m0_fields[3];
    assert_eq!(a0m0f3.name, "field4");
    assert_eq!(
        a0m0f3.field_value_spec,
        FieldValueSpec::F64 { range: 0.0..100.0 }
    );

    // String pattern with both unweighted and weighted replacements
    let a0m0f4 = &a0m0_fields[4];
    assert_eq!(a0m0f4.name, "field5");
    assert_eq!(
        a0m0f4.field_value_spec,
        FieldValueSpec::String {
            pattern:
                "{{agent_name}} foo {{level}} {{format-time \"%Y-%m-%d %H:%M\"}} {{random 200}}"
                    .into(),
            replacements: vec![
                Replacement {
                    replace: "color".into(),
                    with: vec![
                        ReplacementValue::String("red".into()),
                        ReplacementValue::String("blue".into()),
                        ReplacementValue::String("green".into())
                    ],
                },
                Replacement {
                    replace: "level".into(),
                    with: vec![
                        ReplacementValue::Weighted("info".into(), 800),
                        ReplacementValue::Weighted("warn".into(), 195),
                        ReplacementValue::Weighted("error".into(), 5)
                    ],
                }
            ],
        }
    );

    Ok(())
}
#[test]
fn parse_fully_supported_spec() -> Result<()> {
// The fully supported spec is mostly for manual testing, but we should make
// sure while developing that it's valid as well so that when we go to
// do manual testing it isn't broken
// Also read it from the file to test `DataSpec::from_file` rather than
// include_str
let data_spec = DataSpec::from_file("schemas/fully-supported.toml")?;
assert_eq!(data_spec.name, "demo_schema");
Ok(())
}
#[test]
fn not_specifying_vectors_gets_default_empty_vector() {
let toml = r#"
name = "demo_schema"
base_seed = "this is a demo"
[[agents]]
name = "basic"
[[agents.measurements]]
name = "cpu"
[[agents.measurements.fields]]
name = "host"
pattern = "server"
"#;
let spec = DataSpec::from_str(toml).unwrap();
let agent0 = &spec.agents[0];
assert!(agent0.tags.is_empty());
let agent0_measurements = &agent0.measurements;
let a0m0 = &agent0_measurements[0];
assert!(a0m0.tags.is_empty());
let a0m0_fields = &a0m0.fields;
let a0m0f0 = &a0m0_fields[0];
let field_spec = &a0m0f0.field_value_spec;
assert!(
matches!(field_spec, FieldValueSpec::String { replacements, .. } if replacements.is_empty()),
"expected a String field with empty replacements; was {:?}",
field_spec
);
}
}

View File

@ -0,0 +1,268 @@
//! Substituting dynamic values into a template as specified in various places
//! in the schema.
use crate::{specification, DataGenRng, RandomNumberGenerator};
use chrono::prelude::*;
use handlebars::{
Context, Handlebars, Helper, HelperDef, HelperResult, Output, RenderContext, RenderError,
};
use rand::{distributions::Alphanumeric, seq::SliceRandom, Rng};
use serde::Serialize;
use snafu::{ResultExt, Snafu};
use std::{collections::BTreeMap, convert::TryInto, sync::Mutex};
/// Substitution-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while substituting values into templates.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when compiling a template string into a
    /// reusable Handlebars template
    #[snafu(display(
        "Could not perform text substitution in `{}`, caused by:\n{}",
        template,
        source
    ))]
    CantCompileTemplate {
        /// Underlying Handlebars error that caused this problem
        source: handlebars::TemplateError,
        /// Template that caused this problem
        template: String,
    },

    /// Error that may happen when rendering a compiled template with a set
    /// of substitution values
    #[snafu(display(
        "Could not perform text substitution in `{}`, caused by:\n{}",
        template,
        source
    ))]
    CantPerformSubstitution {
        /// Underlying Handlebars error that caused this problem
        source: handlebars::RenderError,
        /// Template that caused this problem
        template: String,
    },
}
/// Handlebars helper backing `{{random N}}`: writes `N` random alphanumeric
/// characters drawn from the wrapped, seedable RNG. The RNG sits behind a
/// `Mutex` because `HelperDef::call` only receives `&self`.
#[derive(Debug)]
struct RandomHelper<T: DataGenRng>(Mutex<RandomNumberGenerator<T>>);

impl<T: DataGenRng> HelperDef for RandomHelper<T> {
    fn call<'reg: 'rc, 'rc>(
        &self,
        h: &Helper<'_, '_>,
        _: &Handlebars<'_>,
        _: &Context,
        _: &mut RenderContext<'_, '_>,
        out: &mut dyn Output,
    ) -> HelperResult {
        // The single parameter is the number of characters to emit.
        let len: usize = h
            .param(0)
            .ok_or_else(|| RenderError::new("`random` requires a parameter"))?
            .value()
            .as_u64()
            .ok_or_else(|| RenderError::new("`random`'s parameter must be an unsigned integer"))?
            .try_into()
            .map_err(|_| RenderError::new("`random`'s parameter must fit in a usize"))?;

        let rng = &mut *self.0.lock().expect("mutex poisoned");

        let mut random = String::with_capacity(len);
        for _ in 0..len {
            random.push(char::from(rng.sample(Alphanumeric)));
        }

        out.write(&random)?;

        Ok(())
    }
}
/// Handlebars helper backing `{{format-time "FMT"}}`: formats the `timestamp`
/// value from the render context (nanoseconds since the epoch, as set by the
/// caller of `render`) with the given `strftime`-style format string.
#[derive(Debug)]
struct FormatNowHelper;

impl HelperDef for FormatNowHelper {
    fn call<'reg: 'rc, 'rc>(
        &self,
        h: &Helper<'_, '_>,
        _: &Handlebars<'_>,
        c: &Context,
        _: &mut RenderContext<'_, '_>,
        out: &mut dyn Output,
    ) -> HelperResult {
        let format_string = h
            .param(0)
            .ok_or_else(|| RenderError::new("`format-time` requires a parameter"))?
            .render();

        let nanos = c
            .data()
            .get("timestamp")
            .and_then(|t| t.as_i64())
            .expect("Caller of `render` should have set `timestamp` to an `i64` value");

        let formatted = Utc.timestamp_nanos(nanos).format(&format_string).to_string();
        out.write(&formatted)?;

        Ok(())
    }
}
/// Given a handlebars template containing placeholders within double curly
/// brackets like `{{placeholder}}` and a list of `(placeholder, substitution
/// value)` pairs, place the values in the template where the relevant
/// placeholder is.
#[derive(Debug)]
pub struct Substitute {
    // Registry holding the compiled template plus the `format-time` helper
    // (and the `random` helper, once an RNG has been registered).
    handlebars: Handlebars<'static>,
    // Original template text, kept so error messages can include it.
    template: String,
}
impl Substitute {
    /// Compile and evaluate a template a single time. To evaluate the same
    /// template repeatedly, build an instance with [`Substitute::new`]
    /// instead.
    ///
    /// Strict mode is enabled, so a placeholder appearing in the template
    /// without a matching entry in `values` is an error.
    pub fn once(template: &str, values: &[(&str, &str)]) -> Result<String> {
        let value_map: BTreeMap<_, _> = values.iter().copied().collect();
        Self::new_minimal(template)?.evaluate(&value_map)
    }

    /// Compiles the handlebars template once, then allows reusing the
    /// template multiple times via [`Substitute::evaluate`]. If you don't
    /// need to reuse the template, you can use [`Substitute::once`].
    pub fn new<T: DataGenRng>(
        template: impl Into<String>,
        rng: RandomNumberGenerator<T>,
    ) -> Result<Self> {
        let mut substitute = Self::new_minimal(template)?;
        substitute.set_random_number_generator(rng);
        Ok(substitute)
    }

    // Compile the template with the `format-time` helper but no RNG-backed
    // `random` helper registered.
    fn new_minimal(template: impl Into<String>) -> Result<Self> {
        let template = template.into();

        let mut handlebars = Handlebars::new();
        // Strict mode turns missing substitution values into render errors
        // rather than silently rendering nothing.
        handlebars.set_strict_mode(true);
        handlebars.register_helper("format-time", Box::new(FormatNowHelper));

        handlebars
            .register_template_string("template", &template)
            .context(CantCompileTemplate {
                template: &template,
            })?;

        Ok(Self {
            handlebars,
            template,
        })
    }

    // Make `{{random N}}` available by handing the RNG to a helper.
    fn set_random_number_generator<T: DataGenRng>(&mut self, rng: RandomNumberGenerator<T>) {
        self.handlebars
            .register_helper("random", Box::new(RandomHelper(Mutex::new(rng))));
    }

    /// Interpolates the values into the compiled template.
    ///
    /// If a placeholder appears in a template but not in the list of
    /// substitution values, this will return an error.
    pub fn evaluate(&self, values: &impl Serialize) -> Result<String> {
        self.handlebars
            .render("template", values)
            .context(CantPerformSubstitution {
                template: &self.template,
            })
    }
}
/// Given a random number generator and replacement specification, choose one
/// value from each `Replacement` group's list of possible values according to
/// any specified weights (or with equal probability if there are no weights),
/// keyed by the placeholder each group replaces.
pub fn pick_from_replacements<'a, T: DataGenRng>(
    rng: &mut RandomNumberGenerator<T>,
    replacements: &'a [specification::Replacement],
) -> BTreeMap<&'a str, &'a str> {
    let mut chosen_values = BTreeMap::new();

    for replacement in replacements {
        let chosen = replacement
            .with
            .choose_weighted(rng, |value| value.weight())
            .expect("`Replacement` `with` should have items")
            .value();
        chosen_values.insert(replacement.replace.as_str(), chosen);
    }

    chosen_values
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test_rng;

    // Boxed error alias so tests can use `?` with any error type.
    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // Serializable context providing the `timestamp` value that
    // `FormatNowHelper` reads during rendering.
    #[derive(Serialize)]
    struct TimestampArgs {
        timestamp: i64,
    }

    #[test]
    fn format_now_valid_strftime() -> Result {
        let rng = test_rng();
        // 2020-09-03 in nanoseconds since the epoch (see assertion below).
        let args = TimestampArgs {
            timestamp: 1599154445000000000,
        };

        let substitute =
            Substitute::new(r#"the date is {{format-time "%Y-%m-%d"}}."#, rng).unwrap();

        let value = substitute.evaluate(&args)?;

        assert_eq!(value, "the date is 2020-09-03.");

        Ok(())
    }

    #[test]
    #[should_panic(expected = "a Display implementation returned an error unexpectedly: Error")]
    fn format_now_invalid_strftime_panics() {
        let rng = test_rng();
        let args = TimestampArgs {
            timestamp: 1599154445000000000,
        };

        // `%-B` is not a valid strftime specifier; chrono's Display impl
        // errors during formatting, which panics inside `to_string`.
        let substitute = Substitute::new(r#"the date is {{format-time "%-B"}}."#, rng).unwrap();

        substitute.evaluate(&args).expect("This is unreachable");
    }

    #[test]
    fn format_now_missing_strftime() -> Result {
        let rng = test_rng();
        let args = TimestampArgs {
            timestamp: 1599154445000000000,
        };

        // `format-time` with no format parameter is a render error, not a
        // panic.
        let substitute = Substitute::new(r#"the date is {{format-time}}."#, rng).unwrap();

        let result = substitute.evaluate(&args);

        // TODO: better matching on the error
        assert!(result.is_err());

        Ok(())
    }
}

View File

@ -0,0 +1,495 @@
//! Generating a set of tag keys and values given a specification
use crate::{
specification,
substitution::{pick_from_replacements, Substitute},
DataGenRng, RandomNumberGenerator,
};
use snafu::{ResultExt, Snafu};
use std::fmt;
/// Tag-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while creating tags
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when substituting placeholder values in tag keys
    /// (the `{{agent_id}}`/`{{measurement_id}}`/`{{tag_id}}` placeholders)
    #[snafu(display("Could not create tag key, caused by:\n{}", source))]
    CouldNotCreateTagKey {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },

    /// Error that may happen when substituting placeholder values in tag
    /// values
    #[snafu(display(
        "Could not generate tag value for tag `{}`, caused by:\n{}",
        key,
        source
    ))]
    CouldNotGenerateTagValue {
        /// The key of the tag we couldn't create a value for
        key: String,
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },
}
/// A generated tag value that will be used in a generated data point.
#[derive(Debug, Clone, PartialEq)]
pub struct Tag {
    /// The key for the tag
    pub key: String,
    /// The value for the tag
    pub value: String,
}

impl Tag {
    /// Construct a tag from anything convertible into owned key and value
    /// strings.
    pub fn new(key: impl Into<String>, value: impl Into<String>) -> Self {
        let key = key.into();
        let value = value.into();
        Self { key, value }
    }
}
/// A set of `count` tags that have the same configuration but different
/// `tag_id`s.
#[derive(Debug)]
pub struct TagGeneratorSet<T: DataGenRng> {
    // One generator per unit of cardinality; each gets its own derived seed.
    tags: Vec<TagGenerator<T>>,
}
impl<T: DataGenRng> TagGeneratorSet<T> {
    /// Create a new set of tag generators for a particular agent, measurement,
    /// and tag specification. One generator is built per unit of cardinality
    /// (defaulting to 1), each seeded from the parent seed and the tag name.
    pub fn new(
        agent_id: usize,
        measurement_id: usize,
        spec: &specification::TagSpec,
        parent_seed: impl fmt::Display,
    ) -> Result<Self> {
        let cardinality = spec.cardinality.unwrap_or(1);
        let seed = format!("{}-{}", parent_seed, spec.name);

        let mut tags = Vec::with_capacity(cardinality as usize);
        for unit in 0..cardinality {
            tags.push(TagGenerator::new(agent_id, measurement_id, spec, unit, &seed)?);
        }

        Ok(Self { tags })
    }

    /// Generate one set of tags from every generator in this set.
    pub fn generate(&mut self) -> Result<Vec<Vec<Tag>>> {
        self.tags.iter_mut().map(|tag| tag.generate()).collect()
    }

    /// For tags that shouldn't be included in the multi cartesian product
    /// because they have cardinality 1, this method takes the number of
    /// lines needed, looks at whether this tag should be resampled or not,
    /// and generates the number of lines worth of tags requested.
    pub fn generate_to_zip(&mut self, num_lines: usize) -> Result<Vec<Vec<Tag>>> {
        // This is a hack. A better way would be to have a different type for tags with
        // cardinality = 1, and only that type has this method.
        match self.tags.as_mut_slice() {
            [only] => only.generate_to_zip(num_lines),
            _ => panic!("generate_to_zip is only for use with cardinality 1"),
        }
    }

    /// The cardinality of this tag configuration, used to figure out how many
    /// rows each generation will create in total.
    pub fn tag_cardinality(&self) -> usize {
        self.tags.len()
    }
}
// Generates the tags for one unit of cardinality of a `TagSpec`.
#[derive(Debug)]
struct TagGenerator<T: DataGenRng> {
    // Agent id rendered as a string, ready for template substitution
    agent_id: String,
    // Measurement id rendered as a string, ready for template substitution
    measurement_id: String,
    // One entry per `count`; keys are already substituted, values still hold
    // their templates until `generate` runs
    tags: Vec<Tag>,
    // Which unit of cardinality this generator represents
    cardinality: u32,
    // Current `{{counter}}` value; advanced by `increment`
    counter: usize,
    // Generations since `counter` last advanced
    current_tick: usize,
    // Advance `counter` after this many generations; `None` leaves it at 0
    increment_every: Option<usize>,
    // Seeded RNG used for `{{guid}}` and weighted replacement choices
    rng: RandomNumberGenerator<T>,
    // Replacement groups to re-pick values from on each generation
    replacements: Vec<specification::Replacement>,
    // When true, `generate_to_zip` regenerates values for every line instead
    // of repeating one generation
    resample_every_line: bool,
}
impl<T: DataGenRng> TagGenerator<T> {
    // Build one generator for the given unit of cardinality. Tag *keys* are
    // substituted once here (agent_id/measurement_id/tag_id placeholders);
    // tag *values* keep their templates and are substituted per-generation.
    fn new(
        agent_id: usize,
        measurement_id: usize,
        spec: &specification::TagSpec,
        cardinality: u32,
        parent_seed: impl fmt::Display,
    ) -> Result<Self> {
        let count = spec.count.unwrap_or(1);
        let increment_every = spec.increment_every;
        let agent_id = agent_id.to_string();
        let measurement_id = measurement_id.to_string();

        // Derive a deterministic per-generator seed so each unit of
        // cardinality gets its own reproducible RNG stream.
        let seed = format!("{}-{}-{}", parent_seed, spec.name, cardinality);
        let rng = RandomNumberGenerator::<T>::new(seed);

        let tags = (0..count)
            .map(|tag_id| {
                let key = Substitute::once(
                    &spec.name,
                    &[
                        ("agent_id", &agent_id),
                        ("measurement_id", &measurement_id),
                        ("tag_id", &tag_id.to_string()),
                    ],
                )
                .context(CouldNotCreateTagKey)?;

                Ok(Tag {
                    key,
                    value: spec.value.clone(),
                })
            })
            .collect::<Result<_>>()?;

        Ok(Self {
            agent_id,
            measurement_id,
            tags,
            cardinality,
            counter: 0,
            current_tick: 0,
            increment_every,
            rng,
            replacements: spec.replacements.clone(),
            resample_every_line: spec.resample_every_line,
        })
    }

    // Produce one set of tags by substituting the built-in placeholders
    // (agent_id, measurement_id, counter, cardinality, guid) plus freshly
    // picked replacement values into each tag's value template.
    fn generate(&mut self) -> Result<Vec<Tag>> {
        let counter = self.increment().to_string();
        let cardinality_string = self.cardinality.to_string();
        let guid = self.rng.guid().to_string();

        // Built-in keys are inserted after the replacement picks, so a
        // replacement named e.g. `counter` would be overridden by them.
        let mut substitutions = pick_from_replacements(&mut self.rng, &self.replacements);
        substitutions.insert("agent_id", &self.agent_id);
        substitutions.insert("measurement_id", &self.measurement_id);
        substitutions.insert("counter", &counter);
        substitutions.insert("cardinality", &cardinality_string);
        substitutions.insert("guid", &guid);
        let substitutions: Vec<_> = substitutions.into_iter().collect();

        self.tags
            .iter()
            .map(|tag| {
                let key = tag.key.clone();
                let value = Substitute::once(&tag.value, &substitutions)
                    .context(CouldNotGenerateTagValue { key: &key })?;

                Ok(Tag { key, value })
            })
            .collect()
    }

    // if count and replacements/resampling could never be used on the same tag
    // configuration, then this could return `Result<Vec<Tag>>` I think. This
    // could also possibly return an iterator rather than a Vec; the measurement
    // immediately iterates over it
    fn generate_to_zip(&mut self, num_lines: usize) -> Result<Vec<Vec<Tag>>> {
        if self.resample_every_line {
            // Fresh generation (and fresh replacement picks) per line.
            Ok((0..num_lines)
                .map(|_| self.generate())
                .collect::<Result<_>>()?)
        } else {
            // Generate once and repeat that result for every line.
            let tags = self.generate()?;
            Ok(std::iter::repeat(tags).take(num_lines).collect())
        }
    }

    /// Returns the current value and potentially increments the counter for
    /// next time. With `increment_every: None` the counter never advances;
    /// with `Some(n)` it advances after every `n` calls.
    fn increment(&mut self) -> usize {
        let counter = self.counter;

        if let Some(increment) = self.increment_every {
            self.current_tick += 1;
            if self.current_tick >= increment {
                self.counter += 1;
                self.current_tick = 0;
            }
        }

        counter
    }
}
/// Cycles through each value for each agent tag
pub struct AgentTagIterator {
    // One endless cycling iterator per configured agent tag.
    iters: Vec<Box<dyn Iterator<Item = Tag>>>,
}

// Hand-written because the boxed iterator trait objects don't implement
// `Debug`; the field is rendered as a placeholder string instead.
impl fmt::Debug for AgentTagIterator {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("AgentTagIterator")
            .field("iters", &"(dynamic)")
            .finish()
    }
}
impl AgentTagIterator {
    /// Create a new iterator to manage the cycling, one endless cycle per
    /// agent tag.
    pub fn new(agent_tags: &[specification::AgentTag]) -> Self {
        let mut iters = Vec::with_capacity(agent_tags.len());
        for agent_tag in agent_tags {
            iters.push(boxed_cycling_iter(
                agent_tag.key.clone(),
                agent_tag.values.clone(),
            ));
        }
        Self { iters }
    }
}
// Build a boxed iterator that endlessly cycles through `values`, pairing each
// one with the fixed `key`.
fn boxed_cycling_iter(key: String, values: Vec<String>) -> Box<dyn Iterator<Item = Tag>> {
    let cycled = values.into_iter().cycle();
    Box::new(cycled.map(move |value| Tag::new(key.clone(), value)))
}
impl Iterator for AgentTagIterator {
    type Item = Vec<Tag>;

    // Always yields `Some`: one tag from each underlying cycling iterator
    // (an empty `Vec` when there are no agent tags configured).
    fn next(&mut self) -> Option<Self::Item> {
        let tags = self
            .iters
            .iter_mut()
            .filter_map(|iter| iter.next())
            .collect();
        Some(tags)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::{specification::*, ZeroRng, TEST_SEED};

    #[test]
    fn empty_agent_spec_tag_set_always_returns_empty_vec() {
        let agent = AgentSpec {
            tags: vec![],
            ..AgentSpec::default()
        };

        let mut iter = AgentTagIterator::new(&agent.tags);

        // With no tags configured, the iterator still yields, but each item
        // is an empty vector.
        assert_eq!(iter.next().unwrap(), vec![]);
    }

    #[test]
    fn agent_spec_tag_set() {
        let tag_alpha = toml::from_str(
            r#"key = "alpha"
values = ["1", "2", "3"]"#,
        )
        .unwrap();
        let tag_omega = toml::from_str(
            r#"key = "omega"
values = ["apple", "grape"]"#,
        )
        .unwrap();

        let agent = AgentSpec {
            tags: vec![tag_alpha, tag_omega],
            ..AgentSpec::default()
        };

        let mut iter = AgentTagIterator::new(&agent.tags);

        // Each tag cycles through its own values independently; with lists of
        // length 3 and 2, the combined pattern repeats every 6 items.
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "1"), Tag::new("omega", "apple"),]
        );

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "2"), Tag::new("omega", "grape"),]
        );

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "3"), Tag::new("omega", "apple"),]
        );

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "1"), Tag::new("omega", "grape"),]
        );

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "2"), Tag::new("omega", "apple"),]
        );

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "3"), Tag::new("omega", "grape"),]
        );

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "1"), Tag::new("omega", "apple"),]
        );
    }

    #[test]
    fn all_the_tag_substitutions_everywhere() -> Result<()> {
        let spec = TagSpec {
            name: "{{agent_id}}x{{measurement_id}}x{{tag_id}}".into(),
            value: "{{agent_id}}v{{measurement_id}}v{{cardinality}}v{{counter}}".into(),
            count: Some(2),
            cardinality: Some(3),
            increment_every: Some(1),
            ..Default::default()
        };
        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &spec, TEST_SEED)?;

        // First generation: counter is 0 everywhere; cardinality varies by
        // generator (0..3), tag_id varies by count (0..2) in the keys.
        let tags = tg.generate()?;
        assert_eq!(
            vec![
                vec![
                    Tag::new("22x33x0", "22v33v0v0"),
                    Tag::new("22x33x1", "22v33v0v0"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v1v0"),
                    Tag::new("22x33x1", "22v33v1v0"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v2v0"),
                    Tag::new("22x33x1", "22v33v2v0"),
                ],
            ],
            tags
        );

        // Second generation: `increment_every = 1` advances the counter to 1.
        let tags = tg.generate()?;
        assert_eq!(
            vec![
                vec![
                    Tag::new("22x33x0", "22v33v0v1"),
                    Tag::new("22x33x1", "22v33v0v1"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v1v1"),
                    Tag::new("22x33x1", "22v33v1v1"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v2v1"),
                    Tag::new("22x33x1", "22v33v2v1"),
                ],
            ],
            tags
        );

        Ok(())
    }

    #[test]
    fn string_replacements() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        // ZeroRng makes the weighted choice deterministic: serverA every time.
        let tags = tg.generate()?;
        assert_eq!(vec![vec![Tag::new("host", "serverA")]], tags);

        Ok(())
    }

    #[test]
    fn generate_to_zip_with_resample() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]
resample_every_line = true
"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        // Resampling re-picks per line; with ZeroRng the pick is always the
        // same, so the visible output matches the no-resample case.
        let tags = tg.generate_to_zip(3)?;

        assert_eq!(
            vec![
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
            ],
            tags
        );

        Ok(())
    }

    #[test]
    fn generate_to_zip_without_resample() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]
resample_every_line = false
"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        // Without resampling, one generation is repeated for each line.
        let tags = tg.generate_to_zip(3)?;

        assert_eq!(
            vec![
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
            ],
            tags
        );

        Ok(())
    }

    #[test]
    fn generate_to_zip_with_default_no_resample() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        // `resample_every_line` defaults to false.
        let tags = tg.generate_to_zip(3)?;

        assert_eq!(
            vec![
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")]
            ],
            tags
        );

        Ok(())
    }
}

View File

@ -0,0 +1,361 @@
//! Writing generated points
use futures::stream;
use influxdb2_client::models::{DataPoint, PostBucketRequest, WriteDataPoint};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
#[cfg(test)]
use std::{
collections::BTreeMap,
sync::{Arc, Mutex},
};
use std::{
    fs,
    fs::OpenOptions,
    io::Write,
    path::{Path, PathBuf},
};
use tracing::info;
/// Errors that may happen while writing points.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when writing line protocol to a no-op sink
    #[snafu(display("Could not generate line protocol: {}", source))]
    CantWriteToNoOp {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen when writing line protocol to a file
    #[snafu(display("Could not write line protocol to file: {}", source))]
    CantWriteToLineProtocolFile {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen when creating a directory to store files to write
    /// to
    #[snafu(display("Could not create directory: {}", source))]
    CantCreateDirectory {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen when checking a path's metadata to see if it's a
    /// directory
    #[snafu(display("Could not get metadata: {}", source))]
    CantGetMetadata {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen if the path given to the file-based writer isn't a
    /// directory
    #[snafu(display("Expected to get a directory"))]
    MustBeDirectory,

    /// Error that may happen while writing points to the API
    #[snafu(display("Could not write points to API: {}", source))]
    CantWriteToApi {
        /// Underlying Influx client request error that caused this problem
        source: influxdb2_client::RequestError,
    },

    /// Error that may happen while trying to create a bucket via the API
    #[snafu(display("Could not create bucket: {}", source))]
    CantCreateBucket {
        /// Underlying Influx client request error that caused this problem
        source: influxdb2_client::RequestError,
    },

    /// Error that may happen if attempting to create a bucket without
    /// specifying the org ID
    #[snafu(display("Could not create a bucket without an `org_id`"))]
    OrgIdRequiredToCreateBucket,
}

// Private alias: this module's errors are the default error type.
type Result<T, E = Error> = std::result::Result<T, E>;
/// Responsible for holding shared configuration needed to construct per-agent
/// points writers
#[derive(Debug)]
pub struct PointsWriterBuilder {
    config: PointsWriterConfig,
}

// Destination for generated points; chosen by the `PointsWriterBuilder`
// constructors.
#[derive(Debug)]
enum PointsWriterConfig {
    // Send points to an InfluxDB 2 HTTP API endpoint
    Api {
        client: influxdb2_client::Client,
        org: String,
        bucket: String,
    },
    // Write one line-protocol file per agent inside this directory
    Directory(PathBuf),
    // Discard points; optionally still render the line protocol to a sink
    NoOp {
        perform_write: bool,
    },
    // Capture written bytes in memory, keyed by agent name, for test
    // assertions
    #[cfg(test)]
    Vector(BTreeMap<String, Arc<Mutex<Vec<u8>>>>),
}
impl PointsWriterBuilder {
    /// Write points to the API at the specified host and put them in the
    /// specified org and bucket. When `create_bucket` is set, the bucket is
    /// created first, which requires `org_id` to be supplied.
    pub async fn new_api(
        host: impl Into<String>,
        org: impl Into<String>,
        bucket: impl Into<String>,
        token: impl Into<String>,
        create_bucket: bool,
        org_id: Option<&str>,
    ) -> Result<Self> {
        // Be somewhat lenient on what we accept as far as host; the client expects the
        // protocol to be included. We could pull in the url crate and do more
        // verification here.
        let mut host = host.into();
        if !host.starts_with("http") {
            host = format!("http://{}", host);
        }

        let client = influxdb2_client::Client::new(host, token.into());
        let org = org.into();
        let bucket = bucket.into();

        if create_bucket {
            let org_id = org_id.context(OrgIdRequiredToCreateBucket)?.to_string();
            let request = PostBucketRequest {
                org_id,
                name: bucket.clone(),
                ..Default::default()
            };

            client
                .create_bucket(Some(request))
                .await
                .context(CantCreateBucket)?;
        }

        Ok(Self {
            config: PointsWriterConfig::Api {
                client,
                org,
                bucket,
            },
        })
    }

    /// Write points to a file in the directory specified; the directory is
    /// created if it does not already exist.
    pub fn new_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        fs::create_dir_all(&path).context(CantCreateDirectory)?;
        ensure!(
            fs::metadata(&path).context(CantGetMetadata)?.is_dir(),
            MustBeDirectory
        );

        Ok(Self {
            config: PointsWriterConfig::Directory(path.as_ref().to_path_buf()),
        })
    }

    /// Generate points but do not write them anywhere. When `perform_write`
    /// is set, the line protocol is still rendered (into a sink).
    pub fn new_no_op(perform_write: bool) -> Self {
        let config = PointsWriterConfig::NoOp { perform_write };
        Self { config }
    }

    /// Create a writer out of this writer's configuration for a particular
    /// agent that runs in a separate thread/task.
    pub fn build_for_agent(&mut self, agent_name: &str) -> PointsWriter {
        let inner_writer = match &mut self.config {
            PointsWriterConfig::Api {
                client,
                org,
                bucket,
            } => InnerPointsWriter::Api {
                client: client.clone(),
                org: org.clone(),
                bucket: bucket.clone(),
            },
            PointsWriterConfig::Directory(dir_path) => {
                // One output file per agent, named `<agent>.txt`, inside the
                // configured directory.
                let filename = dir_path.join(agent_name).with_extension("txt");
                InnerPointsWriter::File(filename)
            }
            PointsWriterConfig::NoOp { perform_write } => InnerPointsWriter::NoOp {
                perform_write: *perform_write,
            },
            #[cfg(test)]
            PointsWriterConfig::Vector(ref mut agents_by_name) => {
                let buffer = agents_by_name
                    .entry(agent_name.to_string())
                    .or_insert_with(|| Arc::new(Mutex::new(Vec::new())));
                InnerPointsWriter::Vec(Arc::clone(buffer))
            }
        };

        PointsWriter { inner_writer }
    }
}
/// Responsible for writing points to the location it's been configured for.
#[derive(Debug)]
pub struct PointsWriter {
    inner_writer: InnerPointsWriter,
}

impl PointsWriter {
    /// Write these points to the configured destination.
    pub async fn write_points(&mut self, points: Vec<DataPoint>) -> Result<()> {
        self.inner_writer.write_points(points).await
    }
}
// The per-agent writer state corresponding to each `PointsWriterConfig`
// variant.
#[derive(Debug)]
enum InnerPointsWriter {
    // Send points to an InfluxDB 2 HTTP API endpoint
    Api {
        client: influxdb2_client::Client,
        org: String,
        bucket: String,
    },
    // Append line protocol to this agent's file
    File(PathBuf),
    // Discard points; optionally still render the line protocol to a sink
    NoOp {
        perform_write: bool,
    },
    // Accumulate written bytes in memory for test assertions
    #[cfg(test)]
    Vec(Arc<Mutex<Vec<u8>>>),
}
impl InnerPointsWriter {
    // Render and deliver one batch of points to the configured destination.
    async fn write_points(&mut self, points: Vec<DataPoint>) -> Result<()> {
        match self {
            Self::Api {
                client,
                org,
                bucket,
            } => {
                client
                    .write(org, bucket, stream::iter(points))
                    .await
                    .context(CantWriteToApi)?;
            }
            Self::File(filename) => {
                info!("Opening file {:?}", filename);
                let num_points = points.len();
                // Append so multiple batches from the same agent accumulate
                // in one file; create it on first use.
                let file = OpenOptions::new()
                    .append(true)
                    .create(true)
                    .open(&filename)
                    .context(CantWriteToLineProtocolFile)?;

                let mut file = std::io::BufWriter::new(file);
                for point in points {
                    point
                        .write_data_point_to(&mut file)
                        .context(CantWriteToLineProtocolFile)?;
                }
                // Flush explicitly: `BufWriter`'s `Drop` also flushes, but it
                // silently swallows any I/O error, which could lose the tail
                // of the batch without reporting it.
                file.flush().context(CantWriteToLineProtocolFile)?;
                info!("Wrote {} points to {:?}", num_points, filename);
            }
            Self::NoOp { perform_write } => {
                // Still exercise line-protocol rendering when requested, but
                // discard the bytes.
                if *perform_write {
                    let mut sink = std::io::sink();

                    for point in points {
                        point
                            .write_data_point_to(&mut sink)
                            .context(CantWriteToNoOp)?;
                    }
                }
            }
            #[cfg(test)]
            Self::Vec(ref mut vec) => {
                let vec_ref = Arc::clone(vec);
                let mut vec = vec_ref.lock().expect("Should be able to get lock");
                for point in points {
                    point
                        .write_data_point_to(&mut *vec)
                        .expect("Should be able to write to vec");
                }
            }
        }
        Ok(())
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::{generate, now_ns, specification::*, ZeroRng};
    use std::str::FromStr;

    // Boxed error alias so tests can use `?` with any error type.
    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // Test-only constructors/accessors for the in-memory `Vector` writer
    // config, so tests can inspect exactly what was written per agent.
    impl PointsWriterBuilder {
        fn new_vec() -> Self {
            Self {
                config: PointsWriterConfig::Vector(BTreeMap::new()),
            }
        }

        // Consume the builder and return the captured bytes for one agent as
        // a UTF-8 string.
        fn written_data(self, agent_name: &str) -> String {
            match self.config {
                PointsWriterConfig::Vector(agents_by_name) => {
                    let bytes_ref = agents_by_name
                        .get(agent_name)
                        .expect("Should have written some data, did not find any for this agent")
                        .clone();
                    let bytes = bytes_ref
                        .lock()
                        .expect("Should have been able to get a lock");
                    String::from_utf8(bytes.to_vec()).expect("we should be generating valid UTF-8")
                }
                _ => unreachable!("this method is only valid when writing to a vector for testing"),
            }
        }
    }

    #[tokio::test]
    async fn test_generate() -> Result<()> {
        let toml = r#"
name = "demo_schema"
base_seed = "this is a demo"

[[agents]]
name = "basic"

[[agents.measurements]]
name = "cpu"

[[agents.measurements.fields]]
name = "up"
bool = true"#;

        let data_spec = DataSpec::from_str(toml).unwrap();
        let mut points_writer_builder = PointsWriterBuilder::new_vec();

        // Generate a single point at a fixed `now` so the expected line
        // protocol is fully deterministic.
        let now = now_ns();

        generate::<ZeroRng>(
            &data_spec,
            &mut points_writer_builder,
            Some(now),
            Some(now),
            now,
            false,
        )
        .await?;

        let line_protocol = points_writer_builder.written_data("basic");

        let expected_line_protocol = format!(
            r#"cpu,data_spec=demo_schema up=f {}
"#,
            now
        );
        assert_eq!(line_protocol, expected_line_protocol);

        Ok(())
    }
}

View File

@ -6,8 +6,7 @@
# ./scripts/genlp.py | head -n 2000
# ```
#
# Please use https://github.com/influxdata/iox_data_generator for anything
# more complicated.
# Please use iox_data_generator for anything more complicated.
#
from signal import signal, SIGPIPE, SIG_DFL