Merge pull request #2357 from influxdata/pd/add-data-generator
refactor: move data generator to IOx repo and fix buildpull/24376/head
commit
19bdc00d4a
|
@ -271,7 +271,7 @@ dependencies = [
|
|||
"md5",
|
||||
"oauth2",
|
||||
"paste",
|
||||
"quick-error",
|
||||
"quick-error 1.2.3",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde-xml-rs",
|
||||
|
@ -300,7 +300,7 @@ dependencies = [
|
|||
"md5",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"quick-error",
|
||||
"quick-error 1.2.3",
|
||||
"ring",
|
||||
"serde",
|
||||
"serde-xml-rs",
|
||||
|
@ -398,13 +398,34 @@ dependencies = [
|
|||
"constant_time_eq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
|
||||
dependencies = [
|
||||
"block-padding",
|
||||
"byte-tools",
|
||||
"byteorder",
|
||||
"generic-array 0.12.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"generic-array 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-padding"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
|
||||
dependencies = [
|
||||
"byte-tools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -446,6 +467,12 @@ version = "3.7.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631"
|
||||
|
||||
[[package]]
|
||||
name = "byte-tools"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.7.2"
|
||||
|
@ -525,6 +552,17 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chrono-english"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0be5180df5f7c41fc2416bc038bc8d78d44db8136c415b94ccbc95f523dc38e9"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"scanlex",
|
||||
"time 0.1.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.2.0"
|
||||
|
@ -784,7 +822,7 @@ version = "0.11.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"generic-array 0.14.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
|
@ -917,13 +955,22 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
|
||||
dependencies = [
|
||||
"generic-array 0.12.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"generic-array 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1095,6 +1142,12 @@ dependencies = [
|
|||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fake-simd"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
|
||||
|
||||
[[package]]
|
||||
name = "fd-lock"
|
||||
version = "2.0.0"
|
||||
|
@ -1320,6 +1373,15 @@ dependencies = [
|
|||
"tonic-build 0.5.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.12.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.4"
|
||||
|
@ -1433,6 +1495,20 @@ version = "1.7.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
|
||||
|
||||
[[package]]
|
||||
name = "handlebars"
|
||||
version = "3.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4498fc115fa7d34de968184e473529abb40eeb6be8bc5f7faba3d08c316cb3e3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"pest",
|
||||
"pest_derive",
|
||||
"quick-error 2.0.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
|
@ -1488,7 +1564,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b"
|
||||
dependencies = [
|
||||
"crypto-mac",
|
||||
"digest",
|
||||
"digest 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1816,6 +1892,36 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iox_data_generator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"chrono-english",
|
||||
"clap",
|
||||
"criterion",
|
||||
"data_types",
|
||||
"futures",
|
||||
"generated_types",
|
||||
"handlebars",
|
||||
"influxdb2_client",
|
||||
"influxdb_iox_client",
|
||||
"itertools 0.9.0",
|
||||
"packers",
|
||||
"rand 0.8.4",
|
||||
"rand_core 0.6.3",
|
||||
"rand_seeder",
|
||||
"serde",
|
||||
"snafu",
|
||||
"test_helpers",
|
||||
"tokio",
|
||||
"toml",
|
||||
"tracing",
|
||||
"tracing-futures",
|
||||
"tracing-subscriber",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iox_object_store"
|
||||
version = "0.1.0"
|
||||
|
@ -2014,6 +2120,12 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.0.1"
|
||||
|
@ -2035,9 +2147,9 @@ version = "0.9.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"digest",
|
||||
"opaque-debug",
|
||||
"block-buffer 0.9.0",
|
||||
"digest 0.9.0",
|
||||
"opaque-debug 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2484,6 +2596,12 @@ version = "11.1.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.3.0"
|
||||
|
@ -2788,6 +2906,49 @@ dependencies = [
|
|||
"test_helpers",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
|
||||
dependencies = [
|
||||
"ucd-trie",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_derive"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_generator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_generator"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_meta",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_meta"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d"
|
||||
dependencies = [
|
||||
"maplit",
|
||||
"pest",
|
||||
"sha-1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.5.1"
|
||||
|
@ -3188,6 +3349,12 @@ version = "1.2.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.20.0"
|
||||
|
@ -3313,6 +3480,15 @@ dependencies = [
|
|||
"rand_core 0.6.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_seeder"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "612dd698949d531335b4c29d1c64fb11942798decfc08abc218578942e66d7d0"
|
||||
dependencies = [
|
||||
"rand_core 0.6.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.5.1"
|
||||
|
@ -3605,7 +3781,7 @@ dependencies = [
|
|||
"base64 0.13.0",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"digest",
|
||||
"digest 0.9.0",
|
||||
"futures",
|
||||
"hex",
|
||||
"hmac",
|
||||
|
@ -3728,6 +3904,12 @@ dependencies = [
|
|||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scanlex"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "088c5d71572124929ea7549a8ce98e1a6fd33d0a38367b09027b382e67c033db"
|
||||
|
||||
[[package]]
|
||||
name = "schannel"
|
||||
version = "0.1.19"
|
||||
|
@ -3950,6 +4132,18 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha-1"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
|
||||
dependencies = [
|
||||
"block-buffer 0.7.3",
|
||||
"digest 0.8.1",
|
||||
"fake-simd",
|
||||
"opaque-debug 0.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.6.0"
|
||||
|
@ -3962,11 +4156,11 @@ version = "0.9.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b362ae5752fd2137731f9fa25fd4d9058af34666ca1966fb969119cc35719f12"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"block-buffer 0.9.0",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
"opaque-debug",
|
||||
"digest 0.9.0",
|
||||
"opaque-debug 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4892,6 +5086,12 @@ version = "1.13.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06"
|
||||
|
||||
[[package]]
|
||||
name = "ucd-trie"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.6"
|
||||
|
|
|
@ -25,6 +25,7 @@ members = [
|
|||
"influxdb_line_protocol",
|
||||
"influxdb_tsm",
|
||||
"internal_types",
|
||||
"iox_data_generator",
|
||||
"iox_object_store",
|
||||
"logfmt",
|
||||
"lifecycle",
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
[package]
|
||||
name = "iox_data_generator"
|
||||
version = "0.1.0"
|
||||
authors = ["Paul Dix <paul@pauldix.net>"]
|
||||
edition = "2018"
|
||||
default-run = "iox_data_generator"
|
||||
|
||||
[dependencies]
|
||||
chrono = "0.4.13"
|
||||
chrono-english = "0.1.4"
|
||||
clap = "2.33.1"
|
||||
futures = "0.3.5"
|
||||
handlebars = "3.3.0"
|
||||
data_types = { path = "../data_types" }
|
||||
generated_types = { path = "../generated_types" }
|
||||
influxdb2_client = { path = "../influxdb2_client" }
|
||||
influxdb_iox_client = { path = "../influxdb_iox_client" }
|
||||
packers = { path = "../packers" }
|
||||
itertools = "0.9.0"
|
||||
rand = { version = "0.8.3", features = ["small_rng"] }
|
||||
rand_core = "0.6.2"
|
||||
rand_seeder = "0.2.1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu = "0.6.8"
|
||||
tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] }
|
||||
toml = "0.5.6"
|
||||
tracing = "0.1"
|
||||
tracing-futures = "0.2.4"
|
||||
tracing-subscriber = "0.2.11"
|
||||
uuid = { version = "0.8.1", default_features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3.3"
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
||||
[[bench]]
|
||||
name = "point_generation"
|
||||
harness = false
|
|
@ -0,0 +1,106 @@
|
|||
# `iox_data_generator`
|
||||
|
||||
The `iox_data_generator` tool creates random data points according to a specification and loads them
|
||||
into an `iox` instance to simulate real data.
|
||||
|
||||
To build and run, [first install Rust](https://www.rust-lang.org/tools/install). Then from root of the `influxdb_iox` repo run:
|
||||
|
||||
```
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
And the built binary has command line help:
|
||||
|
||||
```
|
||||
./target/release/iox_data_generator --help
|
||||
```
|
||||
|
||||
For examples of specifications see the [schemas folder](schemas)
|
||||
|
||||
## Use with two IOx servers and Kafka
|
||||
|
||||
The data generator tool can be used to simulate data being written to IOx in various shapes. This
|
||||
is how to set up a local experiment for profiling or debugging purposes using a database in two IOx
|
||||
instances: one writing to Kafka and one reading from Kafka.
|
||||
|
||||
If you're profiling IOx, be sure you've compiled and are running a release build using either:
|
||||
|
||||
```
|
||||
cargo build --release
|
||||
./target/release/influxdb_iox run --server-id 1
|
||||
```
|
||||
|
||||
or:
|
||||
|
||||
```
|
||||
cargo run --release -- run --server-id 1
|
||||
```
|
||||
|
||||
Server ID is the only required attribute for running IOx; see `influxdb_iox run --help` for all the
|
||||
other configuration options for the server you may want to set for your experiment. Note that the
|
||||
default HTTP API address is `127.0.0.1:8080` unless you set something different with `--api-bind`
|
||||
and the default gRPC address is `127.0.0.1:8082` unless you set something different using
|
||||
`--grpc-bind`.
|
||||
|
||||
For the Kafka setup, you'll need to start two IOx servers, so you'll need to set the bind addresses
|
||||
for at least one of them. Here's an example of the two commands to run:
|
||||
|
||||
```
|
||||
cargo run --release -- run --server-id 1
|
||||
cargo run --release -- run --server-id 2 --api-bind 127.0.0.1:8084 --grpc-bind 127.0.0.1:8086
|
||||
```
|
||||
|
||||
You'll also need to run a Kafka instance. There's a Docker compose script in the influxdb_iox
|
||||
repo you can run with:
|
||||
|
||||
```
|
||||
docker-compose -f docker/ci-kafka-docker-compose.yml up kafka
|
||||
```
|
||||
|
||||
The Kafka instance will be accessible from `127.0.0.1:9093` if you run it with this script.
|
||||
|
||||
Once you have the two IOx servers and one Kafka instance running, create a database with a name in
|
||||
the format `[orgname]_[bucketname]`. For example, create a database in IOx named `mlb_pirates`, and
|
||||
the org you'll use in the data generator will be `mlb` and the bucket will be `pirates`. The
|
||||
`DatabaseRules` defined in `src/bin/create_database.rs` will set up a database in the "writer" IOx
|
||||
instance to write to Kafka and the database in the "reader" IOx instance to read from Kafka if
|
||||
you run it with:
|
||||
|
||||
```
|
||||
cargo run -p iox_data_generator --bin create_database -- --writer 127.0.0.1:8082 --reader 127.0.0.1:8086 mlb_pirates
|
||||
```
|
||||
|
||||
This script adds 3 rows to a `writer_test` table because [this issue with the Kafka Consumer
|
||||
needing data before it can find partitions](https://github.com/influxdata/influxdb_iox/issues/2189).
|
||||
|
||||
Once the database is created, decide what kind of data you would like to send it. You can use an
|
||||
existing data generation schema in the `schemas` directory or create a new one, perhaps starting
|
||||
from an existing schema as a guide. In this example, we're going to use
|
||||
`iox_data_generator/schemas/cap-write.toml`.
|
||||
|
||||
Next, run the data generation tool as follows:
|
||||
|
||||
```
|
||||
cargo run -p iox_data_generator -- --spec iox_data_generator/schemas/cap-write.toml --continue --host 127.0.0.1:8080 --token arbitrary --org mlb --bucket pirates
|
||||
```
|
||||
|
||||
- `--spec iox_data_generator/schemas/cap-write.toml` sets the schema you want to use to generate the data
|
||||
- `--continue` means the data generation tool should generate data every `sampling_interval` (which
|
||||
is set in the schema) until we stop it
|
||||
- `--host 127.0.0.1:8080` means to write to the writer IOx server running at the default HTTP API address
|
||||
of `127.0.0.1:8080` (note this is NOT the gRPC address used by the `create_database` command)
|
||||
- `--token arbitrary` - the data generator requires a token value but IOx doesn't use it, so this
|
||||
can be any value.
|
||||
- `--org mlb` is the part of the database name you created before the `_`
|
||||
- `--bucket pirates` is the part of the database name you created after the `_`
|
||||
|
||||
You should be able to use `influxdb_iox sql -h http://127.0.0.1:8086` to connect to the gRPC of the reader
|
||||
then `use database mlb_pirates;` and query the tables to see that the data is being inserted. That
|
||||
is,
|
||||
|
||||
```
|
||||
# in your influxdb_iox checkout
|
||||
cargo run -- sql -h http://127.0.0.1:8086
|
||||
```
|
||||
|
||||
Connecting to the writer instance won't show any data.
|
|
@ -0,0 +1,66 @@
|
|||
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
|
||||
use iox_data_generator::{
|
||||
specification::{AgentSpec, DataSpec, FieldSpec, FieldValueSpec, MeasurementSpec},
|
||||
write::PointsWriterBuilder,
|
||||
};
|
||||
|
||||
pub fn single_agent(c: &mut Criterion) {
|
||||
let spec = DataSpec {
|
||||
base_seed: Some("faster faster faster".into()),
|
||||
name: "benchmark".into(),
|
||||
agents: vec![AgentSpec {
|
||||
name: "agent-1".into(),
|
||||
count: None,
|
||||
sampling_interval: Some(1),
|
||||
name_tag_key: None,
|
||||
tags: vec![],
|
||||
measurements: vec![MeasurementSpec {
|
||||
name: "measurement-1".into(),
|
||||
count: None,
|
||||
tags: vec![],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field-1".into(),
|
||||
field_value_spec: FieldValueSpec::Bool(true),
|
||||
count: None,
|
||||
}],
|
||||
}],
|
||||
}],
|
||||
};
|
||||
|
||||
let mut points_writer = PointsWriterBuilder::new_no_op(true);
|
||||
|
||||
let start_datetime = Some(0);
|
||||
let one_hour_s = 60 * 60;
|
||||
let ns_per_second = 1_000_000_000;
|
||||
let end_datetime = Some(one_hour_s * ns_per_second);
|
||||
|
||||
let expected_points = 3601;
|
||||
|
||||
let mut group = c.benchmark_group("single_agent");
|
||||
group.throughput(Throughput::Elements(expected_points));
|
||||
|
||||
group.bench_function("single agent with basic configuration", |b| {
|
||||
b.iter(|| {
|
||||
let r = block_on({
|
||||
iox_data_generator::generate::<rand::rngs::SmallRng>(
|
||||
&spec,
|
||||
&mut points_writer,
|
||||
start_datetime,
|
||||
end_datetime,
|
||||
0,
|
||||
false,
|
||||
)
|
||||
});
|
||||
let n_points = r.expect("Could not generate data");
|
||||
assert_eq!(n_points, expected_points as usize);
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn block_on<F: std::future::Future>(f: F) -> F::Output {
|
||||
f.await
|
||||
}
|
||||
|
||||
criterion_group!(benches, single_agent);
|
||||
criterion_main!(benches);
|
|
@ -0,0 +1,428 @@
|
|||
# This config file aims to replicate the data produced by the capwrite tool:
|
||||
# https://github.com/influxdata/idpe/tree/e493a8e9b6b773e9374a8542ddcab7d8174d320d/performance/capacity/write
|
||||
name = "cap_write"
|
||||
base_seed = "correct horse battery staple"
|
||||
|
||||
[[agents]]
|
||||
name = "cap_write_{{agent_id}}"
|
||||
count = 3
|
||||
sampling_interval = 10
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "system"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "n_cpus"
|
||||
i64_range = [8, 8]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "n_users"
|
||||
i64_range = [2, 11]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "uptime"
|
||||
uptime = "i64"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "uptime_format"
|
||||
uptime = "telegraf"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "load1"
|
||||
f64_range = [0.0, 8.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "load5"
|
||||
f64_range = [0.0, 8.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "load15"
|
||||
f64_range = [0.0, 8.0]
|
||||
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "mem"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "active"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "available"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "buffered"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "cached"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "inactive"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "slab"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "used"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "avaiable_percent"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "used_percent"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "wired"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "commit_limit"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "committed_as"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "dirty"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "high_free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "high_total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "huge_page_size"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "huge_pages_free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "huge_pages_total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "low_free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "low_total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "mapped"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "page_tables"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "shared"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "swap_cached"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "swap_free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "swap_total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "vmalloc_chunk"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "vmalloc_total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "vmalloc_used"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "write_back"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "write_back_tmp"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "disk"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "used"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "used_percent"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "inodes_free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "inodes_total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "inodes_used"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "swap"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "free"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "total"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "used"
|
||||
i64_range = [0, 1000000] # Note this is an order of magnitude less deliberately to match
|
||||
# https://github.com/influxdata/idpe/blob/ffbceb04dd4b3aa0828d039135977a4f36f7b822/performance/capacity/write/swap.go#L17
|
||||
# not sure if that value was intentional, perhaps it is to ensure used < total?
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "used_percent"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "in"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "out"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "cpu"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "cpu"
|
||||
value = "cpu-total"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_user"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_nice"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_system"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_idle"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_irq"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_softirq"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_steal"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_guest"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "usage_guest_nice"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "processes"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "blocked"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "running"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "sleeping"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "stopped"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "total"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "zombie"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "dead"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "wait"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "idle"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "paging"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "total_threads"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "unknown"
|
||||
i64_range = [0, 255]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "net"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "bytes_recv"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "bytes_sent"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "packets_sent"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "packets_recv"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "err_in"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "err_out"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "drop_in"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "drop_out"
|
||||
i64_range = [0, 10000000]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "diskio"
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "host-{{agent_id}}"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "reads"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "writes"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "read_bytes"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "write_bytes"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "read_time"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "write_time"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "io_time"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "weighted_io_time"
|
||||
i64_range = [0, 1000000]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "iops_in_progress"
|
||||
i64_range = [0, 1000000]
|
|
@ -0,0 +1,39 @@
|
|||
# Every feature demonstrated in this schema is fully supported in the current implementation.
|
||||
# Other schemas may demonstrate future features.
|
||||
|
||||
# Every point generated by this schema will contain a tag `data_spec=[this_value]`.
|
||||
name = "demo_schema"
|
||||
# This seed can be any string and will be used to seed all random number generators. To change
|
||||
# the randomness in the points generated by this schema, change this value to something else.
|
||||
# To generate the same data in the same order as previous runs with this schema (except for any
|
||||
# elements in this schema you have changed), keep this value the same.
|
||||
base_seed = "this is a demo"
|
||||
|
||||
[[agents]]
|
||||
name = "basic"
|
||||
sampling_interval = 10 # in seconds. TODO: parse nice durations like "12m" and "30s"
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "cpu"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "temp"
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "location"
|
||||
pattern = "{{city}}, {{country}}"
|
||||
replacements = [
|
||||
{replace = "city", with = ["San Jose", "San Antonio", "Santa Maria"]},
|
||||
{replace = "country", with = ["United States", "Costa Rica", ["Argentina", 10]]},
|
||||
]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "wave_height"
|
||||
i64_range = [0, 10]
|
||||
increment = true
|
||||
reset_after = 20
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "uptime"
|
||||
uptime = "i64"
|
|
@ -0,0 +1,141 @@
|
|||
name = "demo_schema"
|
||||
base_seed = "correct horse battery staple"
|
||||
|
||||
# the most basic spec with no auto generating of agents, measurements, tags or fields
|
||||
[[agents]]
|
||||
name = "demo"
|
||||
sampling_interval = 10
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "some_measurement"
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "foo"
|
||||
value = "bar"
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field1"
|
||||
# it's a boolean field, the true means to generate the boolean randomly with equal probability
|
||||
bool = true
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field2"
|
||||
# it's an i64 field, values will be generated using a pseudo random number generator
|
||||
# with a set seed and values in the range [3, 200). Setting it to [3, 3] or [3, 4] will
|
||||
# make the value always be 3
|
||||
i64_range = [3, 200]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field3"
|
||||
# it's an i64 field, values will be generated using a pseudo random number generator
|
||||
# with a set seed and values in the range [1000, 5000)
|
||||
i64_range = [1000, 5000]
|
||||
# The value after each sample will be incremented by the next random amount. This is
|
||||
# useful when simulating a counter.
|
||||
increment = true
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field4"
|
||||
# it's an f64 field, values will be generated using a pseudo random number generator
|
||||
# with a set seed with values in the range [0.0, 100.0). Setting both values to the same
|
||||
# number will make every value that number.
|
||||
f64_range = [0.0, 100.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field5"
|
||||
# this is a string field. Parts of the string will be replaced. {{agent_name}} will be replaced
|
||||
# with the name of the agent, {{random 200}} will be replaced with a random alphanumeric string
|
||||
# of the length specified. {{format-time "%Y-%m-%d %H:%M"}} will be replaced with the time for
|
||||
# this line in the simulation (that is, the same value that this line will have for its
|
||||
# timestamp) formatted using a strftime specifier. Other patterns will be looked for based on
|
||||
# the keys in replacements.
|
||||
pattern = "{{agent_name}} foo {{level}} {{format-time \"%Y-%m-%d %H:%M\"}} {{random 200}}"
|
||||
# each key in string replacements will be replaced in the pattern with a value randomly
|
||||
# selected from the array of strings. Specify a weight as an integer greater than 1 to change
|
||||
# the probability that a given string will be selected.
|
||||
replacements = [
|
||||
{replace = "color", with = ["red", "blue", "green"]},
|
||||
{replace = "level", with = [
|
||||
["info", 800],
|
||||
["warn", 195],
|
||||
["error", 5]
|
||||
]}
|
||||
]
|
||||
|
||||
[[agents]]
|
||||
name = "some-server-{{agent_id}}"
|
||||
count = 10
|
||||
sampling_interval = 22
|
||||
|
||||
# Optional: every measurement (row) this agent produces will include a tag with the agent_id filled
|
||||
# in:
|
||||
# agent_name=some-server-{{agent_id}}
|
||||
name_tag_key = "agent_name"
|
||||
|
||||
# Optional: these values will be rotated through so that each agent that gets created will have one.
|
||||
# e.g: the first agent will always inject region=us-west and second will be region=us-east, etc.
|
||||
tags = [
|
||||
{key = "region", values = ["us-west", "us-east", "dublin", "frankfurt"]},
|
||||
{key = "foo", values = ["bar", "asdf"]},
|
||||
]
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "few-tags-measurement-{{measurement_id}}"
|
||||
count = 20
|
||||
[[agents.measurements.tags]]
|
||||
# {{measurement_id}} will be replaced with the id of the measurement this tag is for
|
||||
name = "tag-1-{{measurement_id}}"
|
||||
value = "value-1"
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "tag-2"
|
||||
# {{cardinality}} will be replaced with the cardinality counter
|
||||
value = "value-{{cardinality}}"
|
||||
# Optional: This means each collection on this agent will have 4 rows of this measurement with
|
||||
# unique values for this tag. This could be for things like org_id as a tag or for
|
||||
# something like cpu measurements in Telegraf where you have a separate line for each cpu:
|
||||
# cpu,cpu=cpu-total,host=foo usage_user=23.2,usage_system=33.3
|
||||
# cpu,cpu=cpu-0,host=foo usage_user=22.2,usage_system=34.5
|
||||
# cpu,cpu=cpu-1,host=foo usage_user=11.2,usage_system=56.5
|
||||
cardinality = 4
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "tag-3"
|
||||
# {{counter}} will be replaced with the increment counter
|
||||
value = "value-{{counter}}"
|
||||
# Optional: This means that {{counter}} will increase by 1 after every 10 samples that are
|
||||
# pulled.
|
||||
# This option simulates temporal tag values like process IDs or container IDs in tags
|
||||
increment_every = 10
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "tag-4"
|
||||
# {{counter}} will be replaced with the increment counter and {{cardinality}} will be replaced
|
||||
# with the cardinality counter
|
||||
value = "value-{{counter}}-{{cardinality}}"
|
||||
# Optional: This means that {{counter}} will increment by 1 after every 100 samples that are
|
||||
# pulled.
|
||||
# This option simulates temporal tag values like process IDs or container IDs in tags
|
||||
increment_every = 100
|
||||
# when paired with cardinality, this can simulate having many containers running on a single
|
||||
# host
|
||||
cardinality = 10
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field-2"
|
||||
bool = true
|
||||
|
||||
# This example shows generating 10 different measurements that each have their own set of
|
||||
# tags (10 of them) and each have their own set of fields (4 of them)
|
||||
[[agents.measurements]]
|
||||
name = "mid-tags-measurement-{{measurement_id}}"
|
||||
count = 10
|
||||
[[agents.measurements.tags]]
|
||||
name = "tag-{{tag_id}}-{{measurement_id}}"
|
||||
count = 10
|
||||
value = "value-{{cardinality}}"
|
||||
cardinality = 3
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "field-1"
|
||||
bool = true
|
|
@ -0,0 +1,52 @@
|
|||
name = "tracing_schema"
|
||||
base_seed = "this is a demo"
|
||||
|
||||
[[agents]]
|
||||
name = "trace-sender"
|
||||
sampling_interval = 10 # in seconds. TODO: parse nice durations like "12m" and "30s"
|
||||
|
||||
[[agents.measurements]]
|
||||
name = "traces"
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "trace_id"
|
||||
value = "{{guid}}"
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "span_id"
|
||||
value = "{{guid}}"
|
||||
cardinality = 10
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "host"
|
||||
value = "{{host}}"
|
||||
replacements = [
|
||||
{replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
|
||||
]
|
||||
resample_every_line = true
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "region"
|
||||
value = "{{region}}"
|
||||
replacements = [
|
||||
{replace = "region", with = ["us-west", "us-east"]},
|
||||
]
|
||||
resample_every_line = false
|
||||
|
||||
[[agents.measurements.tags]]
|
||||
name = "service"
|
||||
value = "{{service}}"
|
||||
replacements = [
|
||||
{replace = "service", with = ["nginx", "istio", "storage", "gateway", "redis", "mysql", "s3"]},
|
||||
]
|
||||
resample_every_line = true
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "timing"
|
||||
f64_range = [0.0, 500.0]
|
||||
|
||||
[[agents.measurements.fields]]
|
||||
name = "depth"
|
||||
i64_range = [0, 3]
|
||||
increment = true
|
||||
reset_after = 10
|
|
@ -0,0 +1,557 @@
|
|||
//! Agents responsible for generating points
|
||||
|
||||
use crate::{
|
||||
measurement::MeasurementGeneratorSet, now_ns, specification, tag::Tag, write::PointsWriter,
|
||||
DataGenRng, RandomNumberGenerator,
|
||||
};
|
||||
|
||||
use influxdb2_client::models::DataPoint;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{fmt, time::Duration};
|
||||
use tracing::{debug, info};
|
||||
|
||||
/// Agent-specific Results
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Errors that may happen while creating points
|
||||
#[derive(Snafu, Debug)]
|
||||
pub enum Error {
|
||||
/// Error that may happen when generating points from measurements
|
||||
#[snafu(display("{}", source))]
|
||||
CouldNotGeneratePoint {
|
||||
/// Underlying `measurement` module error that caused this problem
|
||||
source: crate::measurement::Error,
|
||||
},
|
||||
|
||||
/// Error that may happen when creating measurement generator sets
|
||||
#[snafu(display("Could not create measurement generator sets, caused by:\n{}", source))]
|
||||
CouldNotCreateMeasurementGeneratorSets {
|
||||
/// Underlying `measurement` module error that caused this problem
|
||||
source: crate::measurement::Error,
|
||||
},
|
||||
|
||||
/// Error that may happen when writing points
|
||||
#[snafu(display("Could not write points, caused by:\n{}", source))]
|
||||
CouldNotWritePoints {
|
||||
/// Underlying `write` module error that caused this problem
|
||||
source: crate::write::Error,
|
||||
},
|
||||
}
|
||||
|
||||
/// Each `AgentSpec` informs the instantiation of an `Agent`, which coordinates
|
||||
/// the generation of the measurements in their specification.
|
||||
#[derive(Debug)]
|
||||
pub struct Agent<T: DataGenRng> {
|
||||
agent_id: usize,
|
||||
name: String,
|
||||
#[allow(dead_code)]
|
||||
rng: RandomNumberGenerator<T>,
|
||||
agent_tags: Vec<Tag>,
|
||||
measurement_generator_sets: Vec<MeasurementGeneratorSet<T>>,
|
||||
sampling_interval: Option<i64>,
|
||||
/// nanoseconds since the epoch, used as the timestamp for the next
|
||||
/// generated point
|
||||
current_datetime: i64,
|
||||
/// nanoseconds since the epoch, when current_datetime exceeds this, stop
|
||||
/// generating points
|
||||
end_datetime: i64,
|
||||
/// whether to continue generating points after reaching the current time
|
||||
continue_on: bool,
|
||||
/// whether this agent is done generating points or not
|
||||
finished: bool,
|
||||
/// Optional interval at which to re-run the agent if generating data in
|
||||
/// "continue" mode
|
||||
interval: Option<tokio::time::Interval>,
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> Agent<T> {
|
||||
/// Create a new agent that will generate data points according to these
|
||||
/// specs. Substitutions in `name` and `agent_tags` should be made
|
||||
/// before using them to instantiate an agent.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
agent_spec: &specification::AgentSpec,
|
||||
agent_name: impl Into<String>,
|
||||
agent_id: usize,
|
||||
parent_seed: impl fmt::Display,
|
||||
agent_tags: Vec<Tag>,
|
||||
start_datetime: Option<i64>, // in nanoseconds since the epoch, defaults to now
|
||||
end_datetime: Option<i64>, // also in nanoseconds since the epoch, defaults to now
|
||||
execution_start_time: i64,
|
||||
continue_on: bool, // If true, run in "continue" mode after historical data is generated
|
||||
) -> Result<Self> {
|
||||
let name = agent_name.into();
|
||||
// Will agents actually need rngs? Might just need seeds...
|
||||
let seed = format!("{}-{}", parent_seed, name);
|
||||
let rng = RandomNumberGenerator::<T>::new(&seed);
|
||||
|
||||
let measurement_generator_sets = agent_spec
|
||||
.measurements
|
||||
.iter()
|
||||
.map(|spec| {
|
||||
MeasurementGeneratorSet::new(
|
||||
&name,
|
||||
agent_id,
|
||||
spec,
|
||||
&seed,
|
||||
&agent_tags,
|
||||
execution_start_time,
|
||||
)
|
||||
})
|
||||
.collect::<crate::measurement::Result<_>>()
|
||||
.context(CouldNotCreateMeasurementGeneratorSets)?;
|
||||
|
||||
let current_datetime = start_datetime.unwrap_or_else(now_ns);
|
||||
let end_datetime = end_datetime.unwrap_or_else(now_ns);
|
||||
|
||||
// Convert to nanoseconds
|
||||
let sampling_interval = agent_spec
|
||||
.sampling_interval
|
||||
.map(|s| s as i64 * 1_000_000_000);
|
||||
|
||||
Ok(Self {
|
||||
agent_id,
|
||||
name,
|
||||
rng,
|
||||
agent_tags,
|
||||
measurement_generator_sets,
|
||||
sampling_interval,
|
||||
current_datetime,
|
||||
end_datetime,
|
||||
continue_on,
|
||||
finished: false,
|
||||
interval: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Generate and write points in batches until `generate` doesn't return any
|
||||
/// points. Meant to be called in a `tokio::task`.
|
||||
pub async fn generate_all(&mut self, mut points_writer: PointsWriter) -> Result<usize> {
|
||||
let mut total_points = 0;
|
||||
|
||||
let mut points = self.generate().await?;
|
||||
while !points.is_empty() {
|
||||
info!("[agent {}] sending {} points", self.name, points.len());
|
||||
total_points += points.len();
|
||||
points_writer
|
||||
.write_points(points)
|
||||
.await
|
||||
.context(CouldNotWritePoints)?;
|
||||
points = self.generate().await?;
|
||||
}
|
||||
Ok(total_points)
|
||||
}
|
||||
|
||||
/// Generate data points from the configuration in this agent, one point per
|
||||
/// measurement contained in this agent's configuration.
|
||||
pub async fn generate(&mut self) -> Result<Vec<DataPoint>> {
|
||||
let mut points = Vec::new();
|
||||
|
||||
debug!(
|
||||
"[agent {}] generate more? {} current: {}, end: {}",
|
||||
self.name, self.finished, self.current_datetime, self.end_datetime
|
||||
);
|
||||
|
||||
if !self.finished {
|
||||
// Save the current_datetime to use in the set of points that we're generating
|
||||
// because we might increment current_datetime to see if we're done
|
||||
// or not.
|
||||
let point_timestamp = self.current_datetime;
|
||||
|
||||
if let Some(i) = &mut self.interval {
|
||||
i.tick().await;
|
||||
self.current_datetime = now_ns();
|
||||
} else if let Some(ns) = self.sampling_interval {
|
||||
self.current_datetime += ns;
|
||||
|
||||
if self.current_datetime > self.end_datetime {
|
||||
if self.continue_on {
|
||||
let mut i = tokio::time::interval(Duration::from_nanos(ns as u64));
|
||||
i.tick().await; // first tick completes immediately
|
||||
self.current_datetime = now_ns();
|
||||
self.interval = Some(i);
|
||||
} else {
|
||||
self.finished = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.finished = true;
|
||||
}
|
||||
|
||||
for mgs in &mut self.measurement_generator_sets {
|
||||
for point in mgs
|
||||
.generate(point_timestamp)
|
||||
.context(CouldNotGeneratePoint)?
|
||||
{
|
||||
points.push(point);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(points)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::{now_ns, specification::*, ZeroRng};
|
||||
use influxdb2_client::models::WriteDataPoint;
|
||||
|
||||
type Error = Box<dyn std::error::Error>;
|
||||
type Result<T = (), E = Error> = std::result::Result<T, E>;
|
||||
|
||||
impl<T: DataGenRng> Agent<T> {
|
||||
/// Instantiate an agent only with the parameters we're interested in
|
||||
/// testing, keeping everything else constant across different
|
||||
/// tests.
|
||||
fn test_instance(
|
||||
sampling_interval: Option<i64>,
|
||||
continue_on: bool,
|
||||
current_datetime: i64,
|
||||
end_datetime: i64,
|
||||
) -> Self {
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement-{{agent_id}}-{{measurement_id}}".into(),
|
||||
count: Some(2),
|
||||
tags: vec![],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field-{{agent_id}}-{{measurement_id}}-{{field_id}}".into(),
|
||||
field_value_spec: FieldValueSpec::I64 {
|
||||
range: 0..60,
|
||||
increment: false,
|
||||
reset_after: None,
|
||||
},
|
||||
count: Some(2),
|
||||
}],
|
||||
};
|
||||
|
||||
let measurement_generator_set =
|
||||
MeasurementGeneratorSet::new("test", 42, &measurement_spec, "spec-test", &[], 0)
|
||||
.unwrap();
|
||||
|
||||
Self {
|
||||
agent_id: 0,
|
||||
name: String::from("test"),
|
||||
rng: RandomNumberGenerator::<T>::new("spec-test"),
|
||||
agent_tags: vec![],
|
||||
measurement_generator_sets: vec![measurement_generator_set],
|
||||
finished: false,
|
||||
interval: None,
|
||||
|
||||
sampling_interval,
|
||||
current_datetime,
|
||||
end_datetime,
|
||||
continue_on,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn timestamps(points: &[influxdb2_client::models::DataPoint]) -> Result<Vec<i64>> {
|
||||
points
|
||||
.iter()
|
||||
.map(|point| {
|
||||
let mut v = Vec::new();
|
||||
point.write_data_point_to(&mut v)?;
|
||||
let line = String::from_utf8(v)?;
|
||||
|
||||
Ok(line.split(' ').last().unwrap().trim().parse()?)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
// # Summary: No Sampling Interval
|
||||
//
|
||||
// If there isn't a sampling interval, we don't know how often to run, so we can neither
|
||||
// generate historical data nor can we continue into the future. The only thing we'll do is
|
||||
// generate once then stop.
|
||||
//
|
||||
// | sampling_interval | continue | cmp(current_time, end_time) | expected outcome |
|
||||
// |-------------------+----------+-----------------------------+------------------|
|
||||
// | None | false | Less | gen 1x, stop |
|
||||
// | None | false | Equal | gen 1x, stop |
|
||||
// | None | false | Greater | gen 1x, stop |
|
||||
// | None | true | Less | gen 1x, stop |
|
||||
// | None | true | Equal | gen 1x, stop |
|
||||
// | None | true | Greater | gen 1x, stop |
|
||||
|
||||
mod without_sampling_interval {
|
||||
use super::*;
|
||||
|
||||
mod without_continue {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_less_than_end_time() -> Result<()> {
|
||||
let mut agent = Agent::<ZeroRng>::test_instance(None, false, 0, 10);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_equal_end_time() -> Result<()> {
|
||||
let mut agent = Agent::<ZeroRng>::test_instance(None, false, 10, 10);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_greater_than_end_time() -> Result<()> {
|
||||
let mut agent = Agent::<ZeroRng>::test_instance(None, false, 11, 10);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
mod with_continue {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_less_than_end_time() -> Result<()> {
|
||||
let mut agent = Agent::<ZeroRng>::test_instance(None, true, 0, 10);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_equal_end_time() -> Result<()> {
|
||||
let mut agent = Agent::<ZeroRng>::test_instance(None, true, 10, 10);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_greater_than_end_time() -> Result<()> {
|
||||
let mut agent = Agent::<ZeroRng>::test_instance(None, true, 11, 10);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod with_sampling_interval {
|
||||
use super::*;
|
||||
|
||||
// The tests take about 5 ms to run on my computer, so set the sampling interval
|
||||
// to 10 ms to be able to test that the delay is happening when
|
||||
// `continue` is true without making the tests too artificially slow.
|
||||
const TEST_SAMPLING_INTERVAL: i64 = 10_000_000;
|
||||
|
||||
#[rustfmt::skip]
|
||||
// # Summary: Not continuing
|
||||
//
|
||||
// If there is a sampling interval but we're not continuing, we should generate points at
|
||||
// least once but if the current time is greater than the ending time (which might be set
|
||||
// to `now`), we've generated everything we need to and should stop.
|
||||
//
|
||||
// | sampling_interval | continue | cmp(current_time, end_time) | expected outcome |
|
||||
// |-------------------+----------+-----------------------------+------------------|
|
||||
// | Some(_) | false | Less | gen & increment |
|
||||
// | Some(_) | false | Equal | gen 1x, stop |
|
||||
// | Some(_) | false | Greater | gen 1x, stop |
|
||||
|
||||
mod without_continue {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_less_than_end_time() -> Result<()> {
|
||||
let current = 0;
|
||||
let end = TEST_SAMPLING_INTERVAL;
|
||||
|
||||
let mut agent =
|
||||
Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), false, current, end);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_equal_end_time() -> Result<()> {
|
||||
let current = TEST_SAMPLING_INTERVAL;
|
||||
let end = current;
|
||||
|
||||
let mut agent =
|
||||
Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), false, current, end);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_greater_than_end_time() -> Result<()> {
|
||||
let current = 2 * TEST_SAMPLING_INTERVAL;
|
||||
let end = TEST_SAMPLING_INTERVAL;
|
||||
|
||||
let mut agent =
|
||||
Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), false, current, end);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert!(points.is_empty(), "expected no points, got {:?}", points);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
// # Summary: After generating historical data, continue sampling in "real time"
|
||||
//
|
||||
// If there is a sampling interval and we are continuing, generate points as fast as
|
||||
// possible (but with timestamps separated by sampling_interval amounts) until we catch up
|
||||
// to `now`. Then add pauses of the sampling_interval's duration, generating points with
|
||||
// their timestamps set to the current time to simulate "real" point generation.
|
||||
//
|
||||
// | sampling_interval | continue | cmp(current_time, end_time) | expected outcome |
|
||||
// |-------------------+----------+-----------------------------+------------------|
|
||||
// | Some(_) | true | Less | gen, no delay |
|
||||
// | Some(_) | true | Equal | gen, delay |
|
||||
// | Some(_) | true | Greater | gen, delay |
|
||||
|
||||
mod with_continue {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_less_than_end_time() -> Result<()> {
|
||||
let end = now_ns();
|
||||
let current = end - TEST_SAMPLING_INTERVAL;
|
||||
|
||||
let mut agent =
|
||||
Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), true, current, end);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let times = timestamps(&points).unwrap();
|
||||
assert_eq!(vec![current, current], times);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let times = timestamps(&points).unwrap();
|
||||
assert_eq!(vec![end, end], times);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_equal_end_time() -> Result<()> {
|
||||
let end = now_ns();
|
||||
let current = end;
|
||||
|
||||
let mut agent =
|
||||
Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), true, current, end);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let times = timestamps(&points).unwrap();
|
||||
assert_eq!(vec![end, end], times);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let real_now = now_ns();
|
||||
|
||||
let times = timestamps(&points).unwrap();
|
||||
for time in times {
|
||||
assert!(
|
||||
time <= real_now,
|
||||
"expected timestamp {} to be generated before now ({}); \
|
||||
was {} nanoseconds greater",
|
||||
time,
|
||||
real_now,
|
||||
time - real_now
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn current_time_greater_than_end_time() -> Result<()> {
|
||||
let end = now_ns();
|
||||
let current = end + TEST_SAMPLING_INTERVAL;
|
||||
|
||||
let mut agent =
|
||||
Agent::<ZeroRng>::test_instance(Some(TEST_SAMPLING_INTERVAL), true, current, end);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let times = timestamps(&points).unwrap();
|
||||
assert_eq!(vec![current, current], times);
|
||||
|
||||
let points = agent.generate().await?;
|
||||
assert_eq!(points.len(), 2);
|
||||
|
||||
let real_now = now_ns();
|
||||
|
||||
let times = timestamps(&points).unwrap();
|
||||
for time in times {
|
||||
assert!(
|
||||
time <= real_now,
|
||||
"expected timestamp {} to be generated before now ({}); \
|
||||
was {} nanoseconds greater",
|
||||
time,
|
||||
real_now,
|
||||
time - real_now
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
#![deny(rust_2018_idioms)]
|
||||
#![warn(
|
||||
missing_copy_implementations,
|
||||
missing_debug_implementations,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::use_self
|
||||
)]
|
||||
|
||||
use clap::{App, Arg};
|
||||
use generated_types::influxdata::iox::management::v1::{
|
||||
self as management, database_rules::*, lifecycle_rules::*, *,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let help = r#"IOx database creator
|
||||
|
||||
Examples:
|
||||
# Create a database named `foo_bar` with the IOx server listening at the default gRPC address:
|
||||
create_database foo_bar
|
||||
|
||||
# Create a database named `myorg_mybucket` with the IOx server listening at
|
||||
# 127.0.0.1:9000:
|
||||
create_database --grpc-bind 127.0.0.1:9000 myorg_mybucket
|
||||
"#;
|
||||
|
||||
let matches = App::new(help)
|
||||
.about("IOx Database creation script")
|
||||
.arg(
|
||||
Arg::with_name("DATABASE_NAME")
|
||||
.help("Name of the database to create")
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("WRITER")
|
||||
.long("writer")
|
||||
.help("The gRPC host and port of the IOx server that should write to Kafka")
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("READER")
|
||||
.long("reader")
|
||||
.help("The gRPC host and port of the IOx server that should read from Kafka")
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("KAFKA")
|
||||
.long("kafka")
|
||||
.help("The connection address of the Kafka instance")
|
||||
.takes_value(true)
|
||||
.default_value("127.0.0.1:9093"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let db_name = matches
|
||||
.value_of("DATABASE_NAME")
|
||||
.expect("DATABASE_NAME is required")
|
||||
.to_string();
|
||||
let writer = matches.value_of("WRITER").expect("WRITER is required");
|
||||
let reader = matches.value_of("READER").expect("READER is required");
|
||||
let kafka = matches
|
||||
.value_of("KAFKA")
|
||||
.expect("KAFKA has a default value");
|
||||
|
||||
// Edit these to whatever DatabaseRules you want to use
|
||||
let writer_database_rules = DatabaseRules {
|
||||
name: db_name.clone(),
|
||||
partition_template: Some(PartitionTemplate {
|
||||
parts: vec![partition_template::Part {
|
||||
part: Some(partition_template::part::Part::Time(
|
||||
"%Y-%m-%d %H:00:00".into(),
|
||||
)),
|
||||
}],
|
||||
}),
|
||||
lifecycle_rules: Some(LifecycleRules {
|
||||
immutable: true,
|
||||
..Default::default()
|
||||
}),
|
||||
worker_cleanup_avg_sleep: None,
|
||||
routing_rules: Some(RoutingRules::RoutingConfig(RoutingConfig {
|
||||
sink: Some(management::Sink {
|
||||
sink: Some(management::sink::Sink::Kafka(KafkaProducer {})),
|
||||
}),
|
||||
})),
|
||||
write_buffer_connection: Some(WriteBufferConnection::Writing(kafka.to_string())),
|
||||
};
|
||||
let reader_database_rules = DatabaseRules {
|
||||
name: db_name.clone(),
|
||||
partition_template: Some(PartitionTemplate {
|
||||
parts: vec![partition_template::Part {
|
||||
part: Some(partition_template::part::Part::Time(
|
||||
"%Y-%m-%d %H:00:00".into(),
|
||||
)),
|
||||
}],
|
||||
}),
|
||||
lifecycle_rules: Some(LifecycleRules {
|
||||
buffer_size_soft: 1024 * 1024 * 1024,
|
||||
buffer_size_hard: 1024 * 1024 * 1024 * 2,
|
||||
worker_backoff_millis: 100,
|
||||
max_active_compactions_cfg: Some(MaxActiveCompactionsCfg::MaxActiveCompactions(1)),
|
||||
persist: true,
|
||||
persist_row_threshold: 10 * 1000 * 1000,
|
||||
..Default::default()
|
||||
}),
|
||||
worker_cleanup_avg_sleep: None,
|
||||
routing_rules: Some(RoutingRules::RoutingConfig(RoutingConfig {
|
||||
sink: Some(management::Sink {
|
||||
sink: Some(management::sink::Sink::Kafka(KafkaProducer {})),
|
||||
}),
|
||||
})),
|
||||
write_buffer_connection: Some(WriteBufferConnection::Reading(kafka.to_string())),
|
||||
};
|
||||
|
||||
// Create the writer db
|
||||
let writer_grpc_bind_addr = format!("http://{}", writer);
|
||||
let writer_grpc_channel = influxdb_iox_client::connection::Builder::default()
|
||||
.build(writer_grpc_bind_addr)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut writer_management_client =
|
||||
influxdb_iox_client::management::Client::new(writer_grpc_channel.clone());
|
||||
writer_management_client
|
||||
.create_database(writer_database_rules)
|
||||
.await
|
||||
.expect("create writer database failed");
|
||||
|
||||
// Write a few points
|
||||
let mut write_client = influxdb_iox_client::write::Client::new(writer_grpc_channel);
|
||||
let lp_lines = [
|
||||
"write_test,region=west user=23.2 100",
|
||||
"write_test,region=west user=21.0 150",
|
||||
"write_test,region=east bytes=99i 200",
|
||||
];
|
||||
let num_lines_written = write_client
|
||||
.write(&db_name, lp_lines.join("\n"))
|
||||
.await
|
||||
.expect("cannot write");
|
||||
assert_eq!(num_lines_written, 3);
|
||||
|
||||
// Create the reader db
|
||||
let reader_grpc_bind_addr = format!("http://{}", reader);
|
||||
let reader_grpc_channel = influxdb_iox_client::connection::Builder::default()
|
||||
.build(reader_grpc_bind_addr)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut reader_management_client =
|
||||
influxdb_iox_client::management::Client::new(reader_grpc_channel.clone());
|
||||
reader_management_client
|
||||
.create_database(reader_database_rules)
|
||||
.await
|
||||
.expect("create reader database failed");
|
||||
|
||||
println!("Created database {}", db_name);
|
||||
}
|
|
@ -0,0 +1,777 @@
|
|||
//! Generating a set of field keys and values given a specification
|
||||
|
||||
use crate::{
|
||||
now_ns, specification,
|
||||
substitution::{pick_from_replacements, Substitute},
|
||||
DataGenRng, RandomNumberGenerator,
|
||||
};
|
||||
|
||||
use influxdb2_client::models::FieldValue;
|
||||
use rand::Rng;
|
||||
use serde::Serialize;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{collections::BTreeMap, fmt, ops::Range, time::Duration};
|
||||
|
||||
/// Field-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while creating fields
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when substituting placeholder values into a
    /// field name (e.g. `{{field_id}}`)
    #[snafu(display("Could not create field name, caused by:\n{}", source))]
    CouldNotCreateFieldName {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },

    /// Error that may happen when compiling the template used for a string
    /// field's values
    #[snafu(display("Could not compile field name template, caused by:\n{}", source))]
    CouldNotCompileStringTemplate {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },
}
|
||||
|
||||
/// A generated field value that will be used in a generated data point.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Field {
|
||||
/// The key for the field
|
||||
pub key: String,
|
||||
/// The value for the field
|
||||
pub value: FieldValue,
|
||||
}
|
||||
|
||||
impl Field {
|
||||
/// Create a new field with the given key and value.
|
||||
pub fn new(key: impl Into<String>, value: impl Into<FieldValue>) -> Self {
|
||||
Self {
|
||||
key: key.into(),
|
||||
value: value.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A set of `count` fields that have the same configuration but different
|
||||
/// `field_id`s.
|
||||
pub struct FieldGeneratorSet {
|
||||
field_generators: Vec<Box<dyn FieldGenerator + Send>>,
|
||||
}
|
||||
|
||||
// field_generators doesn't implement Debug
|
||||
impl fmt::Debug for FieldGeneratorSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("FieldGeneratorSet")
|
||||
.field("field_generators", &"(dynamic)")
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldGeneratorSet {
|
||||
/// Create a new set of field generators for a particular agent,
|
||||
/// measurement, and field specification.
|
||||
pub fn new<T: DataGenRng>(
|
||||
agent_name: &str,
|
||||
agent_id: usize,
|
||||
measurement_id: usize,
|
||||
spec: &specification::FieldSpec,
|
||||
parent_seed: &str,
|
||||
execution_start_time: i64,
|
||||
) -> Result<Self> {
|
||||
let count = spec.count.unwrap_or(1);
|
||||
|
||||
let field_generators = (0..count)
|
||||
.map(|field_id| {
|
||||
field_spec_to_generator::<T>(
|
||||
agent_name,
|
||||
agent_id,
|
||||
measurement_id,
|
||||
field_id,
|
||||
spec,
|
||||
parent_seed,
|
||||
execution_start_time,
|
||||
)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(Self { field_generators })
|
||||
}
|
||||
|
||||
/// Create one set of fields
|
||||
pub fn generate(&mut self, timestamp: i64) -> Vec<Field> {
|
||||
self.field_generators
|
||||
.iter_mut()
|
||||
.map(|fg| fg.generate(timestamp))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Something that can produce one `Field` per call, given the point's
/// timestamp. Implementations may be stateful (e.g. incrementing counters).
trait FieldGenerator {
    fn generate(&mut self, timestamp: i64) -> Field;
}
|
||||
|
||||
/// Generate boolean field names and values.
|
||||
#[derive(Debug)]
|
||||
pub struct BooleanFieldGenerator<T: DataGenRng> {
|
||||
name: String,
|
||||
rng: RandomNumberGenerator<T>,
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> BooleanFieldGenerator<T> {
|
||||
/// Create a new boolean field generator that will always use the specified
|
||||
/// name.
|
||||
pub fn new(name: &str, parent_seed: &str) -> Self {
|
||||
let name = name.into();
|
||||
let seed = format!("{}-{}", parent_seed, name);
|
||||
let rng = RandomNumberGenerator::<T>::new(seed);
|
||||
|
||||
Self { name, rng }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> FieldGenerator for BooleanFieldGenerator<T> {
|
||||
fn generate(&mut self, _timestamp: i64) -> Field {
|
||||
let b: bool = self.rng.gen();
|
||||
Field::new(&self.name, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate integer field names and values.
#[derive(Debug)]
pub struct I64FieldGenerator<T: DataGenRng> {
    name: String,
    // Values are sampled from this half-open range; start == end means
    // "always produce `start`" (see the `generate` impl).
    range: Range<i64>,
    // When true, each sample is added to a running total and the total is
    // emitted, producing a monotonically increasing counter.
    increment: bool,
    rng: RandomNumberGenerator<T>,
    // Running total used when `increment` is set.
    previous_value: i64,
    // When `increment` is set, reset the running total to 0 after this many
    // generated values.
    reset_after: Option<usize>,
    // Number of values generated since the last reset.
    current_tick: usize,
}

impl<T: DataGenRng> I64FieldGenerator<T> {
    /// Create a new integer field generator that will always use the specified
    /// name.
    pub fn new(
        name: impl Into<String>,
        range: &Range<i64>,
        increment: bool,
        reset_after: Option<usize>,
        parent_seed: impl fmt::Display,
    ) -> Self {
        let name = name.into();
        // Derive a per-field seed so each field gets an independent stream.
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(seed);

        Self {
            name,
            range: range.to_owned(),
            increment,
            rng,
            previous_value: 0,
            reset_after,
            current_tick: 0,
        }
    }
}
|
||||
|
||||
impl<T: DataGenRng> FieldGenerator for I64FieldGenerator<T> {
|
||||
fn generate(&mut self, _timestamp: i64) -> Field {
|
||||
let mut value = if self.range.start == self.range.end {
|
||||
self.range.start
|
||||
} else {
|
||||
self.rng.gen_range(self.range.clone())
|
||||
};
|
||||
|
||||
if self.increment {
|
||||
self.previous_value = self.previous_value.wrapping_add(value);
|
||||
value = self.previous_value;
|
||||
|
||||
if let Some(reset) = self.reset_after {
|
||||
self.current_tick += 1;
|
||||
if self.current_tick >= reset {
|
||||
self.previous_value = 0;
|
||||
self.current_tick = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Field::new(&self.name, value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate floating point field names and values.
|
||||
#[derive(Debug)]
|
||||
pub struct F64FieldGenerator<T: DataGenRng> {
|
||||
name: String,
|
||||
range: Range<f64>,
|
||||
rng: RandomNumberGenerator<T>,
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> F64FieldGenerator<T> {
|
||||
/// Create a new floating point field generator that will always use the
|
||||
/// specified name.
|
||||
pub fn new(
|
||||
name: impl Into<String>,
|
||||
range: &Range<f64>,
|
||||
parent_seed: impl fmt::Display,
|
||||
) -> Self {
|
||||
let name = name.into();
|
||||
let seed = format!("{}-{}", parent_seed, name);
|
||||
let rng = RandomNumberGenerator::<T>::new(seed);
|
||||
|
||||
Self {
|
||||
name,
|
||||
range: range.to_owned(),
|
||||
rng,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> FieldGenerator for F64FieldGenerator<T> {
|
||||
fn generate(&mut self, _timestamp: i64) -> Field {
|
||||
let value = if (self.range.start - self.range.end).abs() < f64::EPSILON {
|
||||
self.range.start
|
||||
} else {
|
||||
self.rng.gen_range(self.range.clone())
|
||||
};
|
||||
|
||||
Field::new(&self.name, value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate string field names and values.
#[derive(Debug)]
pub struct StringFieldGenerator<T: DataGenRng> {
    // Exposed to value templates as `{{agent_name}}`.
    agent_name: String,
    name: String,
    // Pre-compiled template used to render each generated value.
    substitute: Substitute,
    rng: RandomNumberGenerator<T>,
    // Candidate replacement values for template placeholders; one is picked
    // per generated field (see `pick_from_replacements`).
    replacements: Vec<specification::Replacement>,
}

impl<T: DataGenRng> StringFieldGenerator<T> {
    /// Create a new string field generator.
    ///
    /// The `pattern` template is compiled once up front; compilation failure
    /// is reported as `CouldNotCompileStringTemplate`.
    pub fn new(
        agent_name: impl Into<String>,
        name: impl Into<String>,
        pattern: impl Into<String>,
        parent_seed: impl fmt::Display,
        replacements: Vec<specification::Replacement>,
    ) -> Result<Self> {
        let name = name.into();
        // Derive a per-field seed so each field gets an independent stream.
        let seed = format!("{}-{}", parent_seed, name);
        let rng = RandomNumberGenerator::<T>::new(seed);
        // The template gets its own RNG built from the same seed string.
        let substitute = Substitute::new(pattern, RandomNumberGenerator::<T>::new(&rng.seed))
            .context(CouldNotCompileStringTemplate {})?;

        Ok(Self {
            agent_name: agent_name.into(),
            name,
            substitute,
            rng,
            replacements,
        })
    }
}
|
||||
|
||||
impl<T: DataGenRng> FieldGenerator for StringFieldGenerator<T> {
    fn generate(&mut self, timestamp: i64) -> Field {
        // The serialized field names of this struct are the variables visible
        // to the template: the flattened map contributes one variable per
        // configured replacement, plus `agent_name` and `timestamp`. Renaming
        // these fields would change template semantics.
        #[derive(Serialize)]
        struct Values<'a> {
            #[serde(flatten)]
            replacements: BTreeMap<&'a str, &'a str>,
            agent_name: &'a str,
            timestamp: i64,
        }

        let values = Values {
            replacements: pick_from_replacements(&mut self.rng, &self.replacements),
            agent_name: &self.agent_name,
            timestamp,
        };

        // Panics if the template references a variable that isn't supplied;
        // exercised by the `unknown_replacement_errors` test below.
        let value = self
            .substitute
            .evaluate(&values)
            .expect("Unable to substitute string field value");

        Field::new(&self.name, value)
    }
}
|
||||
|
||||
/// Generate an i64 field that has the name `uptime` and the value of the number
|
||||
/// of seconds since the data generator started running
|
||||
#[derive(Debug)]
|
||||
pub struct UptimeFieldGenerator {
|
||||
name: String,
|
||||
execution_start_time: i64,
|
||||
kind: specification::UptimeKind,
|
||||
}
|
||||
|
||||
impl UptimeFieldGenerator {
|
||||
fn new(
|
||||
name: impl Into<String>,
|
||||
kind: &specification::UptimeKind,
|
||||
execution_start_time: i64,
|
||||
) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
kind: *kind,
|
||||
execution_start_time,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldGenerator for UptimeFieldGenerator {
|
||||
fn generate(&mut self, _timestamp: i64) -> Field {
|
||||
use specification::UptimeKind::*;
|
||||
|
||||
let elapsed = Duration::from_nanos((now_ns() - self.execution_start_time) as u64);
|
||||
let elapsed_seconds = elapsed.as_secs();
|
||||
|
||||
match self.kind {
|
||||
I64 => Field::new(&self.name, elapsed_seconds as i64),
|
||||
Telegraf => {
|
||||
let days = elapsed_seconds / (60 * 60 * 24);
|
||||
let days_plural = if days == 1 { "" } else { "s" };
|
||||
|
||||
let mut minutes = elapsed_seconds / 60;
|
||||
let mut hours = minutes / 60;
|
||||
hours %= 24;
|
||||
minutes %= 60;
|
||||
|
||||
let duration_string =
|
||||
format!("{} day{}, {:02}:{:02}", days, days_plural, hours, minutes);
|
||||
Field::new(&self.name, duration_string)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatch on the field value specification to build the matching concrete
/// generator, after substituting `{{agent_id}}`, `{{measurement_id}}`, and
/// `{{field_id}}` into the configured field name.
fn field_spec_to_generator<T: DataGenRng>(
    agent_name: &str,
    agent_id: usize,
    measurement_id: usize,
    field_id: usize,
    spec: &specification::FieldSpec,
    parent_seed: &str,
    execution_start_time: i64,
) -> Result<Box<dyn FieldGenerator + Send>> {
    use specification::FieldValueSpec::*;

    // Render the field name once; placeholder failures surface as
    // `CouldNotCreateFieldName`.
    let spec_name = Substitute::once(
        &spec.name,
        &[
            ("agent_id", &agent_id.to_string()),
            ("measurement_id", &measurement_id.to_string()),
            ("field_id", &field_id.to_string()),
        ],
    )
    .context(CouldNotCreateFieldName)?;

    Ok(match &spec.field_value_spec {
        Bool(true) => Box::new(BooleanFieldGenerator::<T>::new(&spec_name, parent_seed)),
        // `bool = false` has no defined meaning in the spec format yet.
        Bool(false) => unimplemented!("Not sure what false means for bool fields yet"),
        I64 {
            range,
            increment,
            reset_after,
        } => Box::new(I64FieldGenerator::<T>::new(
            &spec_name,
            range,
            *increment,
            *reset_after,
            parent_seed,
        )),
        F64 { range } => Box::new(F64FieldGenerator::<T>::new(&spec_name, range, parent_seed)),
        String {
            pattern,
            replacements,
        } => Box::new(StringFieldGenerator::<T>::new(
            agent_name,
            &spec_name,
            pattern,
            parent_seed,
            replacements.to_vec(),
        )?),
        Uptime { kind } => Box::new(UptimeFieldGenerator::new(
            &spec_name,
            kind,
            execution_start_time,
        )),
    })
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::{DynamicRng, ZeroRng, TEST_SEED};
    use test_helpers::approximately_equal;

    // Boxed error + Result aliases so tests can use `?` on anything.
    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // Shortcut functions that panic for getting values out of fields for test convenience
    impl Field {
        fn i64(&self) -> i64 {
            match self.value {
                FieldValue::I64(v) => v,
                ref other => panic!("expected i64, got {:?}", other),
            }
        }

        fn f64(&self) -> f64 {
            match self.value {
                FieldValue::F64(v) => v,
                ref other => panic!("expected f64, got {:?}", other),
            }
        }

        fn bool(&self) -> bool {
            match self.value {
                FieldValue::Bool(v) => v,
                ref other => panic!("expected bool, got {:?}", other),
            }
        }

        fn string(&self) -> String {
            match &self.value {
                FieldValue::String(v) => v.clone(),
                ref other => panic!("expected String, got {:?}", other),
            }
        }
    }

    #[test]
    fn generate_boolean_field() {
        let mut bfg = BooleanFieldGenerator::<ZeroRng>::new("bfg", TEST_SEED);

        // ZeroRng yields all-zero bits, which maps to `false`.
        assert!(!bfg.generate(1234).bool());
    }

    #[test]
    fn generate_i64_field_always_the_same() {
        // If the specification has the same number for the start and end of the
        // range...
        let mut i64fg =
            I64FieldGenerator::<DynamicRng>::new("i64fg", &(3..3), false, None, TEST_SEED);

        let i64_fields: Vec<_> = (0..10).map(|_| i64fg.generate(1234).i64()).collect();
        let expected = i64_fields[0];

        // All the values generated will always be the same.
        assert!(
            i64_fields.iter().all(|f| *f == expected),
            "{:?}",
            i64_fields
        );

        // If the specification has n for the start and n+1 for the end of the range...
        let mut i64fg =
            I64FieldGenerator::<DynamicRng>::new("i64fg", &(4..5), false, None, TEST_SEED);

        let i64_fields: Vec<_> = (0..10).map(|_| i64fg.generate(1234).i64()).collect();
        // We know what the value will be even though we're using a real random number generator
        let expected = 4;

        // All the values generated will also always be the same, because the end of the
        // range is exclusive.
        assert!(
            i64_fields.iter().all(|f| *f == expected),
            "{:?}",
            i64_fields
        );
    }

    #[test]
    fn generate_i64_field_within_a_range() {
        let range = 3..1000;

        let mut i64fg =
            I64FieldGenerator::<DynamicRng>::new("i64fg", &range, false, None, TEST_SEED);

        let val = i64fg.generate(1234).i64();

        assert!(range.contains(&val), "`{}` was not in the range", val);
    }

    #[test]
    fn generate_incrementing_i64_field() {
        let mut i64fg =
            I64FieldGenerator::<DynamicRng>::new("i64fg", &(3..10), true, None, TEST_SEED);

        let val1 = i64fg.generate(1234).i64();
        let val2 = i64fg.generate(1234).i64();
        let val3 = i64fg.generate(1234).i64();
        let val4 = i64fg.generate(1234).i64();

        // Positive samples (range 3..10) make each emitted total strictly larger.
        assert!(val1 < val2, "`{}` < `{}` was false", val1, val2);
        assert!(val2 < val3, "`{}` < `{}` was false", val2, val3);
        assert!(val3 < val4, "`{}` < `{}` was false", val3, val4);
    }

    #[test]
    fn incrementing_i64_wraps() {
        let rng = RandomNumberGenerator::<DynamicRng>::new(TEST_SEED);
        let range = 3..10;
        let previous_value = i64::MAX;

        // Construct by hand to set the previous value at the end of i64's range
        let mut i64fg = I64FieldGenerator {
            name: "i64fg".into(),
            range: range.clone(),
            increment: true,
            reset_after: None,
            rng,
            previous_value,
            current_tick: 0,
        };

        // Adding to i64::MAX wraps around to the negative end of i64.
        let resulting_range =
            range.start.wrapping_add(previous_value)..range.end.wrapping_add(previous_value);

        let val = i64fg.generate(1234).i64();

        assert!(
            resulting_range.contains(&val),
            "`{}` was not in the range",
            val
        );
    }

    #[test]
    fn incrementing_i64_that_resets() {
        let reset_after = Some(3);
        let mut i64fg =
            I64FieldGenerator::<DynamicRng>::new("i64fg", &(3..10), true, reset_after, TEST_SEED);

        let val1 = i64fg.generate(1234).i64();
        let val2 = i64fg.generate(1234).i64();
        let val3 = i64fg.generate(1234).i64();
        // The counter reset after 3 values, so the 4th value drops back down.
        let val4 = i64fg.generate(1234).i64();

        assert!(val1 < val2, "`{}` < `{}` was false", val1, val2);
        assert!(val2 < val3, "`{}` < `{}` was false", val2, val3);
        assert!(val4 < val3, "`{}` < `{}` was false", val4, val3);
    }

    #[test]
    fn generate_f64_field_always_the_same() {
        // If the specification has the same number for the start and end of the
        // range...
        let start_and_end = 3.0;
        let range = start_and_end..start_and_end;
        let mut f64fg = F64FieldGenerator::<DynamicRng>::new("f64fg", &range, TEST_SEED);

        let f64_fields: Vec<_> = (0..10).map(|_| f64fg.generate(1234).f64()).collect();

        // All the values generated will always be the same known value.
        assert!(
            f64_fields
                .iter()
                .all(|f| approximately_equal(*f, start_and_end)),
            "{:?}",
            f64_fields
        );
    }

    #[test]
    fn generate_f64_field_within_a_range() {
        let range = 3.0..1000.0;
        let mut f64fg = F64FieldGenerator::<DynamicRng>::new("f64fg", &range, TEST_SEED);

        let val = f64fg.generate(1234).f64();
        assert!(range.contains(&val), "`{}` was not in the range", val);
    }

    #[test]
    fn generate_string_field_without_replacements() {
        let fake_now = 11111;

        // A pattern with no placeholders is passed through verbatim.
        let mut stringfg = StringFieldGenerator::<DynamicRng>::new(
            "agent_name",
            "stringfg",
            "my value",
            TEST_SEED,
            vec![],
        )
        .unwrap();

        assert_eq!("my value", stringfg.generate(fake_now).string());
    }

    #[test]
    fn generate_string_field_with_provided_replacements() {
        let fake_now = 5555555555;

        let mut stringfg = StringFieldGenerator::<DynamicRng>::new(
            "double-oh-seven",
            "stringfg",
            r#"{{agent_name}}---{{random 16}}---{{format-time "%s%f"}}"#,
            TEST_SEED,
            vec![],
        )
        .unwrap();

        let string_val1 = stringfg.generate(fake_now).string();
        let string_val2 = stringfg.generate(fake_now).string();

        assert!(
            string_val1.starts_with("double-oh-seven---"),
            "`{}` did not start with `double-oh-seven---`",
            string_val1
        );
        assert!(
            string_val1.ends_with("---5555555555"),
            "`{}` did not end with `---5555555555`",
            string_val1
        );
        assert!(
            string_val2.starts_with("double-oh-seven---"),
            "`{}` did not start with `double-oh-seven---`",
            string_val2
        );
        assert!(
            string_val2.ends_with("---5555555555"),
            "`{}` did not end with `---5555555555`",
            string_val2
        );

        // Only the `{{random 16}}` section differs between the two values.
        assert_ne!(string_val1, string_val2, "random value should change");
    }

    #[test]
    #[should_panic(expected = "Unable to substitute string field value")]
    fn unknown_replacement_errors() {
        let fake_now = 55555;

        let mut stringfg = StringFieldGenerator::<DynamicRng>::new(
            "arbitrary",
            "stringfg",
            "static-{{unknown}}",
            TEST_SEED,
            vec![],
        )
        .unwrap();

        stringfg.generate(fake_now);
    }

    #[test]
    fn replacements_no_weights() -> Result<()> {
        let fake_now = 55555;

        let toml: specification::FieldSpec = toml::from_str(
            r#"
name = "sf"
pattern = "foo {{level}}"
replacements = [
    {replace = "level", with = ["info", "warn", "error"]}
]"#,
        )
        .unwrap();
        let mut stringfg =
            field_spec_to_generator::<ZeroRng>("agent_name", 0, 0, 0, &toml, TEST_SEED, fake_now)?;

        // With ZeroRng the picked replacement is deterministic.
        assert_eq!("foo info", stringfg.generate(fake_now).string());
        Ok(())
    }

    #[test]
    fn replacements_with_weights() -> Result<()> {
        let fake_now = 55555;

        let toml: specification::FieldSpec = toml::from_str(
            r#"
name = "sf"
pattern = "foo {{level}}"
replacements = [
    {replace = "level", with = [["info", 1000000], ["warn", 1], ["error", 0]]}
]"#,
        )
        .unwrap();
        let mut stringfg =
            field_spec_to_generator::<ZeroRng>("agent_name", 0, 0, 0, &toml, TEST_SEED, fake_now)?;

        assert_eq!("foo info", stringfg.generate(fake_now).string());
        Ok(())
    }

    #[test]
    fn uptime_i64() -> Result<()> {
        let fake_now = 55555;

        // Pretend data generator started running 10 seconds ago
        let seconds_ago = 10;
        let fake_start_execution_time = now_ns() - seconds_ago * 1_000_000_000;

        let toml: specification::FieldSpec = toml::from_str(
            r#"
name = "arbitrary" # field name doesn't have to be uptime
uptime = "i64""#,
        )
        .unwrap();
        let mut uptimefg = field_spec_to_generator::<DynamicRng>(
            "agent_name",
            0,
            0,
            0,
            &toml,
            TEST_SEED,
            fake_start_execution_time,
        )?;

        assert_eq!(seconds_ago, uptimefg.generate(fake_now).i64());
        Ok(())
    }

    #[test]
    fn uptime_telegraf() -> Result<()> {
        let fake_now = 55555;

        // Pretend data generator started running 10 days, 2 hours, and 33 minutes ago
        let seconds_ago = 10 * 24 * 60 * 60 + 2 * 60 * 60 + 33 * 60;
        let fake_start_execution_time = now_ns() - seconds_ago * 1_000_000_000;

        let toml: specification::FieldSpec = toml::from_str(
            r#"
name = "arbitrary" # field name doesn't have to be uptime
uptime = "telegraf""#,
        )
        .unwrap();
        let mut uptimefg = field_spec_to_generator::<DynamicRng>(
            "agent_name",
            0,
            0,
            0,
            &toml,
            TEST_SEED,
            fake_start_execution_time,
        )?;

        assert_eq!("10 days, 02:33", uptimefg.generate(fake_now).string());

        // Pretend data generator started running 1 day, 14 hours, and 5 minutes ago
        // to exercise different formatting
        let seconds_in_1_day = 24 * 60 * 60;
        let seconds_in_14_hours = 14 * 60 * 60;
        let seconds_in_5_minutes = 5 * 60;

        let seconds_ago = seconds_in_1_day + seconds_in_14_hours + seconds_in_5_minutes;
        let fake_start_execution_time = now_ns() - seconds_ago * 1_000_000_000;

        let mut uptimefg = field_spec_to_generator::<DynamicRng>(
            "agent_name",
            0,
            0,
            0,
            &toml,
            TEST_SEED,
            fake_start_execution_time,
        )?;

        // Singular "day" — exercises the plural-suffix branch.
        assert_eq!("1 day, 14:05", uptimefg.generate(fake_now).string());

        Ok(())
    }
}
|
|
@ -0,0 +1,357 @@
|
|||
//! This crate contains structures and generators for specifying how to generate
|
||||
//! historical and real-time test data for Delorean. The rules for how to
|
||||
//! generate data and what shape it should take can be specified in a TOML file.
|
||||
//!
|
||||
//! Generators can output in line protocol, Parquet, or can be used to generate
|
||||
//! real-time load on a server that implements the [InfluxDB 2.0 write
|
||||
//! path][write-api].
|
||||
//!
|
||||
//! [write-api]: https://v2.docs.influxdata.com/v2.0/api/#tag/Write
|
||||
//!
|
||||
//! While this generator could be compared to [the Go based one that creates TSM
|
||||
//! data][go-gen], its purpose is meant to be more far reaching. In addition to
|
||||
//! generating historical data, it should be useful for generating data in a
|
||||
//! sequence as you would expect it to arrive in a production environment. That
|
||||
//! means many agents sending data with their different tags and timestamps.
|
||||
//!
|
||||
//! [go-gen]: https://github.com/influxdata/influxdb/pull/12710
|
||||
|
||||
#![deny(rust_2018_idioms)]
|
||||
#![warn(
|
||||
missing_copy_implementations,
|
||||
missing_debug_implementations,
|
||||
missing_docs,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::use_self
|
||||
)]
|
||||
|
||||
use crate::substitution::Substitute;
|
||||
use rand::Rng;
|
||||
use rand_seeder::Seeder;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{
|
||||
convert::TryFrom,
|
||||
time::{SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
|
||||
pub mod agent;
|
||||
pub mod field;
|
||||
pub mod measurement;
|
||||
pub mod specification;
|
||||
pub mod substitution;
|
||||
pub mod tag;
|
||||
pub mod write;
|
||||
|
||||
/// Errors that may happen while generating points.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when waiting on a tokio task
    #[snafu(display("Could not join tokio task: {}", source))]
    TokioError {
        /// Underlying tokio error that caused this problem
        source: tokio::task::JoinError,
    },

    /// Error that may happen when constructing an agent name
    #[snafu(display("Could not create agent name, caused by:\n{}", source))]
    CouldNotCreateAgentName {
        /// Underlying `substitution` module error that caused this problem
        source: substitution::Error,
    },

    /// Error that may happen when an agent generates points
    #[snafu(display("Agent could not generate points, caused by:\n{}", source))]
    AgentCouldNotGeneratePoints {
        /// Underlying `agent` module error that caused this problem
        source: agent::Error,
    },

    /// Error that may happen when creating agents
    #[snafu(display("Could not create agent `{}`, caused by:\n{}", name, source))]
    CouldNotCreateAgent {
        /// The name of the relevant agent
        name: String,
        /// Underlying `agent` module error that caused this problem
        source: agent::Error,
    },
}

// Crate-internal shorthand for results that use this module's `Error`.
type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Generate data from the configuration in the spec.
///
/// Provide a writer that the line protocol should be written to.
///
/// If `start_datetime` or `end_datetime` are `None`, the current datetime will
/// be used.
///
/// Returns the total number of points generated across all agents.
pub async fn generate<T: DataGenRng>(
    spec: &specification::DataSpec,
    points_writer_builder: &mut write::PointsWriterBuilder,
    start_datetime: Option<i64>,
    end_datetime: Option<i64>,
    execution_start_time: i64,
    continue_on: bool,
) -> Result<usize> {
    // Use the configured base seed, or invent a random 4-digit one so a run
    // can still be reproduced later by reusing the same seed string.
    let seed = spec.base_seed.to_owned().unwrap_or_else(|| {
        let mut rng = rand::thread_rng();
        format!("{:04}", rng.gen_range(0..10000))
    });

    let mut handles = vec![];

    // for each agent specification
    for agent_spec in &spec.agents {
        // create iterators to `cycle` through for `agent_spec.tags`
        let tag_set_iterator = tag::AgentTagIterator::new(&agent_spec.tags);

        // create `count` number of agent instances, or 1 agent if no count is specified
        let n_agents = agent_spec.count.unwrap_or(1);

        for (agent_id, mut agent_tags) in tag_set_iterator.take(n_agents).enumerate() {
            let agent_name =
                Substitute::once(&agent_spec.name, &[("agent_id", &agent_id.to_string())])
                    .context(CouldNotCreateAgentName)?;

            // Every agent's points carry the name of the spec that produced them.
            agent_tags.push(tag::Tag::new("data_spec", &spec.name));

            if let Some(name_tag_key) = &agent_spec.name_tag_key {
                agent_tags.push(tag::Tag::new(name_tag_key, &agent_name));
            }

            let mut agent = agent::Agent::<T>::new(
                agent_spec,
                &agent_name,
                agent_id,
                &seed,
                agent_tags,
                start_datetime,
                end_datetime,
                execution_start_time,
                continue_on,
            )
            .context(CouldNotCreateAgent { name: &agent_name })?;

            let agent_points_writer = points_writer_builder.build_for_agent(&agent_name);

            // Agents generate concurrently, one tokio task each.
            handles.push(tokio::task::spawn(async move {
                agent.generate_all(agent_points_writer).await
            }));
        }
    }

    // Wait for every agent to finish; the first failure aborts the total.
    let mut total_points = 0;
    for handle in handles {
        total_points += handle
            .await
            .context(TokioError)?
            .context(AgentCouldNotGeneratePoints)?;
    }

    Ok(total_points)
}
|
||||
|
||||
/// Shorthand trait for the functionality this crate needs a random number generator to have
pub trait DataGenRng: rand::Rng + rand::SeedableRng + Send + 'static {}

// Blanket impl: any seedable, sendable RNG automatically qualifies.
impl<T: rand::Rng + rand::SeedableRng + Send + 'static> DataGenRng for T {}
|
||||
|
||||
/// Encapsulating the creation of an optionally-seedable random number generator
|
||||
/// to make this easy to change. Uses a 4-digit number expressed as a `String`
|
||||
/// as the seed type to enable easy creation of another instance using the same
|
||||
/// seed.
|
||||
#[derive(Debug)]
|
||||
pub struct RandomNumberGenerator<T: DataGenRng> {
|
||||
rng: T,
|
||||
/// The seed used for this instance.
|
||||
pub seed: String,
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> Default for RandomNumberGenerator<T> {
|
||||
fn default() -> Self {
|
||||
let mut rng = rand::thread_rng();
|
||||
let seed = format!("{:04}", rng.gen_range(0..10000));
|
||||
Self::new(seed)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DataGenRng> RandomNumberGenerator<T> {
    /// Create a new instance using the specified seed.
    ///
    /// The seed string is fed through `Seeder` (presumably `rand_seeder`,
    /// which hashes arbitrary strings into RNG state — TODO confirm import)
    /// so the same seed always produces the same RNG sequence.
    pub fn new(seed: impl Into<String>) -> Self {
        let seed = seed.into();
        Self {
            rng: Seeder::from(&seed).make_rng(),
            // Keep the seed so another identical generator can be constructed later.
            seed,
        }
    }

    /// Generate a random GUID
    ///
    /// Fills 16 bytes from this generator, then stamps the RFC 4122 variant
    /// and random-version bits so the result is a well-formed v4-style UUID.
    /// Because the bytes come from `self.rng`, a seeded generator yields a
    /// deterministic sequence of GUIDs.
    pub fn guid(&mut self) -> uuid::Uuid {
        let mut bytes = [0u8; 16];
        self.rng.fill_bytes(&mut bytes);
        uuid::Builder::from_bytes(bytes)
            .set_variant(uuid::Variant::RFC4122)
            .set_version(uuid::Version::Random)
            .build()
    }
}
|
||||
|
||||
// Forward the entire `RngCore` interface to the wrapped generator so a
// `RandomNumberGenerator` can be used anywhere a `rand::Rng` is expected.
impl<T: DataGenRng> rand::RngCore for RandomNumberGenerator<T> {
    fn next_u32(&mut self) -> u32 {
        self.rng.next_u32()
    }

    fn next_u64(&mut self) -> u64 {
        self.rng.next_u64()
    }

    fn fill_bytes(&mut self, dest: &mut [u8]) {
        self.rng.fill_bytes(dest);
    }

    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> std::result::Result<(), rand::Error> {
        self.rng.try_fill_bytes(dest)
    }
}
|
||||
|
||||
/// Gets the current time in nanoseconds since the epoch.
///
/// # Panics
///
/// Panics if the system clock reads earlier than the Unix epoch, or if the
/// nanosecond count no longer fits in an `i64` (around the year 2262).
pub fn now_ns() -> i64 {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("Time went backwards")
        .as_nanos();
    i64::try_from(nanos).expect("Time does not fit")
}
|
||||
|
||||
// Always returns 0.
//
// Test-only RNG: every draw is zero, so data generated with it is fully
// deterministic and tests can assert on exact output values.
#[cfg(test)]
#[derive(Default)]
struct ZeroRng;

#[cfg(test)]
impl rand::RngCore for ZeroRng {
    fn next_u32(&mut self) -> u32 {
        // Truncating 0u64 is still 0; delegating keeps the two methods consistent.
        self.next_u64() as u32
    }

    fn next_u64(&mut self) -> u64 {
        0
    }

    fn fill_bytes(&mut self, dest: &mut [u8]) {
        // Standard helper that fills `dest` from repeated `next_u64` calls,
        // so every byte comes out zero.
        rand_core::impls::fill_bytes_via_next(self, dest)
    }

    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> std::result::Result<(), rand::Error> {
        // Filling can never fail for this RNG.
        self.fill_bytes(dest);
        Ok(())
    }
}

#[cfg(test)]
impl rand::SeedableRng for ZeroRng {
    type Seed = Vec<u8>;

    // Ignore the seed value
    fn from_seed(_seed: Self::Seed) -> Self {
        Self
    }
}
|
||||
|
||||
// The test rng ignores the seed anyway, so the seed specified doesn't matter.
#[cfg(test)]
const TEST_SEED: &str = "";

// Convenience constructor for the deterministic test generator.
#[cfg(test)]
fn test_rng() -> RandomNumberGenerator<ZeroRng> {
    RandomNumberGenerator::<ZeroRng>::new(TEST_SEED)
}

// A random number type that does *not* have a predictable sequence of values for use in tests
// that assert on properties rather than exact values. Aliased for convenience in changing to
// a different Rng type.
#[cfg(test)]
type DynamicRng = rand::rngs::SmallRng;
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::specification::*;
    use influxdb2_client::models::WriteDataPoint;
    use std::str::FromStr;

    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // Verifies that when a start/end window is supplied, each `generate` call
    // advances by the agent's `sampling_interval` and stops once the end time
    // is passed. Uses `ZeroRng` so output values are deterministic.
    #[tokio::test]
    async fn historical_data_sampling_interval() -> Result<()> {
        // Minimal spec: one agent sampling every 10 seconds, one bool field.
        let toml = r#"
name = "demo_schema"

[[agents]]
name = "basic"
sampling_interval = 10 # seconds

[[agents.measurements]]
name = "cpu"

[[agents.measurements.fields]]
name = "up"
bool = true"#;
        let data_spec = DataSpec::from_str(toml).unwrap();
        let agent_id = 0;
        let agent_spec = &data_spec.agents[0];
        // Take agent_tags out of the equation for the purposes of this test
        let agent_tags = vec![];

        let execution_start_time = now_ns();

        // imagine we've specified at the command line that we want to generate metrics
        // for 1970
        let start_datetime = Some(0);
        // for the first 15 seconds of the year
        let end_datetime = Some(15 * 1_000_000_000);

        let mut agent = agent::Agent::<ZeroRng>::new(
            agent_spec,
            &agent_spec.name,
            agent_id,
            TEST_SEED,
            agent_tags,
            start_datetime,
            end_datetime,
            execution_start_time,
            false,
        )?;

        // First batch: serialize the generated points to line protocol.
        let data_points = agent.generate().await?;
        let mut v = Vec::new();
        for data_point in data_points {
            data_point.write_data_point_to(&mut v).unwrap();
        }
        let line_protocol = String::from_utf8(v).unwrap();

        // Get a point for time 0
        let expected_line_protocol = "cpu up=f 0\n";
        assert_eq!(line_protocol, expected_line_protocol);

        let data_points = agent.generate().await?;
        let mut v = Vec::new();
        for data_point in data_points {
            data_point.write_data_point_to(&mut v).unwrap();
        }
        let line_protocol = String::from_utf8(v).unwrap();

        // Get a point for time 10s
        let expected_line_protocol = "cpu up=f 10000000000\n";
        assert_eq!(line_protocol, expected_line_protocol);

        // Don't get any points anymore because we're past the ending datetime
        let data_points = agent.generate().await?;
        assert!(
            data_points.is_empty(),
            "expected no data points, got {:?}",
            data_points
        );

        Ok(())
    }
}
|
|
@ -0,0 +1,265 @@
|
|||
#![deny(rust_2018_idioms)]
|
||||
#![warn(
|
||||
missing_copy_implementations,
|
||||
missing_debug_implementations,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::use_self
|
||||
)]
|
||||
|
||||
use chrono::prelude::*;
|
||||
use chrono_english::{parse_date_string, Dialect};
|
||||
use clap::{crate_authors, crate_version, App, Arg};
|
||||
use iox_data_generator::{specification::DataSpec, write::PointsWriterBuilder};
|
||||
use tracing::info;
|
||||
|
||||
// Entry point for the `iox_data_generator` binary: parses CLI arguments,
// loads the TOML data spec, picks a file- or API-backed writer, and drives
// `iox_data_generator::generate` to completion.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    tracing_subscriber::fmt::init();

    let help = r#"IOx data point generator

Examples:
    # Generate data points using the specification in `spec.toml` and save in the `lp` directory
    iox_data_generator -s spec.toml -o lp

    # Generate data points and write to the server running at localhost:8080 with the provided org,
    # bucket and authorization token, creating the bucket
    iox_data_generator -s spec.toml -h localhost:8080 --org myorg --org_id 0000111100001111 \
        --bucket mybucket --token mytoken --create

    # Generate data points for the 24 hours between midnight 2020-01-01 and 2020-01-02
    iox_data_generator -s spec.toml -o lp --start 2020-01-01 --end 2020-01-02

    # Generate data points starting from an hour ago until now, generating the historical data as
    # fast as possible. Then generate data according to the sampling interval until terminated.
    iox_data_generator -s spec.toml -o lp --start "1 hr ago" --continue

Logging:
    Use the RUST_LOG environment variable to configure the desired logging level.
    For example:

    # Enable INFO level logging for all of iox_data_generator
    RUST_LOG=iox_data_generator=info iox_data_generator -s spec.toml -o lp


"#;

    // NOTE(review): the full help text is passed as the clap App *name*; the
    // name appears in usage/version output, so this looks unusual — confirm it
    // is intentional vs using `.after_help(help)`.
    let matches = App::new(help)
        .version(crate_version!())
        .author(crate_authors!())
        .about("IOx data point generator")
        .arg(
            Arg::with_name("SPECIFICATION")
                .short("s")
                .long("spec")
                .help("Path to the specification TOML file describing the data generation")
                .takes_value(true)
                .required(true),
        )
        .arg(
            Arg::with_name("OUTPUT")
                .short("o")
                .long("output")
                .help("The filename to write line protocol")
                .takes_value(true),
        )
        // NOTE(review): `-h` here shadows clap's default help short flag —
        // confirm that's intended.
        .arg(
            Arg::with_name("HOST")
                .short("h")
                .long("host")
                .help("The host name part of the API endpoint to write to")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("ORG")
                .long("org")
                .help("The organization name to write to")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("ORG_ID")
                .long("org_id")
                .help("The 16-digit hex ID of the organization. Only needed if passing `--create`.")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("BUCKET")
                .long("bucket")
                .help("The bucket name to write to")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("TOKEN")
                .long("token")
                .help("The API authorization token used for all requests")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("START")
                .long("start")
                .help(
                    "The date and time at which to start the timestamps of the generated data. \
                    Can be an exact datetime like `2020-01-01T01:23:45-05:00` or a fuzzy \
                    specification like `1 hour ago`. If not specified, defaults to now.",
                )
                .takes_value(true),
        )
        .arg(
            Arg::with_name("END")
                .long("end")
                .help(
                    "The date and time at which to stop the timestamps of the generated data. \
                    Can be an exact datetime like `2020-01-01T01:23:45-05:00` or a fuzzy \
                    specification like `1 hour ago`. If not specified, defaults to now.",
                )
                .takes_value(true),
        )
        .arg(
            Arg::with_name("create")
                .long("create")
                .help("Create the bucket specified before sending points. Requires `--org_id`"),
        )
        .arg(Arg::with_name("continue").long("continue").help(
            "Generate live data using the intervals from the spec after generating historical \
            data. This option has no effect if you specify an end time.",
        ))
        .get_matches();

    let spec_filename = matches
        .value_of("SPECIFICATION")
        // This should never fail if clap is working properly
        .expect("SPECIFICATION is a required argument");

    let execution_start_time = Local::now();

    // Both bounds are optional; `None` means "now" for generation purposes.
    let start_datetime = datetime_nanoseconds(matches.value_of("START"), execution_start_time);
    let end_datetime = datetime_nanoseconds(matches.value_of("END"), execution_start_time);

    // Only for the log line below — default missing bounds to "now".
    let start_display = start_datetime.unwrap_or_else(|| execution_start_time.timestamp_nanos());
    let end_display = end_datetime.unwrap_or_else(|| execution_start_time.timestamp_nanos());

    let continue_on = matches.is_present("continue");

    info!(
        "Starting at {}, ending at {} ({}){}",
        start_display,
        end_display,
        // Window length in whole seconds.
        (end_display - start_display) / 1_000_000_000,
        if continue_on { " then continuing" } else { "" },
    );

    let data_spec = DataSpec::from_file(spec_filename)?;

    // TODO: parquet output

    // `--output` (file) takes precedence over `--host` (API); one is required.
    let mut points_writer_builder = if let Some(line_protocol_filename) = matches.value_of("OUTPUT")
    {
        PointsWriterBuilder::new_file(line_protocol_filename)?
    } else if let Some(host) = matches.value_of("HOST") {
        let (host, org, bucket, token, create_bucket, org_id) = validate_api_arguments(
            host,
            matches.value_of("ORG"),
            matches.value_of("BUCKET"),
            matches.value_of("TOKEN"),
            matches.is_present("create"),
            matches.value_of("ORG_ID"),
        );

        PointsWriterBuilder::new_api(host, org, bucket, token, create_bucket, org_id).await?
    } else {
        panic!("One of --output or --host must be provided.");
    };

    let result = iox_data_generator::generate::<rand::rngs::SmallRng>(
        &data_spec,
        &mut points_writer_builder,
        start_datetime,
        end_datetime,
        execution_start_time.timestamp_nanos(),
        continue_on,
    )
    .await;

    match result {
        Ok(total_points) => eprintln!("Submitted {} total points", total_points),
        Err(e) => panic!("Execution failed: \n{}", e),
    }

    Ok(())
}
|
||||
|
||||
fn datetime_nanoseconds(arg: Option<&str>, now: DateTime<Local>) -> Option<i64> {
|
||||
arg.map(|s| {
|
||||
let datetime = parse_date_string(s, now, Dialect::Us).expect("Could not parse time");
|
||||
datetime.timestamp_nanos()
|
||||
})
|
||||
}
|
||||
|
||||
/// Checks that the CLI arguments needed for API output are all present.
///
/// Returns the unwrapped `(host, org, bucket, token, create_bucket, org_id)`
/// tuple when everything required was supplied.
///
/// # Panics
///
/// Panics if `--create` was given without `--org_id`, or — with a message
/// listing each missing flag — if any of `--org`, `--bucket`, `--token` is
/// absent.
fn validate_api_arguments<'a>(
    host: &'a str,
    org: Option<&'a str>,
    bucket: Option<&'a str>,
    token: Option<&'a str>,
    create_bucket: bool,
    org_id: Option<&'a str>,
) -> (&'a str, &'a str, &'a str, &'a str, bool, Option<&'a str>) {
    // Creating a bucket requires knowing which org owns it.
    if create_bucket && org_id.is_none() {
        panic!("When `--create` is specified, `--org_id` is required, but it was missing.");
    }

    // Collect every missing required flag so the user sees all problems at once.
    let missing: Vec<_> = [
        (org, "`--org` is missing"),
        (bucket, "`--bucket` is missing"),
        (token, "`--token` is missing"),
    ]
    .iter()
    .filter(|(value, _)| value.is_none())
    .map(|&(_, message)| message)
    .collect();

    if !missing.is_empty() {
        panic!(
            "When `--host` is specified, `--org`, `--bucket`, and `--token` are required, \
            but {}",
            missing.join(", ")
        );
    }

    // These `unwrap`s are safe: a `None` would have been reported above.
    (
        host,
        org.unwrap(),
        bucket.unwrap(),
        token.unwrap(),
        create_bucket,
        org_id,
    )
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    // A missing argument should pass through as `None` without parsing.
    #[test]
    fn none_datetime_is_none_nanoseconds() {
        let ns = datetime_nanoseconds(None, Local::now());
        assert!(ns.is_none());
    }

    #[test]
    #[ignore] // TODO: I think chrono-english isn't handling timezones the way I'd expect
    fn rfc3339() {
        let ns = datetime_nanoseconds(Some("2020-01-01T01:23:45-05:00"), Local::now());
        assert_eq!(ns, Some(1577859825000000000));
    }

    // Fuzzy relative phrases are interpreted against the supplied `now`.
    #[test]
    fn relative() {
        let fixed_now = Local::now();
        let ns = datetime_nanoseconds(Some("1hr ago"), fixed_now);
        let expected = (fixed_now - chrono::Duration::hours(1)).timestamp_nanos();
        assert_eq!(ns, Some(expected));
    }
}
|
|
@ -0,0 +1,960 @@
|
|||
//! Generating a set of points for one measurement configuration
|
||||
|
||||
use crate::{
|
||||
field::FieldGeneratorSet,
|
||||
specification,
|
||||
substitution::Substitute,
|
||||
tag::{Tag, TagGeneratorSet},
|
||||
DataGenRng, RandomNumberGenerator,
|
||||
};
|
||||
|
||||
use influxdb2_client::models::DataPoint;
|
||||
use itertools::Itertools;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::fmt;
|
||||
|
||||
/// Measurement-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while creating measurements
///
/// `snafu` derives a context selector for each variant (e.g.
/// `CouldNotGenerateTags`) used with `.context(...)` at call sites.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when building a data point with the Influx DB
    /// client
    #[snafu(display(
        "Could not build data point for measurement `{}` with Influx Client, caused by:\n{}",
        name,
        source
    ))]
    InfluxDataPointError {
        /// The name of the relevant measurement
        name: String,
        /// Underlying Influx Client error that caused this problem
        source: influxdb2_client::models::data_point::DataPointError,
    },

    /// Error that may happen when substituting placeholder values
    #[snafu(display("Could not create measurement name, caused by:\n{}", source))]
    CouldNotCreateMeasurementName {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },

    /// Error that may happen when creating tag generator sets
    #[snafu(display(
        "Could not create tag generator sets for measurement `{}`, caused by:\n{}",
        name,
        source
    ))]
    CouldNotCreateTagGeneratorSets {
        /// The name of the relevant measurement
        name: String,
        /// Underlying `tag` module error that caused this problem
        source: crate::tag::Error,
    },

    /// Error that may happen when creating field generator sets
    #[snafu(display(
        "Could not create field generator sets for measurement `{}`, caused by:\n{}",
        name,
        source
    ))]
    CouldNotCreateFieldGeneratorSets {
        /// The name of the relevant measurement
        name: String,
        /// Underlying `field` module error that caused this problem
        source: crate::field::Error,
    },

    /// Error that may happen when generating a particular set of tags
    #[snafu(display(
        "Could not generate tags for measurement `{}`, caused by:\n{}",
        name,
        source
    ))]
    CouldNotGenerateTags {
        /// The name of the relevant measurement
        name: String,
        /// Underlying `tag` module error that caused this problem
        source: crate::tag::Error,
    },
}
|
||||
|
||||
/// A set of `count` measurements that have the same configuration but different
/// `measurement_id`s. The `generate` method on a `MeasurementGeneratorSet` will
/// always return `count` points.
#[derive(Debug)]
pub struct MeasurementGeneratorSet<T: DataGenRng> {
    // One generator per measurement_id; length equals the spec's `count`
    // (defaulting to 1).
    measurement_generators: Vec<MeasurementGenerator<T>>,
}
|
||||
|
||||
impl<T: DataGenRng> MeasurementGeneratorSet<T> {
    /// Create a new set of measurement generators for a particular agent and
    /// measurement specification.
    ///
    /// Builds `spec.count` (default 1) generators, each distinguished by its
    /// `measurement_id`; fails if any individual generator cannot be built.
    pub fn new(
        agent_name: &str,
        agent_id: usize,
        spec: &specification::MeasurementSpec,
        parent_seed: impl fmt::Display,
        static_tags: &[Tag],
        execution_start_time: i64,
    ) -> Result<Self> {
        let count = spec.count.unwrap_or(1);

        let measurement_generators = (0..count)
            .map(|measurement_id| {
                MeasurementGenerator::new(
                    agent_name,
                    agent_id,
                    measurement_id,
                    spec,
                    &parent_seed,
                    static_tags,
                    execution_start_time,
                )
            })
            // Collecting into Result<_> short-circuits on the first failure.
            .collect::<Result<_>>()?;

        Ok(Self {
            measurement_generators,
        })
    }

    /// Create one set of points
    ///
    /// Runs every contained generator at `timestamp` and flattens their
    /// points into a single `Vec`, propagating the first error if any.
    pub fn generate(&mut self, timestamp: i64) -> Result<Vec<DataPoint>> {
        let generate_results = self
            .measurement_generators
            .iter_mut()
            .map(|mg| mg.generate(timestamp));

        itertools::process_results(generate_results, |points| points.flatten().collect())
    }
}
|
||||
|
||||
/// Generate measurements
#[derive(Debug)]
pub struct MeasurementGenerator<T: DataGenRng> {
    // Retained so the seed stays alive with the generator; not read directly.
    #[allow(dead_code)]
    rng: RandomNumberGenerator<T>,
    // Measurement name after placeholder substitution.
    name: String,
    // Tags attached verbatim to every generated point.
    static_tags: Vec<Tag>,
    // One generator set per tag configuration in the spec.
    tag_generator_sets: Vec<TagGeneratorSet<T>>,
    // Product of the cardinalities of all tag generator sets; the number of
    // points produced per `generate` call.
    total_tag_cardinality: usize,
    // One generator set per field configuration in the spec.
    field_generator_sets: Vec<FieldGeneratorSet>,
    // The spec's `count` (default 1).
    count: usize,
}
|
||||
|
||||
impl<T: DataGenRng> MeasurementGenerator<T> {
    /// Create a new way to generate measurements from a specification
    ///
    /// Substitutes `{{agent_id}}`/`{{agent_name}}`/`{{measurement_id}}`
    /// placeholders in the measurement name, then derives a child seed from
    /// `parent_seed` and that name so every measurement gets a distinct but
    /// reproducible RNG. Tag and field generator sets are built from the spec
    /// using that derived seed.
    pub fn new(
        agent_name: impl Into<String>,
        agent_id: usize,
        measurement_id: usize,
        spec: &specification::MeasurementSpec,
        parent_seed: impl fmt::Display,
        static_tags: &[Tag],
        execution_start_time: i64,
    ) -> Result<Self> {
        let agent_name = agent_name.into();
        let spec_name = Substitute::once(
            &spec.name,
            &[
                ("agent_id", &agent_id.to_string()),
                ("agent_name", &agent_name),
                ("measurement_id", &measurement_id.to_string()),
            ],
        )
        .context(CouldNotCreateMeasurementName)?;

        // Child seed: parent seed plus the substituted measurement name.
        let seed = format!("{}-{}", parent_seed, spec_name);
        let rng = RandomNumberGenerator::<T>::new(seed);

        let tag_generator_sets: Vec<TagGeneratorSet<T>> = spec
            .tags
            .iter()
            .map(|tag_spec| TagGeneratorSet::new(agent_id, measurement_id, tag_spec, &rng.seed))
            .collect::<crate::tag::Result<_>>()
            .context(CouldNotCreateTagGeneratorSets { name: &spec_name })?;

        // Number of points one `generate` call will produce (product of all
        // tag cardinalities; empty product is 1).
        let total_tag_cardinality = tag_generator_sets
            .iter()
            .map(|tgs| tgs.tag_cardinality())
            .product();

        let field_generator_sets = spec
            .fields
            .iter()
            .map(|field_spec| {
                FieldGeneratorSet::new::<T>(
                    &agent_name,
                    agent_id,
                    measurement_id,
                    field_spec,
                    &rng.seed,
                    execution_start_time,
                )
            })
            .collect::<crate::field::Result<_>>()
            .context(CouldNotCreateFieldGeneratorSets { name: &spec_name })?;

        Ok(Self {
            rng,
            name: spec_name,
            static_tags: static_tags.to_vec(),
            tag_generator_sets,
            total_tag_cardinality,
            field_generator_sets,
            count: spec.count.unwrap_or(1),
        })
    }
}
|
||||
|
||||
impl<T: DataGenRng> MeasurementGenerator<T> {
    // Produces one point per combination of the cardinality>1 tags (so
    // `total_tag_cardinality` points in total), zipping in the cardinality==1
    // tags row-by-row, then materializing each row via `one`.
    fn generate(&mut self, timestamp: i64) -> Result<Vec<DataPoint>> {
        // Split out the tags that we want all combinations of. Perhaps these should be
        // a different type?
        let mut tags_with_cardinality: Vec<_> = itertools::process_results(
            self.tag_generator_sets
                .iter_mut()
                .filter(|tgs| tgs.tag_cardinality() > 1)
                .map(TagGeneratorSet::generate),
            |tags| {
                tags.multi_cartesian_product()
                    .map(|tag_set| tag_set.into_iter().flatten().collect())
                    .collect()
            },
        )
        .context(CouldNotGenerateTags { name: &self.name })?;

        // Ensure we generate something even when there are no tags.
        if tags_with_cardinality.is_empty() {
            tags_with_cardinality.push(Vec::new());
        }

        let total_tag_cardinality = self.total_tag_cardinality;
        // Invariant established in `new`: the cartesian product's size must
        // equal the precomputed cardinality product.
        assert_eq!(tags_with_cardinality.len(), total_tag_cardinality);

        // Split out the tags that we don't want to include when we're generating all
        // possible combinations above. Perhaps these should be a different
        // type? Leaving the type annotation here because it's terrible and
        // confusing otherwise.
        //
        // This type is made up of:
        //
        // - `Vec<Tag>` comes from one call to `TagGenerator::generate`. Tag
        //   configurations with a `count` value > 1 generate multiple tags with
        //   different keys but the same value for each generation. The length of this
        //   vector is the tag configuration's `count`.
        // - `Vec<Vec<Tag>>` comes from one call to `TagGenerator::generate_to_zip` and
        //   is a list of either cloned or resampled tags from this TagGenerator. The
        //   length of this vector is `total_tag_cardinality`.
        // - `Vec<Vec<Vec<Tag>>>` comes from collecting all these lists from each
        //   `TagGeneratorSet` that has a cardinality of 1 (the default). Each
        //   `TagGeneratorSet` corresponds to one tag configuration.
        let tags_without_cardinality_columns = self
            .tag_generator_sets
            .iter_mut()
            .filter(|tgs| tgs.tag_cardinality() == 1)
            .map(|tgs| tgs.generate_to_zip(total_tag_cardinality).unwrap());

        // This is doing a zip over an arbitrary number of iterators... itertools has
        // something that produces tuples but I want it to produce Vectors
        let mut tags_without_cardinality_column_iters: Vec<_> = tags_without_cardinality_columns
            .map(|column| column.into_iter())
            .collect();

        // For each group of tags that will become one row,
        for v in &mut tags_with_cardinality {
            // Get the rest of the tags that belong with this row that were either cloned or
            // resampled according to their configuration
            let tag_row: Vec<Vec<Tag>> = tags_without_cardinality_column_iters
                .iter_mut()
                .map(|column_iter| {
                    column_iter.next().expect(
                        "Should have generated `total_tag_cardinality` items, \
                        which should match the length of `tags_with_cardinality`",
                    )
                })
                .collect();
            // If count can't be combined with replacements, this `for` loop wouldn't be
            // needed
            for mut tags in tag_row {
                v.append(&mut tags);
            }
        }

        // Build one DataPoint per fully-assembled tag row.
        tags_with_cardinality
            .iter()
            .map(|tags| self.one(&tags[..], timestamp))
            .collect()
    }

    // Builds a single DataPoint: static tags first, then the per-row tags,
    // then one generated value per field, all stamped with `timestamp`.
    fn one(&mut self, tags: &[Tag], timestamp: i64) -> Result<DataPoint> {
        let mut point = DataPoint::builder(&self.name);

        point = self
            .static_tags
            .iter()
            .fold(point, |point, tag| point.tag(&tag.key, &tag.value));

        point = tags
            .iter()
            .fold(point, |point, tag| point.tag(&tag.key, &tag.value));

        for fgs in &mut self.field_generator_sets {
            for field in fgs.generate(timestamp) {
                point = point.field(&field.key, field.value);
            }
        }

        point = point.timestamp(timestamp);

        point
            .build()
            .context(InfluxDataPointError { name: &self.name })
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::{specification::*, DynamicRng, ZeroRng, TEST_SEED};
|
||||
use influxdb2_client::models::WriteDataPoint;
|
||||
use std::str;
|
||||
|
||||
type Error = Box<dyn std::error::Error>;
|
||||
type Result<T = (), E = Error> = std::result::Result<T, E>;
|
||||
|
||||
impl<T: DataGenRng> MeasurementGenerator<T> {
|
||||
fn generate_string(&mut self, timestamp: i64) -> Result<String> {
|
||||
self.generate_strings(timestamp)
|
||||
.map(|mut strings| strings.swap_remove(0))
|
||||
}
|
||||
|
||||
fn generate_strings(&mut self, timestamp: i64) -> Result<Vec<String>> {
|
||||
let points = self.generate(timestamp)?;
|
||||
points
|
||||
.into_iter()
|
||||
.map(|point| {
|
||||
let mut v = Vec::new();
|
||||
point.write_data_point_to(&mut v)?;
|
||||
Ok(String::from_utf8(v)?)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement() -> Result {
|
||||
let fake_now = 1234;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "cpu".into(),
|
||||
count: None,
|
||||
tags: vec![],
|
||||
fields: vec![FieldSpec {
|
||||
name: "response_time".into(),
|
||||
field_value_spec: FieldValueSpec::I64 {
|
||||
range: 0..60,
|
||||
increment: false,
|
||||
reset_after: None,
|
||||
},
|
||||
count: None,
|
||||
}],
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = measurement_generator.generate_string(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol,
|
||||
format!("cpu response_time=0i {}\n", fake_now)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_stable_rngs() -> Result {
|
||||
let fake_now = 5678;
|
||||
|
||||
// This is the same as the previous test but with an additional field.
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "cpu".into(),
|
||||
count: Some(2),
|
||||
tags: vec![],
|
||||
fields: vec![
|
||||
FieldSpec {
|
||||
name: "load".into(),
|
||||
field_value_spec: FieldValueSpec::F64 { range: 0.0..100.0 },
|
||||
count: None,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "response_time".into(),
|
||||
field_value_spec: FieldValueSpec::I64 {
|
||||
range: 0..60_000,
|
||||
increment: false,
|
||||
reset_after: None,
|
||||
},
|
||||
count: None,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<DynamicRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = vec![measurement_generator.generate_string(fake_now)?];
|
||||
let response_times = extract_field_values("response_time", &line_protocol);
|
||||
|
||||
let next_line_protocol = vec![measurement_generator.generate_string(fake_now + 1)?];
|
||||
let next_response_times = extract_field_values("response_time", &next_line_protocol);
|
||||
|
||||
// Each line should have a different response time unless we get really, really unlucky
|
||||
assert_ne!(response_times, next_response_times);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_always_including_some_tags() -> Result {
|
||||
let fake_now = 678;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "cpu".into(),
|
||||
count: None,
|
||||
tags: vec![],
|
||||
fields: vec![FieldSpec {
|
||||
name: "response_time".into(),
|
||||
field_value_spec: FieldValueSpec::I64 {
|
||||
range: 0..60,
|
||||
increment: false,
|
||||
reset_after: None,
|
||||
},
|
||||
count: None,
|
||||
}],
|
||||
};
|
||||
|
||||
let always_tags = vec![Tag::new("my_tag", "my_val")];
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&always_tags,
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = measurement_generator.generate_string(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol,
|
||||
format!("cpu,my_tag=my_val response_time=0i {}\n", fake_now),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_with_basic_tags() -> Result {
|
||||
let fake_now = 678;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement".into(),
|
||||
tags: vec![
|
||||
TagSpec {
|
||||
name: "tag_name".into(),
|
||||
value: "tag_value".into(),
|
||||
..Default::default()
|
||||
},
|
||||
TagSpec {
|
||||
name: "some_name".into(),
|
||||
value: "some_value".into(),
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field_name".into(),
|
||||
..FieldSpec::default()
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = measurement_generator.generate_string(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol,
|
||||
format!(
|
||||
"measurement,some_name=some_value,tag_name=tag_value field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_with_tags_with_count() -> Result {
|
||||
let fake_now = 678;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement".into(),
|
||||
tags: vec![TagSpec {
|
||||
name: "{{agent_id}}--{{measurement_id}}--tag_name--{{tag_id}}".into(),
|
||||
value: "tag_value".into(),
|
||||
count: Some(2),
|
||||
..Default::default()
|
||||
}],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field_name".into(),
|
||||
..FieldSpec::default()
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
42,
|
||||
99,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = measurement_generator.generate_string(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol,
|
||||
format!("measurement,42--99--tag_name--0=tag_value,42--99--tag_name--1=tag_value field_name=f {}\n", fake_now),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_with_tags_with_cardinality() -> Result {
|
||||
let fake_now = 678;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement".into(),
|
||||
tags: vec![TagSpec {
|
||||
name: "tag_name".into(),
|
||||
value: "tag_value--{{cardinality}}".into(),
|
||||
cardinality: Some(2),
|
||||
..Default::default()
|
||||
}],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field_name".into(),
|
||||
..FieldSpec::default()
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = measurement_generator.generate_strings(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol[0],
|
||||
format!(
|
||||
"measurement,tag_name=tag_value--0 field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
line_protocol[1],
|
||||
format!(
|
||||
"measurement,tag_name=tag_value--1 field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_with_tags_with_multiple_cardinality() -> Result {
|
||||
let fake_now = 678;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement".into(),
|
||||
tags: vec![
|
||||
TagSpec {
|
||||
name: "alpha".into(),
|
||||
value: "alpha--{{cardinality}}".into(),
|
||||
cardinality: Some(2),
|
||||
..Default::default()
|
||||
},
|
||||
TagSpec {
|
||||
name: "beta".into(),
|
||||
value: "beta--{{cardinality}}".into(),
|
||||
cardinality: Some(2),
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field_name".into(),
|
||||
..FieldSpec::default()
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol = measurement_generator.generate_strings(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol[0],
|
||||
format!(
|
||||
"measurement,alpha=alpha--0,beta=beta--0 field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
line_protocol[1],
|
||||
format!(
|
||||
"measurement,alpha=alpha--0,beta=beta--1 field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
line_protocol[2],
|
||||
format!(
|
||||
"measurement,alpha=alpha--1,beta=beta--0 field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
line_protocol[3],
|
||||
format!(
|
||||
"measurement,alpha=alpha--1,beta=beta--1 field_name=f {}\n",
|
||||
fake_now
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_with_tags_with_increment_every() -> Result {
|
||||
let fake_now = 678;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement".into(),
|
||||
tags: vec![TagSpec {
|
||||
name: "tag_name".into(),
|
||||
value: "tag_value--{{counter}}".into(),
|
||||
increment_every: Some(2),
|
||||
..Default::default()
|
||||
}],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field_name".into(),
|
||||
..FieldSpec::default()
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_protocol_1 = measurement_generator.generate_string(fake_now)?;
|
||||
let line_protocol_2 = measurement_generator.generate_string(fake_now)?;
|
||||
let line_protocol_3 = measurement_generator.generate_string(fake_now)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol_1,
|
||||
format!(
|
||||
"measurement,tag_name=tag_value--0 field_name=f {}\n",
|
||||
fake_now,
|
||||
),
|
||||
);
|
||||
assert_eq!(
|
||||
line_protocol_2,
|
||||
format!(
|
||||
"measurement,tag_name=tag_value--0 field_name=f {}\n",
|
||||
fake_now,
|
||||
),
|
||||
);
|
||||
assert_eq!(
|
||||
line_protocol_3,
|
||||
format!(
|
||||
"measurement,tag_name=tag_value--1 field_name=f {}\n",
|
||||
fake_now,
|
||||
),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_measurement_with_replacement() -> Result {
|
||||
let fake_now = 91011;
|
||||
|
||||
let measurement_spec = MeasurementSpec {
|
||||
name: "measurement-{{agent_id}}-{{measurement_id}}".into(),
|
||||
count: Some(2),
|
||||
tags: vec![],
|
||||
fields: vec![FieldSpec {
|
||||
name: "field-{{agent_id}}-{{measurement_id}}-{{field_id}}".into(),
|
||||
field_value_spec: FieldValueSpec::I64 {
|
||||
range: 0..60,
|
||||
increment: false,
|
||||
reset_after: None,
|
||||
},
|
||||
count: Some(2),
|
||||
}],
|
||||
};
|
||||
|
||||
let mut measurement_generator_set = MeasurementGeneratorSet::<ZeroRng>::new(
|
||||
"agent_name",
|
||||
42,
|
||||
&measurement_spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let points = measurement_generator_set.generate(fake_now).unwrap();
|
||||
let mut v = Vec::new();
|
||||
for point in points {
|
||||
point.write_data_point_to(&mut v)?;
|
||||
}
|
||||
let line_protocol = str::from_utf8(&v)?;
|
||||
|
||||
assert_eq!(
|
||||
line_protocol,
|
||||
format!(
|
||||
"measurement-42-0 field-42-0-0=0i,field-42-0-1=0i {}
|
||||
measurement-42-1 field-42-1-0=0i,field-42-1-1=0i {}
|
||||
",
|
||||
fake_now, fake_now
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn guid_and_guid_with_cardinality() -> Result<()> {
|
||||
let fake_now = 678;
|
||||
|
||||
let spec: specification::MeasurementSpec = toml::from_str(
|
||||
r#"
|
||||
name = "traces"
|
||||
|
||||
[[tags]]
|
||||
name = "trace_id"
|
||||
value = "value-{{guid}}"
|
||||
|
||||
[[tags]]
|
||||
name = "span_id"
|
||||
value = "value-{{guid}}"
|
||||
cardinality = 2
|
||||
|
||||
[[fields]]
|
||||
name = "timing"
|
||||
i64_range = [5, 100]"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<DynamicRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)?;
|
||||
|
||||
let line_protocol = measurement_generator.generate_strings(fake_now)?;
|
||||
|
||||
let mut trace_ids = extract_tag_values("trace_id", &line_protocol);
|
||||
trace_ids.sort_unstable();
|
||||
trace_ids.dedup();
|
||||
// Both lines should have the same trace ID
|
||||
assert_eq!(trace_ids.len(), 1);
|
||||
|
||||
let mut span_ids = extract_tag_values("span_id", &line_protocol);
|
||||
span_ids.sort_unstable();
|
||||
span_ids.dedup();
|
||||
// Each line should have a different span ID
|
||||
assert_eq!(span_ids.len(), 2);
|
||||
|
||||
let next_line_protocol = measurement_generator.generate_strings(fake_now)?;
|
||||
|
||||
let mut next_trace_ids = extract_tag_values("trace_id", &next_line_protocol);
|
||||
next_trace_ids.sort_unstable();
|
||||
next_trace_ids.dedup();
|
||||
// Both lines should have the same trace ID
|
||||
assert_eq!(next_trace_ids.len(), 1);
|
||||
|
||||
// On each generation, there should be a new trace id
|
||||
assert_ne!(trace_ids, next_trace_ids);
|
||||
|
||||
let mut next_span_ids = extract_tag_values("span_id", &next_line_protocol);
|
||||
next_span_ids.sort_unstable();
|
||||
next_span_ids.dedup();
|
||||
// Each line should have a different span ID
|
||||
assert_eq!(next_span_ids.len(), 2);
|
||||
|
||||
// On each generation, there should be new span IDs too
|
||||
assert_ne!(span_ids, next_span_ids);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tag_replacements_with_resampling_true() -> Result<()> {
|
||||
resampling_test("resample_every_line = true", true)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tag_replacements_with_resampling_false() -> Result<()> {
|
||||
resampling_test("resample_every_line = false", false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tag_replacements_with_default_resampling_false() -> Result<()> {
|
||||
resampling_test("", false)
|
||||
}
|
||||
|
||||
fn resampling_test(resampling_toml: &str, expect_different: bool) -> Result<()> {
|
||||
let fake_now = 678;
|
||||
|
||||
let spec: specification::MeasurementSpec = toml::from_str(&format!(
|
||||
r#"
|
||||
name = "resampling"
|
||||
|
||||
[[tags]]
|
||||
name = "tag-1"
|
||||
value = "value-{{{{cardinality}}}}"
|
||||
cardinality = 10
|
||||
|
||||
[[tags]]
|
||||
name = "host"
|
||||
value = "{{{{host}}}}"
|
||||
replacements = [
|
||||
{{replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]}},
|
||||
]
|
||||
{}
|
||||
|
||||
[[fields]]
|
||||
name = "timing"
|
||||
i64_range = [5, 100]"#,
|
||||
resampling_toml
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let mut measurement_generator = MeasurementGenerator::<DynamicRng>::new(
|
||||
"agent_name",
|
||||
0,
|
||||
0,
|
||||
&spec,
|
||||
TEST_SEED,
|
||||
&[],
|
||||
fake_now,
|
||||
)?;
|
||||
|
||||
let lines = measurement_generator.generate_strings(fake_now)?;
|
||||
let mut host_values = extract_tag_values("host", &lines);
|
||||
host_values.sort_unstable();
|
||||
host_values.dedup();
|
||||
|
||||
if expect_different {
|
||||
assert!(host_values.len() > 1);
|
||||
} else {
|
||||
assert_eq!(host_values.len(), 1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Hacktacular extracting of values from line protocol without pulling in another crate
|
||||
fn extract_tag_values<'a>(tag_name: &str, lines: &'a [String]) -> Vec<&'a str> {
|
||||
lines
|
||||
.iter()
|
||||
.map(|line| {
|
||||
let before_space = line.splitn(2, ' ').next().unwrap();
|
||||
let prefix = format!(",{}=", tag_name);
|
||||
let after = before_space.rsplitn(2, &prefix).next().unwrap();
|
||||
after.splitn(2, ',').next().unwrap()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn extract_field_values<'a>(field_name: &str, lines: &'a [String]) -> Vec<&'a str> {
|
||||
lines
|
||||
.iter()
|
||||
.map(|line| {
|
||||
let mut split = line.splitn(2, ' ');
|
||||
split.next();
|
||||
let after_space = split.next().unwrap();
|
||||
let prefix = format!(",{}=", field_name);
|
||||
let after = after_space.rsplitn(2, &prefix).next().unwrap();
|
||||
after.splitn(2, ',').next().unwrap()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,616 @@
|
|||
//! Reading and interpreting data generation specifications.
|
||||
|
||||
use serde::Deserialize;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{fs, ops::Range, str::FromStr};
|
||||
|
||||
/// Errors that may happen while reading a TOML specification.
|
||||
#[derive(Snafu, Debug)]
|
||||
pub enum Error {
|
||||
/// File-related error that may happen while reading a specification
|
||||
#[snafu(display(r#"Error reading data spec from TOML file: {}"#, source))]
|
||||
ReadFile {
|
||||
/// Underlying I/O error that caused this problem
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
/// TOML parsing error that may happen while interpreting a specification
|
||||
#[snafu(display(r#"Error parsing data spec from TOML: {}"#, source))]
|
||||
Parse {
|
||||
/// Underlying TOML error that caused this problem
|
||||
source: toml::de::Error,
|
||||
},
|
||||
}
|
||||
|
||||
type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// The full specification for the generation of a data set.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct DataSpec {
|
||||
/// Every point generated from this configuration will contain a tag
|
||||
/// `data_spec=[this value]` to identify what generated that data. This
|
||||
/// name can also be used in string replacements by using the
|
||||
/// placeholder `{{data_spec}}`.
|
||||
pub name: String,
|
||||
/// A string to be used as the seed to the random number generators.
|
||||
///
|
||||
/// When specified, this is used as a base seed propagated through all
|
||||
/// measurements, tags, and fields, which will each have their own
|
||||
/// random number generator seeded by this seed plus their name. This
|
||||
/// has the effect of keeping each value sequence generated per measurement,
|
||||
/// tag, or field stable even if the configurations in other parts of the
|
||||
/// schema are changed. That is, if you have a field named `temp` and on
|
||||
/// the first run with base seed `foo` generates the values `[10, 50,
|
||||
/// 72, 3]`, and then you add another field named `weight` to the schema
|
||||
/// and run with base seed `foo` again, the values generated for `temp`
|
||||
/// should again be `[10, 50, 72, 3]`. This enables incremental
|
||||
/// development of a schema without churn, if that is undesired.
|
||||
///
|
||||
/// When this is not specified, the base seed will be randomly generated. It
|
||||
/// will be printed to stdout so that the value used can be specified in
|
||||
/// future configurations if reproducing a particular set of sequences
|
||||
/// is desired.
|
||||
pub base_seed: Option<String>,
|
||||
/// The specification for the data-generating agents in this data set.
|
||||
pub agents: Vec<AgentSpec>,
|
||||
}
|
||||
|
||||
impl DataSpec {
|
||||
/// Given a filename, read the file and parse the specification.
|
||||
pub fn from_file(file_name: &str) -> Result<Self> {
|
||||
let spec_toml = fs::read_to_string(file_name).context(ReadFile)?;
|
||||
Self::from_str(&spec_toml)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for DataSpec {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(spec_toml: &str) -> std::result::Result<Self, <Self as FromStr>::Err> {
|
||||
let spec: Self = toml::from_str(spec_toml).context(Parse)?;
|
||||
Ok(spec)
|
||||
}
|
||||
}
|
||||
|
||||
/// The specification of the behavior of an agent, the entity responsible for
|
||||
/// generating a number of data points according to its configuration.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[cfg_attr(test, derive(Default))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct AgentSpec {
|
||||
/// Used as the value for the `name` tag if `name_tag_key` is `Some`; has no
|
||||
/// effect if `name_tag_key` is not specified.
|
||||
///
|
||||
/// Can be a plain string or a string with placeholders for:
|
||||
///
|
||||
/// - `{{agent_id}}` - the agent ID
|
||||
pub name: String,
|
||||
/// Specifies the number of agents that should be created with this spec.
|
||||
/// Default value is 1.
|
||||
pub count: Option<usize>,
|
||||
/// How often this agent should generate samples, in number of seconds. If
|
||||
/// not specified, this agent will only generate one sample.
|
||||
pub sampling_interval: Option<usize>,
|
||||
/// If specified, every measurement generated by this agent will include a
|
||||
/// tag with this `String` as its key, and with the `AgentSpec`'s `name`
|
||||
/// as the value (with any substitutions in the `name` performed)
|
||||
pub name_tag_key: Option<String>,
|
||||
/// If specified, the values of the tags will be cycled through per `Agent`
|
||||
/// instance such that all measurements generated by that agent will
|
||||
/// contain tags with the specified name and that agent's `name` field
|
||||
/// (with replacements made) as the value.
|
||||
#[serde(default)]
|
||||
pub tags: Vec<AgentTag>,
|
||||
/// The specifications for the measurements for the agent to generate.
|
||||
pub measurements: Vec<MeasurementSpec>,
|
||||
}
|
||||
|
||||
/// Tags that are associated to all measurements that a particular agent
|
||||
/// generates. The values are rotated through so that each agent gets one of the
|
||||
/// specified values for this key.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct AgentTag {
|
||||
/// The tag key to use when adding this tag to all measurements for an agent
|
||||
pub key: String,
|
||||
/// The values to cycle through for each agent for this tag key
|
||||
pub values: Vec<String>,
|
||||
}
|
||||
|
||||
/// The specification of how to generate data points for a particular
|
||||
/// measurement.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[cfg_attr(test, derive(Default))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct MeasurementSpec {
|
||||
/// Name of the measurement. Can be a plain string or a string with
|
||||
/// placeholders for:
|
||||
///
|
||||
/// - `{{agent_id}}` - the agent ID
|
||||
/// - `{{measurement_id}}` - the measurement's ID, which must be used if
|
||||
/// `count` > 1 so that unique measurement names are created
|
||||
pub name: String,
|
||||
/// The number of measurements with this configuration that should be
|
||||
/// created. Default value is 1. If specified, use `{{measurement_id}}`
|
||||
/// in this measurement's `name` to create unique measurements.
|
||||
pub count: Option<usize>,
|
||||
/// Specification of the tags for this measurement
|
||||
#[serde(default)]
|
||||
pub tags: Vec<TagSpec>,
|
||||
/// Specification of the fields for this measurement. At least one field is
|
||||
/// required.
|
||||
pub fields: Vec<FieldSpec>,
|
||||
}
|
||||
|
||||
/// The specification of how to generate tag keys and values for a particular
|
||||
/// measurement.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[cfg_attr(test, derive(Default))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TagSpec {
|
||||
/// Key/name for this tag. Can be a plain string or a string with
|
||||
/// placeholders for:
|
||||
///
|
||||
/// - `{{agent_id}}` - the agent ID
|
||||
/// - `{{measurement_id}}` - the measurement ID
|
||||
/// - `{{tag_id}}` - the tag ID, which must be used if `count` > 1 so that
|
||||
/// unique tag names are created
|
||||
pub name: String,
|
||||
/// Value for this tag. Can be a plain string or a string with placeholders
|
||||
/// for:
|
||||
///
|
||||
/// - `{{agent_id}}` - the agent ID
|
||||
/// - `{{measurement_id}}` - the measurement ID
|
||||
/// - `{{cardinality}}` - the cardinality counter value. Must use this or
|
||||
/// `{{guid}}` if `cardinality` > 1 so that unique tag values are created
|
||||
/// - `{{counter}}` - the increment counter value. Only useful if
|
||||
/// `increment_every` is set.
|
||||
/// - `{{guid}}` - a randomly generated unique string. If `cardinality` > 1,
|
||||
/// each tag will have a different GUID.
|
||||
pub value: String,
|
||||
/// The number of tags with this configuration that should be created.
|
||||
/// Default value is 1. If specified, use `{{tag_id}}` in this tag's
|
||||
/// `name` to create unique tags.
|
||||
pub count: Option<usize>,
|
||||
/// A number that controls how many values are generated, which impacts how
|
||||
/// many rows are created for each agent generation. Default value is 1.
|
||||
/// If specified, use `{{cardinality}}` or `{{guid}}` in this tag's
|
||||
/// `value` to create unique values.
|
||||
pub cardinality: Option<u32>,
|
||||
/// How often to increment the `{{counter}}` value. For example, if
|
||||
/// `increment_every` is set to 10, `{{counter}}` will increase by 1
|
||||
/// after every 10 agent generations. This simulates temporal tag values
|
||||
/// like process IDs or container IDs in tags. If not specified, the value
|
||||
/// of `{{counter}}` will always be 0.
|
||||
pub increment_every: Option<usize>,
|
||||
/// A list of replacement placeholders and the values to replace them with.
|
||||
/// The values can optionally have weights associated with them to
|
||||
/// change the probabilities that its value will be used.
|
||||
#[serde(default)]
|
||||
pub replacements: Vec<Replacement>,
|
||||
/// When there are replacements specified and other tags in this measurement
|
||||
/// with cardinality greater than 1, this option controls whether this
|
||||
/// tag will get a new replacement value on every line in a generation
|
||||
/// (`true`) or whether it will be sampled once and have the same value
|
||||
/// on every line in a generation (`false`). If there are no replacements on
|
||||
/// this tag or any other tags with a cardinality greater than one, this
|
||||
/// has no effect.
|
||||
#[serde(default)]
|
||||
pub resample_every_line: bool,
|
||||
}
|
||||
|
||||
/// The specification of how to generate field keys and values for a particular
|
||||
/// measurement.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[cfg_attr(test, derive(Default))]
|
||||
#[serde(from = "FieldSpecIntermediate")]
|
||||
pub struct FieldSpec {
|
||||
/// Key/name for this field. Can be a plain string or a string with
|
||||
/// placeholders for:
|
||||
///
|
||||
/// - `{{agent_id}}` - the agent ID
|
||||
/// - `{{measurement_id}}` - the measurement ID
|
||||
/// - `{{field_id}}` - the field ID, which must be used if `count` > 1 so
|
||||
/// that unique field names are created
|
||||
pub name: String,
|
||||
/// Specification for the value for this field.
|
||||
pub field_value_spec: FieldValueSpec,
|
||||
/// How many fields with this configuration should be created
|
||||
pub count: Option<usize>,
|
||||
}
|
||||
|
||||
impl From<FieldSpecIntermediate> for FieldSpec {
|
||||
fn from(value: FieldSpecIntermediate) -> Self {
|
||||
let field_value_spec = if let Some(b) = value.bool {
|
||||
FieldValueSpec::Bool(b)
|
||||
} else if let Some((start, end)) = value.i64_range {
|
||||
FieldValueSpec::I64 {
|
||||
range: (start..end),
|
||||
increment: value.increment.unwrap_or(false),
|
||||
reset_after: value.reset_after,
|
||||
}
|
||||
} else if let Some((start, end)) = value.f64_range {
|
||||
FieldValueSpec::F64 {
|
||||
range: (start..end),
|
||||
}
|
||||
} else if let Some(pattern) = value.pattern {
|
||||
FieldValueSpec::String {
|
||||
pattern,
|
||||
replacements: value.replacements,
|
||||
}
|
||||
} else if let Some(kind) = value.uptime {
|
||||
FieldValueSpec::Uptime { kind }
|
||||
} else {
|
||||
panic!(
|
||||
"Can't tell what type of field value you're trying to specify with this \
|
||||
configuration: `{:?}",
|
||||
value
|
||||
);
|
||||
};
|
||||
|
||||
Self {
|
||||
name: value.name,
|
||||
field_value_spec,
|
||||
count: value.count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The specification of a field value of a particular type. Instances should be
|
||||
/// created by converting a `FieldSpecIntermediate`, which more closely matches
|
||||
/// the TOML structure.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum FieldValueSpec {
|
||||
/// Configuration of a boolean field.
|
||||
Bool(bool),
|
||||
/// Configuration of an integer field.
|
||||
I64 {
|
||||
/// The `Range` in which random integer values will be generated. If the
|
||||
/// range only contains one value, all instances of this field
|
||||
/// will have the same value.
|
||||
range: Range<i64>,
|
||||
/// When set to true, after an initial random value in the range is
|
||||
/// generated, a random increment in the range will be generated
|
||||
/// and added to the initial value. That means the
|
||||
/// value for this field will always be increasing. When the value
|
||||
/// reaches the max value of i64, the value will wrap around to
|
||||
/// the min value of i64 and increment again.
|
||||
increment: bool,
|
||||
/// If `increment` is true, after this many samples, reset the value to
|
||||
/// start the increasing value over. If this is `None`, the
|
||||
/// value won't restart until reaching the max value of i64. If
|
||||
/// `increment` is false, this has no effect.
|
||||
reset_after: Option<usize>,
|
||||
},
|
||||
/// Configuration of a floating point field.
|
||||
F64 {
|
||||
/// The `Range` in which random floating point values will be generated.
|
||||
/// If start == end, all instances of this field will have the
|
||||
/// same value.
|
||||
range: Range<f64>,
|
||||
},
|
||||
/// Configuration of a string field.
|
||||
String {
|
||||
/// Pattern containing placeholders that specifies how to generate the
|
||||
/// string values.
|
||||
///
|
||||
/// Valid placeholders include:
|
||||
///
|
||||
/// - `{{agent_name}}` - the agent spec's name, with any replacements
|
||||
/// done
|
||||
/// - `{{time}}` - the current time in nanoseconds since the epoch.
|
||||
/// TODO: support specifying a strftime
|
||||
/// - any other placeholders as specified in `replacements`. If a
|
||||
/// placeholder has no value specified in `replacements`, it will end
|
||||
/// up as-is in the field value.
|
||||
pattern: String,
|
||||
/// A list of replacement placeholders and the values to replace them
|
||||
/// with. The values can optionally have weights associated with
|
||||
/// them to change the probabilities that its value
|
||||
/// will be used.
|
||||
replacements: Vec<Replacement>,
|
||||
},
|
||||
/// Configuration of a field with the value of the number of seconds the
|
||||
/// data generation tool has been running.
|
||||
Uptime {
|
||||
/// Format of the uptime value in this field
|
||||
kind: UptimeKind,
|
||||
},
|
||||
}
|
||||
|
||||
/// The kind of field value to create using the data generation tool's uptime
|
||||
#[derive(Debug, PartialEq, Copy, Clone, Deserialize)]
|
||||
pub enum UptimeKind {
|
||||
/// Number of seconds since the tool started running as an i64 field
|
||||
#[serde(rename = "i64")]
|
||||
I64,
|
||||
/// Number of seconds since the tool started running, formatted as a string
|
||||
/// field containing the value in the format "x day(s), HH:MM"
|
||||
#[serde(rename = "telegraf")]
|
||||
Telegraf,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl Default for FieldValueSpec {
|
||||
fn default() -> Self {
|
||||
Self::Bool(true)
|
||||
}
|
||||
}
|
||||
|
||||
/// An intermediate representation of the field specification that more directly
|
||||
/// corresponds to the way field configurations are expressed in TOML. This
|
||||
/// structure is transformed into the `FieldValueSpec` enum that ensures the
|
||||
/// options for the different field value types are mutually exclusive.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct FieldSpecIntermediate {
|
||||
/// Key/name for this field. Can be a plain string or a string with
|
||||
/// placeholders for:
|
||||
///
|
||||
/// - `{{agent_id}}` - the agent ID
|
||||
/// - `{{measurement_id}}` - the measurement ID
|
||||
/// - `{{field_id}}` - the field ID, which must be used if `count` > 1 so
|
||||
/// that unique field names are created
|
||||
name: String,
|
||||
/// The number of fields with this configuration that should be created.
|
||||
/// Default value is 1. If specified, use `{{field_id}}` in this field's
|
||||
/// `name` to create unique fields.
|
||||
count: Option<usize>,
|
||||
/// Specify `bool` to make a field that has the Boolean type. `true` means
|
||||
/// to generate the boolean randomly with equal probability. `false`
|
||||
/// means...? Specifying any other optional fields along with this one
|
||||
/// is invalid.
|
||||
bool: Option<bool>,
|
||||
/// Specify `i64_range` to make an integer field. The values will be
|
||||
/// randomly generated within the specified range with equal
|
||||
/// probability. If the range only contains one element, all occurrences
|
||||
/// of this field will have the same value. Can be combined with
|
||||
/// `increment`; specifying any other optional fields is invalid.
|
||||
i64_range: Option<(i64, i64)>,
|
||||
/// Specify `f64_range` to make a floating point field. The values will be
|
||||
/// randomly generated within the specified range. If start == end, all
|
||||
/// occurrences of this field will have that value.
|
||||
/// Can this be combined with `increment`?
|
||||
f64_range: Option<(f64, f64)>,
|
||||
/// When set to true with an `i64_range` (is this valid with any other
|
||||
/// type?), after an initial random value is generated, a random
|
||||
/// increment will be generated and added to the initial value. That
|
||||
/// means the value for this field will always be increasing. When the value
|
||||
/// reaches the end of the range...? The end of the range will be repeated
|
||||
/// forever? The series will restart at the start of the range?
|
||||
/// Something else? Setting this to `Some(false)` has the same effect as
|
||||
/// `None`.
|
||||
increment: Option<bool>,
|
||||
/// If `increment` is true, after this many samples, reset the value to
|
||||
/// start the increasing value over. If this is `None`, the value won't
|
||||
/// restart until reaching the max value of i64. If `increment` is
|
||||
/// false, this has no effect.
|
||||
reset_after: Option<usize>,
|
||||
/// Set `pattern` to make a field with the string type. If this doesn't
|
||||
/// include any placeholders, all occurrences of this field will have
|
||||
/// this value.
|
||||
///
|
||||
/// Valid placeholders include:
|
||||
///
|
||||
/// - `{{agent_name}}` - the agent spec's name, with any replacements done
|
||||
/// - `{{time}}` - the current time in nanoseconds since the epoch. TODO:
|
||||
/// support specifying a strftime
|
||||
/// - any other placeholders as specified in `replacements`. If a
|
||||
/// placeholder has no value specified in `replacements`, it will end up
|
||||
/// as-is in the field value.
|
||||
pattern: Option<String>,
|
||||
/// A list of replacement placeholders and the values to replace them with.
|
||||
/// If a placeholder specified here is not used in `pattern`, it will
|
||||
/// have no effect. The values may optionally have a probability weight
|
||||
/// specified with them; if not specified, the value will have weight 1.
|
||||
/// If no weights are specified, the values will be generated with equal
|
||||
/// probability.
|
||||
#[serde(default)]
|
||||
replacements: Vec<Replacement>,
|
||||
/// The kind of uptime that should be used for this field. If specified, no
|
||||
/// other options are valid. If not specified, this is not an uptime
|
||||
/// field.
|
||||
uptime: Option<UptimeKind>,
|
||||
}
|
||||
|
||||
/// The specification of what values to substitute in for placeholders specified
|
||||
/// in `String` field values.
|
||||
#[derive(Deserialize, Debug, PartialEq, Clone)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct Replacement {
|
||||
/// A placeholder key that can be used in field `pattern`s.
|
||||
pub replace: String,
|
||||
/// The possible values to use instead of the placeholder key in `pattern`.
|
||||
/// Values may optionally have a weight specified. If no weights are
|
||||
/// specified, the values will be randomly generated with equal
|
||||
/// probability. The weights are passed to [`rand`'s `choose_weighted`
|
||||
/// method][choose_weighted] and are a relative likelihood such that the
|
||||
/// probability of each item being selected is its weight divided by the sum
|
||||
/// of all weights in this group.
|
||||
///
|
||||
/// [choose_weighted]: https://docs.rs/rand/0.7.3/rand/seq/trait.SliceRandom.html#tymethod.choose_weighted
|
||||
pub with: Vec<ReplacementValue>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, PartialEq, Clone)]
|
||||
#[serde(untagged, deny_unknown_fields)]
|
||||
/// A possible value to use instead of a placeholder key, optionally with an
|
||||
/// associated weight. If no weight is specified, the weight used will be 1.
|
||||
pub enum ReplacementValue {
|
||||
/// Just a value without a weight
|
||||
String(String),
|
||||
/// A value with a specified relative likelihood weight that gets passed on
|
||||
/// to [`rand`'s `choose_weighted` method][choose_weighted]. The
|
||||
/// probability of each item being selected is its weight divided by the
|
||||
/// sum of all weights in the `Replacement` group.
|
||||
///
|
||||
/// [choose_weighted]: https://docs.rs/rand/0.7.3/rand/seq/trait.SliceRandom.html#tymethod.choose_weighted
|
||||
Weighted(String, u32),
|
||||
}
|
||||
|
||||
impl ReplacementValue {
|
||||
/// The associated replacement value
|
||||
pub fn value(&self) -> &str {
|
||||
use ReplacementValue::*;
|
||||
match self {
|
||||
String(s) => s,
|
||||
Weighted(s, ..) => s,
|
||||
}
|
||||
}
|
||||
|
||||
/// The associated weight value specified; defaults to 1.
|
||||
pub fn weight(&self) -> u32 {
|
||||
use ReplacementValue::*;
|
||||
match self {
|
||||
String(..) => 1,
|
||||
Weighted(.., w) => *w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // Checked-in example schema used as the parsing fixture below.
    static TELEGRAF_TOML: &str = include_str!("../schemas/telegraf.toml");

    /// Walks the parsed telegraf schema and pins the expected structure:
    /// agent/measurement/field names and each field's value spec.
    #[test]
    fn parse_spec() -> Result {
        let spec = DataSpec::from_str(TELEGRAF_TOML)?;

        assert_eq!(spec.name, "demo_schema");
        assert_eq!(spec.agents.len(), 2);

        let agent0 = &spec.agents[0];
        assert_eq!(agent0.name, "demo");

        let agent0_measurements = &agent0.measurements;
        assert_eq!(agent0_measurements.len(), 1);

        let a0m0 = &agent0_measurements[0];
        assert_eq!(a0m0.name, "some_measurement");

        let a0m0_fields = &a0m0.fields;
        assert_eq!(a0m0_fields.len(), 5);

        let a0m0f0 = &a0m0_fields[0];
        assert_eq!(a0m0f0.name, "field1");
        assert_eq!(a0m0f0.field_value_spec, FieldValueSpec::Bool(true));

        let a0m0f1 = &a0m0_fields[1];
        assert_eq!(a0m0f1.name, "field2");
        assert_eq!(
            a0m0f1.field_value_spec,
            FieldValueSpec::I64 {
                range: 3..200,
                increment: false,
                reset_after: None,
            }
        );

        let a0m0f2 = &a0m0_fields[2];
        assert_eq!(a0m0f2.name, "field3");
        assert_eq!(
            a0m0f2.field_value_spec,
            FieldValueSpec::I64 {
                range: 1000..5000,
                increment: true,
                reset_after: None,
            }
        );

        let a0m0f3 = &a0m0_fields[3];
        assert_eq!(a0m0f3.name, "field4");
        assert_eq!(
            a0m0f3.field_value_spec,
            FieldValueSpec::F64 { range: 0.0..100.0 }
        );

        // field5 exercises the string-pattern spec, including weighted and
        // unweighted replacement values.
        let a0m0f4 = &a0m0_fields[4];
        assert_eq!(a0m0f4.name, "field5");
        assert_eq!(
            a0m0f4.field_value_spec,
            FieldValueSpec::String {
                pattern:
                    "{{agent_name}} foo {{level}} {{format-time \"%Y-%m-%d %H:%M\"}} {{random 200}}"
                        .into(),
                replacements: vec![
                    Replacement {
                        replace: "color".into(),
                        with: vec![
                            ReplacementValue::String("red".into()),
                            ReplacementValue::String("blue".into()),
                            ReplacementValue::String("green".into())
                        ],
                    },
                    Replacement {
                        replace: "level".into(),
                        with: vec![
                            ReplacementValue::Weighted("info".into(), 800),
                            ReplacementValue::Weighted("warn".into(), 195),
                            ReplacementValue::Weighted("error".into(), 5)
                        ],
                    }
                ],
            }
        );

        Ok(())
    }

    #[test]
    fn parse_fully_supported_spec() -> Result<()> {
        // The fully supported spec is mostly for manual testing, but we should make
        // sure while developing that it's valid as well so that when we go to
        // do manual testing it isn't broken

        // Also read it from the file to test `DataSpec::from_file` rather than
        // include_str

        let data_spec = DataSpec::from_file("schemas/fully-supported.toml")?;

        assert_eq!(data_spec.name, "demo_schema");

        Ok(())
    }

    /// Omitting `tags`/`replacements` arrays in the TOML should yield empty
    /// vectors rather than a parse error.
    #[test]
    fn not_specifying_vectors_gets_default_empty_vector() {
        let toml = r#"
name = "demo_schema"
base_seed = "this is a demo"

[[agents]]
name = "basic"

[[agents.measurements]]
name = "cpu"

[[agents.measurements.fields]]
name = "host"
pattern = "server"
"#;
        let spec = DataSpec::from_str(toml).unwrap();

        let agent0 = &spec.agents[0];
        assert!(agent0.tags.is_empty());

        let agent0_measurements = &agent0.measurements;
        let a0m0 = &agent0_measurements[0];
        assert!(a0m0.tags.is_empty());

        let a0m0_fields = &a0m0.fields;
        let a0m0f0 = &a0m0_fields[0];
        let field_spec = &a0m0f0.field_value_spec;

        assert!(
            matches!(field_spec, FieldValueSpec::String { replacements, .. } if replacements.is_empty()),
            "expected a String field with empty replacements; was {:?}",
            field_spec
        );
    }
}
|
|
@ -0,0 +1,268 @@
|
|||
//! Substituting dynamic values into a template as specified in various places
|
||||
//! in the schema.
|
||||
|
||||
use crate::{specification, DataGenRng, RandomNumberGenerator};
|
||||
use chrono::prelude::*;
|
||||
use handlebars::{
|
||||
Context, Handlebars, Helper, HelperDef, HelperResult, Output, RenderContext, RenderError,
|
||||
};
|
||||
use rand::{distributions::Alphanumeric, seq::SliceRandom, Rng};
|
||||
use serde::Serialize;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{collections::BTreeMap, convert::TryInto, sync::Mutex};
|
||||
|
||||
/// Substitution-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while substituting values into templates.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when compiling a template containing
    /// placeholder values
    #[snafu(display(
        "Could not perform text substitution in `{}`, caused by:\n{}",
        template,
        source
    ))]
    CantCompileTemplate {
        /// Underlying Handlebars error that caused this problem
        source: handlebars::TemplateError,
        /// Template that caused this problem
        template: String,
    },

    /// Error that may happen when substituting placeholder values
    #[snafu(display(
        "Could not perform text substitution in `{}`, caused by:\n{}",
        template,
        source
    ))]
    CantPerformSubstitution {
        /// Underlying Handlebars error that caused this problem
        source: handlebars::RenderError,
        /// Template that caused this problem
        template: String,
    },
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RandomHelper<T: DataGenRng>(Mutex<RandomNumberGenerator<T>>);
|
||||
|
||||
impl<T: DataGenRng> HelperDef for RandomHelper<T> {
|
||||
fn call<'reg: 'rc, 'rc>(
|
||||
&self,
|
||||
h: &Helper<'_, '_>,
|
||||
_: &Handlebars<'_>,
|
||||
_: &Context,
|
||||
_: &mut RenderContext<'_, '_>,
|
||||
out: &mut dyn Output,
|
||||
) -> HelperResult {
|
||||
let param = h
|
||||
.param(0)
|
||||
.ok_or_else(|| RenderError::new("`random` requires a parameter"))?
|
||||
.value()
|
||||
.as_u64()
|
||||
.ok_or_else(|| RenderError::new("`random`'s parameter must be an unsigned integer"))?
|
||||
.try_into()
|
||||
.map_err(|_| RenderError::new("`random`'s parameter must fit in a usize"))?;
|
||||
|
||||
let rng = &mut *self.0.lock().expect("mutex poisoned");
|
||||
|
||||
let random: String = std::iter::repeat(())
|
||||
.map(|()| rng.sample(Alphanumeric))
|
||||
.map(char::from)
|
||||
.take(param)
|
||||
.collect();
|
||||
|
||||
out.write(&random)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Handlebars helper backing `{{format-time "FMT"}}`: formats the render
/// context's `timestamp` (nanoseconds since the epoch, UTC) with the given
/// strftime-style format string.
#[derive(Debug)]
struct FormatNowHelper;

impl HelperDef for FormatNowHelper {
    fn call<'reg: 'rc, 'rc>(
        &self,
        h: &Helper<'_, '_>,
        _: &Handlebars<'_>,
        c: &Context,
        _: &mut RenderContext<'_, '_>,
        out: &mut dyn Output,
    ) -> HelperResult {
        let format = h
            .param(0)
            .ok_or_else(|| RenderError::new("`format-time` requires a parameter"))?
            .render();

        // Panics (rather than erroring) if the caller forgot to provide a
        // `timestamp` i64 in the template data -- that is a programming bug,
        // not a user input problem.
        let timestamp = c
            .data()
            .get("timestamp")
            .and_then(|t| t.as_i64())
            .expect("Caller of `render` should have set `timestamp` to an `i64` value");

        let datetime = Utc.timestamp_nanos(timestamp);

        out.write(&datetime.format(&format).to_string())?;

        Ok(())
    }
}
|
||||
|
||||
/// Given a handlebars template containing placeholders within double curly
/// brackets like `{{placeholder}}` and a list of `(placeholder, substitution
/// value)` pairs, place the values in the template where the relevant
/// placeholder is.
#[derive(Debug)]
pub struct Substitute {
    // Compiled registry; the template is registered under the name "template".
    handlebars: Handlebars<'static>,
    // Original template text, kept only for error reporting.
    template: String,
}
|
||||
|
||||
impl Substitute {
    /// Compile and evaluate a template once. If you need to evaluate
    /// it multiple times, construct an instance via [`new`].
    ///
    /// If a placeholder appears in a template but not in the list of
    /// substitution values, this will return an error.
    pub fn once(template: &str, values: &[(&str, &str)]) -> Result<String> {
        // Handlebars wants a serializable map, not a slice of pairs.
        let values = values
            .iter()
            .map(|&(k, v)| (k, v))
            .collect::<BTreeMap<_, _>>();
        let me = Self::new_minimal(template)?;
        me.evaluate(&values)
    }

    /// Compiles the handlebars template once, then allows reusing the
    /// template multiple times via [`evaluate`]. If you don't need to
    /// reuse the template, you can use [`once`].
    pub fn new<T: DataGenRng>(
        template: impl Into<String>,
        rng: RandomNumberGenerator<T>,
    ) -> Result<Self> {
        let mut me = Self::new_minimal(template)?;
        me.set_random_number_generator(rng);
        Ok(me)
    }

    // Builds a registry with the `format-time` helper but no `random` helper;
    // used directly by `once`, which has no RNG available.
    fn new_minimal(template: impl Into<String>) -> Result<Self> {
        let template = template.into();

        let mut handlebars = Handlebars::new();
        // Strict mode makes unknown placeholders a render error instead of
        // silently rendering as empty strings.
        handlebars.set_strict_mode(true);

        handlebars.register_helper("format-time", Box::new(FormatNowHelper));

        handlebars
            .register_template_string("template", &template)
            .context(CantCompileTemplate {
                template: &template,
            })?;

        Ok(Self {
            handlebars,
            template,
        })
    }

    // Registers the `random` helper; split out because the RNG is only
    // available on the `new` path.
    fn set_random_number_generator<T: DataGenRng>(&mut self, rng: RandomNumberGenerator<T>) {
        self.handlebars
            .register_helper("random", Box::new(RandomHelper(Mutex::new(rng))));
    }

    /// Interpolates the values into the compiled template.
    ///
    /// If a placeholder appears in a template but not in the list of
    /// substitution values, this will return an error.
    pub fn evaluate(&self, values: &impl Serialize) -> Result<String> {
        self.handlebars
            .render("template", &values)
            .context(CantPerformSubstitution {
                template: &self.template,
            })
    }
}
|
||||
|
||||
/// Given a random number generator and replacement specification, choose a
|
||||
/// particular value from the list of possible values according to any specified
|
||||
/// weights (or with equal probability if there are no weights).
|
||||
pub fn pick_from_replacements<'a, T: DataGenRng>(
|
||||
rng: &mut RandomNumberGenerator<T>,
|
||||
replacements: &'a [specification::Replacement],
|
||||
) -> BTreeMap<&'a str, &'a str> {
|
||||
replacements
|
||||
.iter()
|
||||
.map(|replacement| {
|
||||
let chosen = replacement
|
||||
.with
|
||||
.choose_weighted(rng, |value| value.weight())
|
||||
.expect("`Replacement` `with` should have items")
|
||||
.value();
|
||||
|
||||
(replacement.replace.as_str(), chosen)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test_rng;

    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    // Template data container: `format-time` reads `timestamp` (ns, UTC).
    #[derive(Serialize)]
    struct TimestampArgs {
        timestamp: i64,
    }

    #[test]
    fn format_now_valid_strftime() -> Result {
        let rng = test_rng();
        let args = TimestampArgs {
            timestamp: 1599154445000000000,
        };

        let substitute =
            Substitute::new(r#"the date is {{format-time "%Y-%m-%d"}}."#, rng).unwrap();

        let value = substitute.evaluate(&args)?;

        assert_eq!(value, "the date is 2020-09-03.");

        Ok(())
    }

    // `%-B` is not a valid chrono format item; chrono surfaces that as a
    // Display error, which handlebars turns into a panic.
    #[test]
    #[should_panic(expected = "a Display implementation returned an error unexpectedly: Error")]
    fn format_now_invalid_strftime_panics() {
        let rng = test_rng();
        let args = TimestampArgs {
            timestamp: 1599154445000000000,
        };

        let substitute = Substitute::new(r#"the date is {{format-time "%-B"}}."#, rng).unwrap();

        substitute.evaluate(&args).expect("This is unreachable");
    }

    // Calling `format-time` with no format parameter should be a render
    // error, not a panic.
    #[test]
    fn format_now_missing_strftime() -> Result {
        let rng = test_rng();
        let args = TimestampArgs {
            timestamp: 1599154445000000000,
        };

        let substitute = Substitute::new(r#"the date is {{format-time}}."#, rng).unwrap();

        let result = substitute.evaluate(&args);

        // TODO: better matching on the error
        assert!(result.is_err());

        Ok(())
    }
}
|
|
@ -0,0 +1,495 @@
|
|||
//! Generating a set of tag keys and values given a specification
|
||||
|
||||
use crate::{
|
||||
specification,
|
||||
substitution::{pick_from_replacements, Substitute},
|
||||
DataGenRng, RandomNumberGenerator,
|
||||
};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::fmt;
|
||||
|
||||
/// Tag-specific Results
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Errors that may happen while creating tags
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when substituting placeholder values in tag keys
    #[snafu(display("Could not create tag key, caused by:\n{}", source))]
    CouldNotCreateTagKey {
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },

    /// Error that may happen when substituting placeholder values in tag values
    #[snafu(display(
        "Could not generate tag value for tag `{}`, caused by:\n{}",
        key,
        source
    ))]
    CouldNotGenerateTagValue {
        /// The key of the tag we couldn't create a value for
        key: String,
        /// Underlying `substitution` module error that caused this problem
        source: crate::substitution::Error,
    },
}
|
||||
|
||||
/// A generated tag value that will be used in a generated data point.
#[derive(Debug, Clone, PartialEq)]
pub struct Tag {
    /// The key for the tag
    pub key: String,
    /// The value for the tag
    pub value: String,
}
|
||||
|
||||
impl Tag {
|
||||
/// Create a new tag with the given key and value.
|
||||
pub fn new(key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
Self {
|
||||
key: key.into(),
|
||||
value: value.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A set of `count` tags that have the same configuration but different
/// `tag_id`s.
#[derive(Debug)]
pub struct TagGeneratorSet<T: DataGenRng> {
    // One generator per cardinality value; see `TagGeneratorSet::new`.
    tags: Vec<TagGenerator<T>>,
}
|
||||
|
||||
impl<T: DataGenRng> TagGeneratorSet<T> {
|
||||
/// Create a new set of tag generators for a particular agent, measurement,
|
||||
/// and tag specification.
|
||||
pub fn new(
|
||||
agent_id: usize,
|
||||
measurement_id: usize,
|
||||
spec: &specification::TagSpec,
|
||||
parent_seed: impl fmt::Display,
|
||||
) -> Result<Self> {
|
||||
let cardinality = spec.cardinality.unwrap_or(1);
|
||||
|
||||
let seed = format!("{}-{}", parent_seed, spec.name);
|
||||
|
||||
let tags = (0..cardinality)
|
||||
.map(|cardinality| {
|
||||
TagGenerator::new(agent_id, measurement_id, spec, cardinality, &seed)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(Self { tags })
|
||||
}
|
||||
|
||||
/// Generate one set of tags
|
||||
pub fn generate(&mut self) -> Result<Vec<Vec<Tag>>> {
|
||||
self.tags.iter_mut().map(TagGenerator::generate).collect()
|
||||
}
|
||||
|
||||
/// For tags that shouldn't be included in the multi cartesian product
|
||||
/// because they have cardinality 1, this method takes the number of
|
||||
/// lines needed, looks at whether this tag should be resampled or not,
|
||||
/// and generates the number of lines worth of tags requested.
|
||||
pub fn generate_to_zip(&mut self, num_lines: usize) -> Result<Vec<Vec<Tag>>> {
|
||||
// This is a hack. A better way would be to have a different type for tags with
|
||||
// cardinality = 1, and only that type has this method.
|
||||
if self.tags.len() != 1 {
|
||||
panic!("generate_to_zip is only for use with cardinality 1")
|
||||
}
|
||||
(&mut self.tags[0]).generate_to_zip(num_lines)
|
||||
}
|
||||
|
||||
/// The cardinality of this tag configuration, used to figure out how many
|
||||
/// rows each generation will create in total.
|
||||
pub fn tag_cardinality(&self) -> usize {
|
||||
self.tags.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
struct TagGenerator<T: DataGenRng> {
    // Stringified ids, substituted for `{{agent_id}}` / `{{measurement_id}}`.
    agent_id: String,
    measurement_id: String,
    // `count` pre-built tags; keys are fixed at construction, values are
    // re-substituted on every `generate` call.
    tags: Vec<Tag>,
    // This generator's cardinality index, substituted for `{{cardinality}}`.
    cardinality: u32,
    // Value substituted for `{{counter}}`; advanced by `increment`.
    counter: usize,
    // Calls since the counter last advanced; compared to `increment_every`.
    current_tick: usize,
    // How many generations between counter increments; `None` = never.
    increment_every: Option<usize>,
    rng: RandomNumberGenerator<T>,
    replacements: Vec<specification::Replacement>,
    // When true, `generate_to_zip` re-rolls replacements for every line.
    resample_every_line: bool,
}
|
||||
|
||||
impl<T: DataGenRng> TagGenerator<T> {
    /// Build one generator for a single cardinality index, substituting
    /// `agent_id`/`measurement_id`/`tag_id` into the tag key up front.
    fn new(
        agent_id: usize,
        measurement_id: usize,
        spec: &specification::TagSpec,
        cardinality: u32,
        parent_seed: impl fmt::Display,
    ) -> Result<Self> {
        let count = spec.count.unwrap_or(1);
        let increment_every = spec.increment_every;
        let agent_id = agent_id.to_string();
        let measurement_id = measurement_id.to_string();

        // Deterministic per-tag seed: same spec + parent seed => same data.
        let seed = format!("{}-{}-{}", parent_seed, spec.name, cardinality);
        let rng = RandomNumberGenerator::<T>::new(seed);

        let tags = (0..count)
            .map(|tag_id| {
                let key = Substitute::once(
                    &spec.name,
                    &[
                        ("agent_id", &agent_id),
                        ("measurement_id", &measurement_id),
                        ("tag_id", &tag_id.to_string()),
                    ],
                )
                .context(CouldNotCreateTagKey)?;

                Ok(Tag {
                    key,
                    value: spec.value.clone(),
                })
            })
            .collect::<Result<_>>()?;

        Ok(Self {
            agent_id,
            measurement_id,
            tags,
            cardinality,
            counter: 0,
            current_tick: 0,
            increment_every,
            rng,
            replacements: spec.replacements.clone(),
            resample_every_line: spec.resample_every_line,
        })
    }

    /// Produce one row of tags, substituting the current counter, guid, and
    /// freshly-picked replacement values into each tag's value template.
    fn generate(&mut self) -> Result<Vec<Tag>> {
        let counter = self.increment().to_string();
        let cardinality_string = self.cardinality.to_string();
        let guid = self.rng.guid().to_string();

        // Built-in substitutions are inserted after the replacements, so they
        // win if a replacement uses one of these reserved names.
        let mut substitutions = pick_from_replacements(&mut self.rng, &self.replacements);
        substitutions.insert("agent_id", &self.agent_id);
        substitutions.insert("measurement_id", &self.measurement_id);
        substitutions.insert("counter", &counter);
        substitutions.insert("cardinality", &cardinality_string);
        substitutions.insert("guid", &guid);
        let substitutions: Vec<_> = substitutions.into_iter().collect();

        self.tags
            .iter()
            .map(|tag| {
                let key = tag.key.clone();
                let value = Substitute::once(&tag.value, &substitutions)
                    .context(CouldNotGenerateTagValue { key: &key })?;

                Ok(Tag { key, value })
            })
            .collect()
    }

    // if count and replacements/resampling could never be used on the same tag
    // configuration, then this could return `Result<Vec<Tag>>` I think. This
    // could also possibly return an iterator rather than a Vec; the measurement
    // immediately iterates over it
    fn generate_to_zip(&mut self, num_lines: usize) -> Result<Vec<Vec<Tag>>> {
        if self.resample_every_line {
            // New replacement picks (and counter tick) for every line.
            Ok((0..num_lines)
                .map(|_| self.generate())
                .collect::<Result<_>>()?)
        } else {
            // One generation, repeated for every line.
            let tags = self.generate()?;
            Ok(std::iter::repeat(tags).take(num_lines).collect())
        }
    }

    /// Returns the current value and potentially increments the counter for
    /// next time.
    fn increment(&mut self) -> usize {
        let counter = self.counter;

        // Only advances when `increment_every` is configured; the counter
        // bumps once every `increment` calls.
        if let Some(increment) = self.increment_every {
            self.current_tick += 1;
            if self.current_tick >= increment {
                self.counter += 1;
                self.current_tick = 0;
            }
        }

        counter
    }
}
|
||||
|
||||
/// Cycles through each value for each agent tag
pub struct AgentTagIterator {
    // One endless cycling iterator per agent tag; boxed because each closure
    // capture has a distinct type.
    iters: Vec<Box<dyn Iterator<Item = Tag>>>,
}
|
||||
|
||||
// Manual impl: boxed trait objects aren't `Debug`, so show a placeholder.
impl fmt::Debug for AgentTagIterator {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("AgentTagIterator")
            .field("iters", &"(dynamic)")
            .finish()
    }
}
|
||||
|
||||
impl AgentTagIterator {
|
||||
/// Create a new iterator to manage the cycling
|
||||
pub fn new(agent_tags: &[specification::AgentTag]) -> Self {
|
||||
Self {
|
||||
iters: agent_tags
|
||||
.iter()
|
||||
.map(|agent_tag| {
|
||||
boxed_cycling_iter(agent_tag.key.clone(), agent_tag.values.clone())
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn boxed_cycling_iter(key: String, values: Vec<String>) -> Box<dyn Iterator<Item = Tag>> {
|
||||
Box::new(values.into_iter().cycle().map(move |v| Tag::new(&key, &v)))
|
||||
}
|
||||
|
||||
impl Iterator for AgentTagIterator {
    type Item = Vec<Tag>;

    // Note: this never returns `None` -- each inner iterator is an endless
    // `cycle`, and with no inner iterators this yields `Some(vec![])`
    // forever (the tests below rely on that).
    fn next(&mut self) -> Option<Self::Item> {
        Some(self.iters.iter_mut().flat_map(|i| i.next()).collect())
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::{specification::*, ZeroRng, TEST_SEED};

    #[test]
    fn empty_agent_spec_tag_set_always_returns_empty_vec() {
        let agent = AgentSpec {
            tags: vec![],
            ..AgentSpec::default()
        };

        let mut iter = AgentTagIterator::new(&agent.tags);

        // No tags => the iterator still yields (empty) rows forever.
        assert_eq!(iter.next().unwrap(), vec![]);
    }

    // Two cycling tags of lengths 3 and 2 should repeat with period 6.
    #[test]
    fn agent_spec_tag_set() {
        let tag_alpha = toml::from_str(
            r#"key = "alpha"
values = ["1", "2", "3"]"#,
        )
        .unwrap();
        let tag_omega = toml::from_str(
            r#"key = "omega"
values = ["apple", "grape"]"#,
        )
        .unwrap();

        let agent = AgentSpec {
            tags: vec![tag_alpha, tag_omega],
            ..AgentSpec::default()
        };

        let mut iter = AgentTagIterator::new(&agent.tags);

        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "1"), Tag::new("omega", "apple"),]
        );
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "2"), Tag::new("omega", "grape"),]
        );
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "3"), Tag::new("omega", "apple"),]
        );
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "1"), Tag::new("omega", "grape"),]
        );
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "2"), Tag::new("omega", "apple"),]
        );
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "3"), Tag::new("omega", "grape"),]
        );
        // Back to the start of the 6-row cycle.
        assert_eq!(
            iter.next().unwrap(),
            vec![Tag::new("alpha", "1"), Tag::new("omega", "apple"),]
        );
    }

    // Exercises every built-in placeholder in both the key and value
    // templates, across cardinality 3 x count 2, over two generations
    // (the second generation bumps `{{counter}}` via increment_every = 1).
    #[test]
    fn all_the_tag_substitutions_everywhere() -> Result<()> {
        let spec = TagSpec {
            name: "{{agent_id}}x{{measurement_id}}x{{tag_id}}".into(),
            value: "{{agent_id}}v{{measurement_id}}v{{cardinality}}v{{counter}}".into(),
            count: Some(2),
            cardinality: Some(3),
            increment_every: Some(1),
            ..Default::default()
        };

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &spec, TEST_SEED)?;

        let tags = tg.generate()?;
        assert_eq!(
            vec![
                vec![
                    Tag::new("22x33x0", "22v33v0v0"),
                    Tag::new("22x33x1", "22v33v0v0"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v1v0"),
                    Tag::new("22x33x1", "22v33v1v0"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v2v0"),
                    Tag::new("22x33x1", "22v33v2v0"),
                ],
            ],
            tags
        );

        let tags = tg.generate()?;
        assert_eq!(
            vec![
                vec![
                    Tag::new("22x33x0", "22v33v0v1"),
                    Tag::new("22x33x1", "22v33v0v1"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v1v1"),
                    Tag::new("22x33x1", "22v33v1v1"),
                ],
                vec![
                    Tag::new("22x33x0", "22v33v2v1"),
                    Tag::new("22x33x1", "22v33v2v1"),
                ],
            ],
            tags
        );

        Ok(())
    }

    // With `ZeroRng`, the weighted choice always lands on the first value.
    #[test]
    fn string_replacements() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        let tags = tg.generate()?;

        assert_eq!(vec![vec![Tag::new("host", "serverA")]], tags);

        Ok(())
    }

    #[test]
    fn generate_to_zip_with_resample() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]
resample_every_line = true
"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        let tags = tg.generate_to_zip(3)?;

        assert_eq!(
            vec![
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
            ],
            tags
        );

        Ok(())
    }

    #[test]
    fn generate_to_zip_without_resample() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]
resample_every_line = false
"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        let tags = tg.generate_to_zip(3)?;

        assert_eq!(
            vec![
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
            ],
            tags
        );

        Ok(())
    }

    // Omitting `resample_every_line` should behave like `false`.
    #[test]
    fn generate_to_zip_with_default_no_resample() -> Result<()> {
        let host_tag_spec: specification::TagSpec = toml::from_str(
            r#"name = "host"
value = "{{host}}"
replacements = [
    {replace = "host", with = ["serverA", "serverB", "serverC", "serverD"]},
]"#,
        )
        .unwrap();

        let mut tg = TagGeneratorSet::<ZeroRng>::new(22, 33, &host_tag_spec, TEST_SEED)?;

        let tags = tg.generate_to_zip(3)?;

        assert_eq!(
            vec![
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")],
                vec![Tag::new("host", "serverA")]
            ],
            tags
        );

        Ok(())
    }
}
|
|
@ -0,0 +1,361 @@
|
|||
//! Writing generated points
|
||||
|
||||
use futures::stream;
|
||||
use influxdb2_client::models::{DataPoint, PostBucketRequest, WriteDataPoint};
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
#[cfg(test)]
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
use std::{
|
||||
fs,
|
||||
fs::OpenOptions,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use tracing::info;
|
||||
|
||||
/// Errors that may happen while writing points.
#[derive(Snafu, Debug)]
pub enum Error {
    /// Error that may happen when writing line protocol to a no-op sink
    #[snafu(display("Could not generate line protocol: {}", source))]
    CantWriteToNoOp {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen when writing line protocol to a file
    #[snafu(display("Could not write line protocol to file: {}", source))]
    CantWriteToLineProtocolFile {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen when creating a directory to store files to write
    /// to
    #[snafu(display("Could not create directory: {}", source))]
    CantCreateDirectory {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen when checking a path's metadata to see if it's a
    /// directory
    #[snafu(display("Could not get metadata: {}", source))]
    CantGetMetadata {
        /// Underlying IO error that caused this problem
        source: std::io::Error,
    },

    /// Error that may happen if the path given to the file-based writer isn't a
    /// directory
    #[snafu(display("Expected to get a directory"))]
    MustBeDirectory,

    /// Error that may happen while writing points to the API
    #[snafu(display("Could not write points to API: {}", source))]
    CantWriteToApi {
        /// Underlying Influx client request error that caused this problem
        source: influxdb2_client::RequestError,
    },

    /// Error that may happen while trying to create a bucket via the API
    #[snafu(display("Could not create bucket: {}", source))]
    CantCreateBucket {
        /// Underlying Influx client request error that caused this problem
        source: influxdb2_client::RequestError,
    },

    /// Error that may happen if attempting to create a bucket without
    /// specifying the org ID
    #[snafu(display("Could not create a bucket without an `org_id`"))]
    OrgIdRequiredToCreateBucket,
}

// Write-specific Result alias defaulting to this module's `Error`.
type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Responsible for holding shared configuration needed to construct per-agent
/// points writers
#[derive(Debug)]
pub struct PointsWriterBuilder {
    // Destination configuration shared by all agents; see `build_for_agent`.
    config: PointsWriterConfig,
}
|
||||
|
||||
// Where generated points should go; one variant per supported sink.
#[derive(Debug)]
enum PointsWriterConfig {
    // Write to an InfluxDB 2.x HTTP API.
    Api {
        client: influxdb2_client::Client,
        org: String,
        bucket: String,
    },
    // Write one line-protocol file per agent into this directory.
    Directory(PathBuf),
    // Generate but discard; `perform_write` still serializes the points
    // before discarding them.
    NoOp {
        perform_write: bool,
    },
    // Test-only: capture each agent's output in an in-memory buffer.
    #[cfg(test)]
    Vector(BTreeMap<String, Arc<Mutex<Vec<u8>>>>),
}
|
||||
|
||||
impl PointsWriterBuilder {
|
||||
    /// Write points to the API at the specified host and put them in the
    /// specified org and bucket.
    ///
    /// # Errors
    ///
    /// Returns `OrgIdRequiredToCreateBucket` if `create_bucket` is true but
    /// `org_id` is `None`, and `CantCreateBucket` if the bucket-creation
    /// request fails.
    pub async fn new_api(
        host: impl Into<String>,
        org: impl Into<String>,
        bucket: impl Into<String>,
        token: impl Into<String>,
        create_bucket: bool,
        org_id: Option<&str>,
    ) -> Result<Self> {
        let host = host.into();

        // Be somewhat lenient on what we accept as far as host; the client expects the
        // protocol to be included. We could pull in the url crate and do more
        // verification here.
        let host = if host.starts_with("http") {
            host
        } else {
            format!("http://{}", host)
        };

        let client = influxdb2_client::Client::new(host, token.into());
        let org = org.into();
        let bucket = bucket.into();

        if create_bucket {
            let org_id = org_id.context(OrgIdRequiredToCreateBucket)?.to_string();
            let bucket = PostBucketRequest {
                org_id,
                name: bucket.clone(),
                ..Default::default()
            };

            client
                .create_bucket(Some(bucket))
                .await
                .context(CantCreateBucket)?;
        }

        Ok(Self {
            config: PointsWriterConfig::Api {
                client,
                org,
                bucket,
            },
        })
    }
|
||||
|
||||
/// Write points to a file in the directory specified.
|
||||
pub fn new_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||
fs::create_dir_all(&path).context(CantCreateDirectory)?;
|
||||
let metadata = fs::metadata(&path).context(CantGetMetadata)?;
|
||||
ensure!(metadata.is_dir(), MustBeDirectory);
|
||||
|
||||
Ok(Self {
|
||||
config: PointsWriterConfig::Directory(PathBuf::from(path.as_ref())),
|
||||
})
|
||||
}
|
||||
|
||||
/// Generate points but do not write them anywhere
|
||||
pub fn new_no_op(perform_write: bool) -> Self {
|
||||
Self {
|
||||
config: PointsWriterConfig::NoOp { perform_write },
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a writer out of this writer's configuration for a particular
|
||||
/// agent that runs in a separate thread/task.
|
||||
pub fn build_for_agent(&mut self, agent_name: &str) -> PointsWriter {
|
||||
let inner_writer = match &mut self.config {
|
||||
PointsWriterConfig::Api {
|
||||
client,
|
||||
org,
|
||||
bucket,
|
||||
} => InnerPointsWriter::Api {
|
||||
client: client.clone(),
|
||||
org: org.clone(),
|
||||
bucket: bucket.clone(),
|
||||
},
|
||||
PointsWriterConfig::Directory(dir_path) => {
|
||||
let mut filename = dir_path.clone();
|
||||
filename.push(agent_name);
|
||||
filename.set_extension("txt");
|
||||
InnerPointsWriter::File(filename)
|
||||
}
|
||||
PointsWriterConfig::NoOp { perform_write } => InnerPointsWriter::NoOp {
|
||||
perform_write: *perform_write,
|
||||
},
|
||||
#[cfg(test)]
|
||||
PointsWriterConfig::Vector(ref mut agents_by_name) => {
|
||||
let v = agents_by_name
|
||||
.entry(agent_name.to_string())
|
||||
.or_insert_with(|| Arc::new(Mutex::new(Vec::new())));
|
||||
InnerPointsWriter::Vec(Arc::clone(v))
|
||||
}
|
||||
};
|
||||
|
||||
PointsWriter { inner_writer }
|
||||
}
|
||||
}
|
||||
|
||||
/// Responsible for writing points to the location it's been configured for.
#[derive(Debug)]
pub struct PointsWriter {
    // Destination-specific writer this facade delegates to.
    inner_writer: InnerPointsWriter,
}
|
||||
|
||||
impl PointsWriter {
|
||||
/// Write these points
|
||||
pub async fn write_points(&mut self, points: Vec<DataPoint>) -> Result<()> {
|
||||
self.inner_writer.write_points(points).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Destination-specific writer handed to each agent by
/// `PointsWriterBuilder::build_for_agent`.
#[derive(Debug)]
enum InnerPointsWriter {
    /// Write points through the InfluxDB 2.x client into an org/bucket.
    Api {
        client: influxdb2_client::Client,
        org: String,
        bucket: String,
    },
    /// Append line protocol to this file.
    File(PathBuf),
    /// Discard points; when `perform_write` is true they are still serialized
    /// to an `io::sink` first.
    NoOp {
        perform_write: bool,
    },
    /// Append line protocol bytes to a shared in-memory buffer (test only).
    #[cfg(test)]
    Vec(Arc<Mutex<Vec<u8>>>),
}
|
||||
|
||||
impl InnerPointsWriter {
|
||||
async fn write_points(&mut self, points: Vec<DataPoint>) -> Result<()> {
|
||||
match self {
|
||||
Self::Api {
|
||||
client,
|
||||
org,
|
||||
bucket,
|
||||
} => {
|
||||
client
|
||||
.write(org, bucket, stream::iter(points))
|
||||
.await
|
||||
.context(CantWriteToApi)?;
|
||||
}
|
||||
Self::File(filename) => {
|
||||
info!("Opening file {:?}", filename);
|
||||
let num_points = points.len();
|
||||
let file = OpenOptions::new()
|
||||
.append(true)
|
||||
.create(true)
|
||||
.open(&filename)
|
||||
.context(CantWriteToLineProtocolFile)?;
|
||||
|
||||
let mut file = std::io::BufWriter::new(file);
|
||||
for point in points {
|
||||
point
|
||||
.write_data_point_to(&mut file)
|
||||
.context(CantWriteToLineProtocolFile)?;
|
||||
}
|
||||
info!("Wrote {} points to {:?}", num_points, filename);
|
||||
}
|
||||
Self::NoOp { perform_write } => {
|
||||
if *perform_write {
|
||||
let mut sink = std::io::sink();
|
||||
|
||||
for point in points {
|
||||
point
|
||||
.write_data_point_to(&mut sink)
|
||||
.context(CantWriteToNoOp)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
Self::Vec(ref mut vec) => {
|
||||
let vec_ref = Arc::clone(vec);
|
||||
let mut vec = vec_ref.lock().expect("Should be able to get lock");
|
||||
for point in points {
|
||||
point
|
||||
.write_data_point_to(&mut *vec)
|
||||
.expect("Should be able to write to vec");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::{generate, now_ns, specification::*, ZeroRng};
    use std::str::FromStr;

    type Error = Box<dyn std::error::Error>;
    type Result<T = (), E = Error> = std::result::Result<T, E>;

    impl PointsWriterBuilder {
        /// Build a writer that captures generated line protocol in memory.
        fn new_vec() -> Self {
            let config = PointsWriterConfig::Vector(BTreeMap::new());
            Self { config }
        }

        /// Return everything written on behalf of `agent_name` as a string.
        fn written_data(self, agent_name: &str) -> String {
            let agents_by_name = match self.config {
                PointsWriterConfig::Vector(agents_by_name) => agents_by_name,
                _ => unreachable!("this method is only valid when writing to a vector for testing"),
            };

            let buffer = agents_by_name
                .get(agent_name)
                .expect("Should have written some data, did not find any for this agent")
                .clone();
            let bytes = buffer
                .lock()
                .expect("Should have been able to get a lock");
            String::from_utf8(bytes.to_vec()).expect("we should be generating valid UTF-8")
        }
    }

    #[tokio::test]
    async fn test_generate() -> Result<()> {
        let toml = r#"
name = "demo_schema"
base_seed = "this is a demo"

[[agents]]
name = "basic"

[[agents.measurements]]
name = "cpu"

[[agents.measurements.fields]]
name = "up"
bool = true"#;

        let data_spec = DataSpec::from_str(toml).unwrap();
        let mut writer_builder = PointsWriterBuilder::new_vec();

        let now = now_ns();

        // Run one generation pass with the deterministic zero RNG so the
        // produced line protocol is fully predictable.
        generate::<ZeroRng>(
            &data_spec,
            &mut writer_builder,
            Some(now),
            Some(now),
            now,
            false,
        )
        .await?;

        let actual = writer_builder.written_data("basic");

        let expected = format!(
            r#"cpu,data_spec=demo_schema up=f {}
"#,
            now
        );
        assert_eq!(actual, expected);

        Ok(())
    }
}
|
|
@ -6,8 +6,7 @@
|
|||
# ./scripts/genlp.py | head -n 2000
|
||||
# ```
|
||||
#
|
||||
# Please use https://github.com/influxdata/iox_data_generator for anything
|
||||
# more complicated.
|
||||
# Please use iox_data_generator for anything more complicated.
|
||||
#
|
||||
|
||||
from signal import signal, SIGPIPE, SIG_DFL
|
||||
|
|
Loading…
Reference in New Issue