chore: Merge branch 'main' into ntran/more_delete_tests

pull/24376/head
Nga Tran 2021-09-22 16:47:15 -04:00
commit 2399a932fb
66 changed files with 4680 additions and 1320 deletions

View File

@ -151,7 +151,7 @@ jobs:
- cache_restore
- run:
name: Cargo test
command: cargo test --features=jaeger --workspace
command: cargo test --workspace
- cache_save
# end to end tests with Heappy (heap profiling enabled)
@ -278,7 +278,7 @@ jobs:
command: cargo test --workspace --benches --no-run
- run:
name: Build with object store + exporter support + HEAP profiling
command: cargo build --no-default-features --features="aws,gcp,azure,jaeger,heappy,pprof"
command: cargo build --no-default-features --features="aws,gcp,azure,heappy,pprof"
- cache_save
# Lint protobufs.
@ -337,10 +337,10 @@ jobs:
- cache_restore
- run:
name: Print rustc target CPU options
command: cargo run --release --no-default-features --features="aws,gcp,azure,jaeger,heappy" --bin print_cpu
command: cargo run --release --no-default-features --features="aws,gcp,azure,heappy" --bin print_cpu
- run:
name: Cargo release build with target arch set for CRoaring
command: cargo build --release --no-default-features --features="aws,gcp,azure,jaeger,heappy"
command: cargo build --release --no-default-features --features="aws,gcp,azure,heappy"
- run: |
echo sha256sum after build is
sha256sum target/release/influxdb_iox

1
.gitattributes vendored
View File

@ -1,3 +1,4 @@
generated_types/protos/google/ linguist-generated=true
generated_types/protos/grpc/ linguist-generated=true
generated_types/src/wal_generated.rs linguist-generated=true
trace_exporters/src/thrift/ linguist-generated=true

View File

@ -265,7 +265,7 @@ docker run -d --name jaeger \
### Step 2: Run IOx configured to send traces to the local Jaeger instance
Build IOx with `--features=jaeger` and run with the following environment variables set:
Build IOx and run with the following environment variables set:
```
TRACES_EXPORTER=jaeger
TRACES_EXPORTER_JAEGER_AGENT_HOST=localhost
@ -274,7 +274,7 @@ TRACES_EXPORTER_JAEGER_AGENT_PORT=6831
For example, a command such as this should do the trick:
```shell
TRACES_EXPORTER=jaeger TRACES_EXPORTER_JAEGER_AGENT_HOST=localhost TRACES_EXPORTER_JAEGER_AGENT_PORT=6831 cargo run --features=jaeger -- run -v --object-store=file --data-dir=$HOME/.influxdb_iox --server-id=42
TRACES_EXPORTER=jaeger TRACES_EXPORTER_JAEGER_AGENT_HOST=localhost TRACES_EXPORTER_JAEGER_AGENT_PORT=6831 cargo run -- run -v --server-id=42
```
### Step 3: Send a request with trace context
@ -286,8 +286,8 @@ For IOx to emit traces, the request must have a span context set. You can use th
./target/debug/influxdb_iox database create my_db
# load data
./target/debug/influxdb_iox database write my_db tests/fixtures/lineproto/metrics.lp
# run a query and include a span context
./target/debug/influxdb_iox database query my_db 'show tables' --header uber-trace-id:4459495:30434:0:1
# run a query and start a new trace
./target/debug/influxdb_iox database query my_db 'show tables' --header jaeger-debug-id:tracing-is-a-great-idea
```
### Step 4: Explore Spans in the UI

100
Cargo.lock generated
View File

@ -425,9 +425,9 @@ dependencies = [
[[package]]
name = "bstr"
version = "0.2.16"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279"
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
dependencies = [
"lazy_static",
"memchr",
@ -677,7 +677,7 @@ dependencies = [
"criterion-plot",
"csv",
"futures",
"itertools 0.10.1",
"itertools",
"lazy_static",
"num-traits",
"oorandom",
@ -700,7 +700,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
dependencies = [
"cast",
"itertools 0.10.1",
"itertools",
]
[[package]]
@ -828,7 +828,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "5.1.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=225825634cca55f210d0366422f1543893be67c2#225825634cca55f210d0366422f1543893be67c2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=1c858ce7baab1929cfdba97051ef4e5e4d0a866b#1c858ce7baab1929cfdba97051ef4e5e4d0a866b"
dependencies = [
"ahash",
"arrow",
@ -1259,7 +1259,6 @@ dependencies = [
"bytes",
"chrono",
"data_types",
"futures",
"google_types",
"num_cpus",
"observability_deps",
@ -1270,7 +1269,6 @@ dependencies = [
"prost-build",
"regex",
"serde",
"serde_json",
"thiserror",
"tonic",
"tonic-build",
@ -1483,9 +1481,9 @@ dependencies = [
[[package]]
name = "http"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11"
checksum = "1323096b05d41827dadeaee54c9981958c0f94e670bc94ed80037d1a7b8b186b"
dependencies = [
"bytes",
"fnv",
@ -1676,7 +1674,7 @@ dependencies = [
"influxdb_storage_client",
"internal_types",
"iox_object_store",
"itertools 0.10.1",
"itertools",
"libc",
"log",
"logfmt",
@ -1815,8 +1813,7 @@ dependencies = [
"futures",
"hashbrown 0.11.2",
"indexmap",
"itertools 0.10.1",
"observability_deps",
"itertools",
"snafu",
"tokio",
]
@ -1836,7 +1833,7 @@ dependencies = [
"humantime",
"influxdb2_client",
"influxdb_iox_client",
"itertools 0.9.0",
"itertools",
"packers",
"rand",
"rand_core",
@ -1875,15 +1872,6 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9"
[[package]]
name = "itertools"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.10.1"
@ -2232,7 +2220,6 @@ dependencies = [
"data_types",
"entry",
"hashbrown 0.11.2",
"influxdb_line_protocol",
"internal_types",
"metric",
"observability_deps",
@ -2512,7 +2499,7 @@ dependencies = [
"futures",
"futures-test",
"indexmap",
"itertools 0.10.1",
"itertools",
"percent-encoding",
"reqwest",
"rusoto_core",
@ -2592,46 +2579,6 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "opentelemetry"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf9b1c4e9a6c4de793c632496fa490bdc0e1eea73f0c91394f7b6990935d22"
dependencies = [
"async-trait",
"crossbeam-channel",
"futures",
"js-sys",
"lazy_static",
"percent-encoding",
"pin-project",
"rand",
"thiserror",
]
[[package]]
name = "opentelemetry-jaeger"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db22f492873ea037bc267b35a0e8e4fb846340058cb7c864efe3d0bf23684593"
dependencies = [
"async-trait",
"lazy_static",
"opentelemetry",
"opentelemetry-semantic-conventions",
"thiserror",
"thrift",
]
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffeac823339e8b0f27b961f4385057bf9f97f2863bc745bd015fd6091f2270e9"
dependencies = [
"opentelemetry",
]
[[package]]
name = "ordered-float"
version = "1.1.1"
@ -2783,7 +2730,6 @@ dependencies = [
"thrift",
"tokio",
"tokio-stream",
"tracker",
"uuid",
"zstd",
]
@ -2808,7 +2754,7 @@ name = "pbjson_build"
version = "0.1.0"
dependencies = [
"heck",
"itertools 0.10.1",
"itertools",
"pbjson_test",
"prost",
"prost-types",
@ -3042,7 +2988,7 @@ checksum = "c143348f141cc87aab5b950021bac6145d0e5ae754b0591de23244cee42c9308"
dependencies = [
"difflib",
"float-cmp",
"itertools 0.10.1",
"itertools",
"normalize-line-endings",
"predicates-core",
"regex",
@ -3165,7 +3111,7 @@ checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603"
dependencies = [
"bytes",
"heck",
"itertools 0.10.1",
"itertools",
"log",
"multimap",
"petgraph",
@ -3182,7 +3128,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba"
dependencies = [
"anyhow",
"itertools 0.10.1",
"itertools",
"proc-macro2",
"quote",
"syn",
@ -3213,7 +3159,7 @@ dependencies = [
"futures",
"hashbrown 0.11.2",
"internal_types",
"itertools 0.10.1",
"itertools",
"libc",
"observability_deps",
"parking_lot",
@ -3417,7 +3363,7 @@ dependencies = [
"either",
"hashbrown 0.11.2",
"internal_types",
"itertools 0.10.1",
"itertools",
"metric",
"observability_deps",
"packers",
@ -3899,7 +3845,7 @@ dependencies = [
"influxdb_line_protocol",
"internal_types",
"iox_object_store",
"itertools 0.10.1",
"itertools",
"lifecycle",
"metric",
"mutable_buffer",
@ -4356,9 +4302,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.11.0"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4efe6fc2395938c8155973d7be49fe8d03a843726e285e100a8a383cc0154ce"
checksum = "c2c2416fdedca8443ae44b4527de1ea633af61d8f7169ffa6e72c5b53d24efcc"
dependencies = [
"autocfg",
"bytes",
@ -4573,12 +4519,10 @@ dependencies = [
"chrono",
"futures",
"observability_deps",
"opentelemetry",
"opentelemetry-jaeger",
"snafu",
"structopt",
"thrift",
"tokio",
"tokio-util",
"trace",
]
@ -4590,7 +4534,7 @@ dependencies = [
"hashbrown 0.11.2",
"http",
"http-body",
"itertools 0.10.1",
"itertools",
"metric",
"observability_deps",
"parking_lot",

View File

@ -191,7 +191,6 @@ default = ["jemalloc_replacing_malloc"]
azure = ["object_store/azure"] # Optional Azure Object store support
gcp = ["object_store/gcp"] # Optional GCP object store support
aws = ["object_store/aws"] # Optional AWS / S3 object store support
jaeger = ["trace_exporters/jaeger"] # Enable optional jaeger tracing support
# pprof is an optional feature for pprof support
# heappy is an optional feature; Not on by default as it

View File

@ -13,7 +13,7 @@ RUN \
--mount=type=cache,id=influxdb_iox_git,sharing=locked,target=/usr/local/cargo/git \
--mount=type=cache,id=influxdb_iox_target,sharing=locked,target=/influxdb_iox/target \
du -cshx /usr/local/cargo/registry /usr/local/cargo/git /influxdb_iox/target && \
cargo build --target-dir /influxdb_iox/target --release --features azure,gcp,aws,jaeger,pprof && \
cargo build --target-dir /influxdb_iox/target --release --features azure,gcp,aws,pprof && \
cp /influxdb_iox/target/release/influxdb_iox /root/influxdb_iox && \
du -cshx /usr/local/cargo/registry /usr/local/cargo/git /influxdb_iox/target

View File

@ -9,4 +9,4 @@ description = "Re-exports datafusion at a specific version"
# Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. various crypto functions)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="225825634cca55f210d0366422f1543893be67c2", default-features = false, package = "datafusion" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="1c858ce7baab1929cfdba97051ef4e5e4d0a866b", default-features = false, package = "datafusion" }

View File

@ -7,20 +7,18 @@ edition = "2018"
[dependencies] # In alphabetical order
bytes = "1.0"
data_types = { path = "../data_types" }
futures = "0.3"
google_types = { path = "../google_types" }
num_cpus = "1.13.0"
observability_deps = { path = "../observability_deps" }
pbjson = { path = "../pbjson" }
prost = "0.8"
regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.67"
thiserror = "1.0.28"
tonic = "0.5"
[dev-dependencies]
chrono = { version = "0.4", features = ["serde"] }
num_cpus = "1.13.0"
[build-dependencies] # In alphabetical order
# Pin specific version of the tonic-build dependencies to match arrow

View File

@ -11,7 +11,6 @@ arrow = { version = "5.0", features = ["prettyprint"] }
hashbrown = "0.11"
indexmap = "1.6"
itertools = "0.10.1"
observability_deps = { path = "../observability_deps" }
snafu = "0.6"
tokio = { version = "1.11", features = ["sync"] }

View File

@ -17,7 +17,7 @@ generated_types = { path = "../generated_types" }
influxdb2_client = { path = "../influxdb2_client" }
influxdb_iox_client = { path = "../influxdb_iox_client" }
packers = { path = "../packers" }
itertools = "0.9.0"
itertools = "0.10.0"
rand = { version = "0.8.3", features = ["small_rng"] }
rand_core = "0.6.2"
rand_seeder = "0.2.1"

View File

@ -21,7 +21,6 @@ chrono = "0.4"
data_types = { path = "../data_types" }
entry = { path = "../entry" }
hashbrown = "0.11"
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
internal_types = { path = "../internal_types" }
metric = { path = "../metric" }
observability_deps = { path = "../observability_deps" }

View File

@ -27,13 +27,13 @@ snafu = "0.6.10"
tokio = { version = "1.11", features = ["macros", "fs"] }
# Filesystem integration
tokio-util = { version = "0.6.3", features = [ "io" ] }
reqwest = "0.11"
reqwest = { version = "0.11", optional = true }
# Filesystem integration
walkdir = "2"
tempfile = "3.1.0"
[features]
azure = ["azure_core", "azure_storage", "indexmap"]
azure = ["azure_core", "azure_storage", "indexmap", "reqwest"]
gcp = ["cloud-storage"]
aws = ["rusoto_core", "rusoto_credential", "rusoto_s3"]

View File

@ -31,7 +31,6 @@ tempfile = "3.1.0"
thrift = "0.13"
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
tokio-stream = "0.1"
tracker = { path = "../tracker" }
uuid = { version = "0.8", features = ["serde", "v4"] }
zstd = "0.9"

View File

@ -12,8 +12,8 @@ use snafu::{ResultExt, Snafu};
use crate::catalog::{
core::PreservedCatalog,
interface::{
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateDeletePredicateError,
CatalogStateRemoveError, ChunkAddrWithoutDatabase,
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
ChunkAddrWithoutDatabase,
},
};
@ -146,10 +146,9 @@ impl CatalogState for TracerCatalogState {
&mut self,
_predicate: Arc<Predicate>,
_chunks: Vec<ChunkAddrWithoutDatabase>,
) -> Result<(), CatalogStateDeletePredicateError> {
) {
// No need to track delete predicates, because the cleanup's job is to remove unreferenced parquet files. Delete
// predicates however are stored directly within the preserved catalog and therefore don't need pruning.
Ok(())
}
}

View File

@ -147,11 +147,6 @@ pub enum Error {
source: crate::catalog::interface::CatalogStateRemoveError,
},
#[snafu(display("Cannot add delete predicate: {}", source))]
DeletePredicateError {
source: crate::catalog::interface::CatalogStateDeletePredicateError,
},
#[snafu(display("Cannot serialize predicate: {}", source))]
CannotSerializePredicate {
source: predicate::serialize::SerializeError,
@ -591,9 +586,7 @@ impl OpenTransaction {
chunk_id: ChunkId::new(chunk.chunk_id),
})
.collect();
state
.delete_predicate(predicate, chunks)
.context(DeletePredicateError)?;
state.delete_predicate(predicate, chunks);
}
};
Ok(())
@ -1739,12 +1732,12 @@ mod tests {
let predicate_1 = create_delete_predicate(&chunk_addrs[0].table_name, 42);
let chunks_1 = vec![chunk_addrs[0].clone().into()];
t.delete_predicate(&predicate_1, &chunks_1).unwrap();
state.delete_predicate(predicate_1, chunks_1).unwrap();
state.delete_predicate(predicate_1, chunks_1);
let predicate_2 = create_delete_predicate(&chunk_addrs[0].table_name, 1337);
let chunks_2 = vec![chunk_addrs[0].clone().into(), chunk_addrs[1].clone().into()];
t.delete_predicate(&predicate_2, &chunks_2).unwrap();
state.delete_predicate(predicate_2, chunks_2).unwrap();
state.delete_predicate(predicate_2, chunks_2);
t.commit().await.unwrap();
}

View File

@ -93,13 +93,6 @@ pub enum CatalogStateRemoveError {
ParquetFileDoesNotExist { path: ParquetFilePath },
}
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum CatalogStateDeletePredicateError {
#[snafu(display("Chunk does not exist in catalog: {}", chunk))]
ChunkDoesNotExist { chunk: ChunkAddrWithoutDatabase },
}
/// Abstraction over how the in-memory state of the catalog works.
pub trait CatalogState {
/// Input to create a new empty instance.
@ -127,7 +120,7 @@ pub trait CatalogState {
&mut self,
predicate: Arc<Predicate>,
chunks: Vec<ChunkAddrWithoutDatabase>,
) -> Result<(), CatalogStateDeletePredicateError>;
);
}
/// Structure that holds all information required to create a checkpoint.

View File

@ -10,15 +10,14 @@ use std::{
use data_types::chunk_metadata::ChunkId;
use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath};
use predicate::predicate::Predicate;
use snafu::{OptionExt, ResultExt};
use snafu::ResultExt;
use crate::{
catalog::{
core::PreservedCatalog,
interface::{
CatalogParquetInfo, CatalogState, CatalogStateAddError,
CatalogStateDeletePredicateError, CatalogStateRemoveError, CheckpointData,
ChunkAddrWithoutDatabase,
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
CheckpointData, ChunkAddrWithoutDatabase,
},
internals::{
proto_io::{load_transaction_proto, store_transaction_proto},
@ -200,30 +199,19 @@ impl CatalogState for TestCatalogState {
&mut self,
predicate: Arc<Predicate>,
chunks: Vec<ChunkAddrWithoutDatabase>,
) -> Result<(), CatalogStateDeletePredicateError> {
use crate::catalog::interface::ChunkDoesNotExist;
) {
for addr in chunks {
self.tables
if let Some(chunk) = self
.tables
.get_mut(&addr.table_name)
.context(ChunkDoesNotExist {
chunk: addr.clone(),
})?
.partitions
.get_mut(&addr.partition_key)
.context(ChunkDoesNotExist {
chunk: addr.clone(),
})?
.chunks
.get_mut(&addr.chunk_id)
.context(ChunkDoesNotExist {
chunk: addr.clone(),
})?
.delete_predicates
.push(Arc::clone(&predicate));
.map(|table| table.partitions.get_mut(&addr.partition_key))
.flatten()
.map(|partition| partition.chunks.get_mut(&addr.chunk_id))
.flatten()
{
chunk.delete_predicates.push(Arc::clone(&predicate));
}
}
Ok(())
}
}
@ -535,17 +523,13 @@ where
// first predicate used only a single chunk
let predicate_1 = create_delete_predicate(&chunk_addr_1.table_name, 1);
let chunks_1 = vec![chunk_addr_1.clone().into()];
state
.delete_predicate(Arc::clone(&predicate_1), chunks_1.clone())
.unwrap();
state.delete_predicate(Arc::clone(&predicate_1), chunks_1.clone());
expected_predicates.push((predicate_1, chunks_1));
// second predicate uses both chunks (but not the older chunks)
let predicate_2 = create_delete_predicate(&chunk_addr_2.table_name, 2);
let chunks_2 = vec![chunk_addr_1.into(), chunk_addr_2.into()];
state
.delete_predicate(Arc::clone(&predicate_2), chunks_2.clone())
.unwrap();
state.delete_predicate(Arc::clone(&predicate_2), chunks_2.clone());
expected_predicates.push((predicate_2, chunks_2));
// chunks created afterwards are unaffected
@ -588,7 +572,8 @@ where
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
// registering predicates for unknown chunks errors
// Registering predicates for unknown chunks is just ignored because chunks might have been in a "persisting"
// intermediate state while the predicate was reported.
{
let predicate = create_delete_predicate("some_table", 1);
let chunks = vec![ChunkAddrWithoutDatabase {
@ -596,14 +581,9 @@ where
partition_key: Arc::from("part"),
chunk_id: ChunkId::new(1000),
}];
let err = state
.delete_predicate(Arc::clone(&predicate), chunks)
.unwrap_err();
assert!(matches!(
err,
CatalogStateDeletePredicateError::ChunkDoesNotExist { .. }
));
state.delete_predicate(Arc::clone(&predicate), chunks);
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
}
/// Assert that tracked files and their linked metadata are equal.

View File

@ -79,8 +79,7 @@ def main():
try:
if not args.skip_build:
build_with_aws = args.object_store == 's3'
build_with_jaeger = do_trace
cargo_build_iox(args.debug, build_with_aws, build_with_jaeger)
cargo_build_iox(args.debug, build_with_aws)
docker_create_network(dc)
if args.kafka_zookeeper:
@ -382,15 +381,13 @@ def docker_run_jaeger(dc):
return container
def cargo_build_iox(debug=False, build_with_aws=True, build_with_jaeger=True):
def cargo_build_iox(debug=False, build_with_aws=True):
t = time.time()
print('building IOx')
features = []
if build_with_aws:
features.append('aws')
if build_with_jaeger:
features.append('jaeger')
features = ','.join(features)
env = os.environ.copy()
@ -663,7 +660,7 @@ def run_test_battery(battery_name, router_id, writer_id, debug=False, do_trace=F
headers = {}
if do_trace:
# TODO remove this after IOx can be configured to sample 100% of traces
headers['uber-trace-id'] = '%x:%x:0:1' % (random.randrange(0, 2 ** 64), random.randrange(0, 2 ** 64))
headers['jaeger-debug-id'] = 'from-perf'
response = requests.get(url=query_url, params=params, headers=headers)
time_delta = '%dms' % math.floor((time.time() - time_start) * 1000)

View File

@ -11,9 +11,8 @@ use std::{
use data_types::timestamp::TimestampRange;
use datafusion::{
error::DataFusionError,
logical_plan::{col, lit, Column, Expr, Operator},
logical_plan::{col, lit, lit_timestamp_nano, Column, Expr, Operator},
optimizer::utils,
scalar::ScalarValue,
};
use datafusion_util::{make_range_expr, AndExprBuilder};
use internal_types::schema::TIME_COLUMN_NAME;
@ -212,15 +211,11 @@ impl Predicate {
// Time range
if let Some(range) = pred.range {
// cast int to timestamp
// NGA todo: add in DF a function timestamp_lit(i64_val) which does lit(ScalarValue::TimestampNanosecond(Some(i64_val))
// and use it here
let ts_start = ScalarValue::TimestampNanosecond(Some(range.start));
let ts_end = ScalarValue::TimestampNanosecond(Some(range.end));
// time_expr = NOT(start <= time_range < end)
// Equivalent to: (time < start OR time >= end)
let time_expr = col(TIME_COLUMN_NAME)
.lt(lit(ts_start))
.or(col(TIME_COLUMN_NAME).gt_eq(lit(ts_end)));
.lt(lit_timestamp_nano(range.start))
.or(col(TIME_COLUMN_NAME).gt_eq(lit_timestamp_nano(range.end)));
match expr {
None => expr = Some(time_expr),

View File

@ -1,6 +1,7 @@
//! This module contains plumbing to connect InfluxDB IOx extensions to
//! DataFusion
use async_trait::async_trait;
use std::{fmt, sync::Arc};
use arrow::record_batch::RecordBatch;
@ -50,10 +51,11 @@ pub const DEFAULT_SCHEMA: &str = "iox";
/// and is needed to create plans with the IOx extension nodes.
struct IOxQueryPlanner {}
#[async_trait]
impl QueryPlanner for IOxQueryPlanner {
/// Given a `LogicalPlan` created from above, create an
/// `ExecutionPlan` suitable for execution
fn create_physical_plan(
async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan,
ctx_state: &ExecutionContextState,
@ -63,7 +65,9 @@ impl QueryPlanner for IOxQueryPlanner {
let physical_planner =
DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(IOxExtensionPlanner {})]);
// Delegate most work of physical planning to the default physical planner
physical_planner.create_physical_plan(logical_plan, ctx_state)
physical_planner
.create_physical_plan(logical_plan, ctx_state)
.await
}
}
@ -246,15 +250,15 @@ impl IOxExecutionContext {
/// Prepare a SQL statement for execution. This assumes that any
/// tables referenced in the SQL have been registered with this context
pub fn prepare_sql(&self, sql: &str) -> Result<Arc<dyn ExecutionPlan>> {
pub async fn prepare_sql(&self, sql: &str) -> Result<Arc<dyn ExecutionPlan>> {
let ctx = self.child_ctx("prepare_sql");
debug!(text=%sql, "planning SQL query");
let logical_plan = ctx.inner.create_logical_plan(sql)?;
ctx.prepare_plan(&logical_plan)
ctx.prepare_plan(&logical_plan).await
}
/// Prepare (optimize + plan) a pre-created logical plan for execution
pub fn prepare_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
pub async fn prepare_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
let mut ctx = self.child_ctx("prepare_plan");
debug!(text=%plan.display_indent_schema(), "prepare_plan: initial plan");
@ -263,7 +267,7 @@ impl IOxExecutionContext {
ctx.recorder.event("optimized plan");
trace!(text=%plan.display_indent_schema(), graphviz=%plan.display_graphviz(), "optimized plan");
let physical_plan = ctx.inner.create_physical_plan(&plan)?;
let physical_plan = ctx.inner.create_physical_plan(&plan).await?;
ctx.recorder.event("plan to run");
debug!(text=%displayable(physical_plan.as_ref()).indent(), "prepare_plan: plan to run");
@ -366,7 +370,7 @@ impl IOxExecutionContext {
let tag_columns = Arc::new(tag_columns);
let physical_plan = ctx.prepare_plan(&plan)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
let it = ctx.execute_stream(physical_plan).await?;
@ -411,7 +415,7 @@ impl IOxExecutionContext {
.map(|plan| {
let ctx = self.child_ctx("to_field_list");
self.run(async move {
let physical_plan = ctx.prepare_plan(&plan)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
// TODO: avoid this buffering
let field_list =
@ -468,7 +472,7 @@ impl IOxExecutionContext {
.map(|plan| {
let ctx = self.child_ctx("run_logical_plans");
self.run(async move {
let physical_plan = ctx.prepare_plan(&plan)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
// TODO: avoid this buffering
ctx.collect(physical_plan).await

View File

@ -78,6 +78,7 @@ mod test {
let plan = executor
.new_context(ExecutorType::Reorg)
.prepare_plan(&split_plan)
.await
.unwrap();
let mut stream0 = plan.execute(0).await.expect("ran the plan");

View File

@ -194,7 +194,7 @@ impl ReorgPlanner {
/// that will be best for RLE encoding.
///
/// Refer to query::provider::build_scan_plan for the details of the plan
///
///
fn sorted_scan_plan<C, I>(&self, schema: Arc<Schema>, chunks: I) -> Result<ScanPlan<C>>
where
C: QueryChunk + 'static,
@ -350,6 +350,7 @@ mod test {
let physical_plan = executor
.new_context(ExecutorType::Reorg)
.prepare_plan(&compact_plan)
.await
.unwrap();
assert_eq!(
physical_plan.output_partitioning().partition_count(),
@ -406,6 +407,7 @@ mod test {
let physical_plan = executor
.new_context(ExecutorType::Reorg)
.prepare_plan(&split_plan)
.await
.unwrap();
assert_eq!(

View File

@ -14,7 +14,11 @@ impl SqlQueryPlanner {
/// Plan a SQL query against the catalogs registered with `ctx`, and return a
/// DataFusion physical execution plan that runs on the query executor.
pub fn query(&self, query: &str, ctx: &IOxExecutionContext) -> Result<Arc<dyn ExecutionPlan>> {
ctx.prepare_sql(query)
pub async fn query(
&self,
query: &str,
ctx: &IOxExecutionContext,
) -> Result<Arc<dyn ExecutionPlan>> {
ctx.prepare_sql(query).await
}
}

View File

@ -1,5 +1,6 @@
//! Implementation of a DataFusion `TableProvider` in terms of `QueryChunk`s
use async_trait::async_trait;
use std::sync::Arc;
use arrow::{datatypes::SchemaRef as ArrowSchemaRef, error::ArrowError};
@ -212,6 +213,7 @@ impl<C: QueryChunk + 'static> ChunkTableProvider<C> {
}
}
#[async_trait]
impl<C: QueryChunk + 'static> TableProvider for ChunkTableProvider<C> {
fn as_any(&self) -> &dyn std::any::Any {
self
@ -222,7 +224,7 @@ impl<C: QueryChunk + 'static> TableProvider for ChunkTableProvider<C> {
self.arrow_schema()
}
fn scan(
async fn scan(
&self,
projection: &Option<Vec<usize>>,
_batch_size: usize,
@ -950,12 +952,9 @@ impl<C: QueryChunk + 'static> Deduplicater<C> {
}
fn no_delete_predicates(chunks: &[Arc<C>]) -> bool {
for chunk in chunks {
if !chunk.delete_predicates().is_empty() {
return false;
}
}
true
chunks
.iter()
.all(|chunk| chunk.delete_predicates().is_empty())
}
/// Find the columns needed in the primary key across schemas

View File

@ -1,7 +1,7 @@
use arrow::datatypes::DataType;
use arrow_util::assert_batches_eq;
use datafusion::logical_plan::{col, lit};
use predicate::predicate::PredicateBuilder;
use predicate::predicate::{Predicate, PredicateBuilder};
use query::{
exec::{
fieldlist::{Field, FieldList},
@ -17,42 +17,45 @@ use crate::scenarios::*;
///
/// runs field_column_names(predicate) and compares it to the expected
/// output
macro_rules! run_field_columns_test_case {
($DB_SETUP:expr, $PREDICATE:expr, $EXPECTED_FIELDS:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let expected_fields = $EXPECTED_FIELDS;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
async fn run_field_columns_test_case<D>(
db_setup: D,
predicate: Predicate,
expected_fields: FieldList,
) where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plan = planner
.field_columns(db.as_ref(), predicate.clone())
.expect("built plan successfully");
let fields = ctx
.to_field_list(plan)
.await
.expect("converted plan to strings successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
assert_eq!(
fields, expected_fields,
"Error in scenario '{}'\n\nexpected:\n{:#?}\nactual:\n{:#?}",
scenario_name, expected_fields, fields
);
}
};
let plan = planner
.field_columns(db.as_ref(), predicate.clone())
.expect("built plan successfully");
let fields = ctx
.to_field_list(plan)
.await
.expect("converted plan to strings successfully");
assert_eq!(
fields, expected_fields,
"Error in scenario '{}'\n\nexpected:\n{:#?}\nactual:\n{:#?}",
scenario_name, expected_fields, fields
);
}
}
#[tokio::test]
async fn test_field_columns_empty_database() {
let predicate = PredicateBuilder::default().build();
let expected_fields = FieldList::default();
run_field_columns_test_case!(NoData {}, predicate, expected_fields);
run_field_columns_test_case(NoData {}, predicate, expected_fields).await;
}
#[tokio::test]
@ -62,7 +65,7 @@ async fn test_field_columns_no_predicate() {
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_fields = FieldList::default();
run_field_columns_test_case!(TwoMeasurementsManyFields {}, predicate, expected_fields);
run_field_columns_test_case(TwoMeasurementsManyFields {}, predicate, expected_fields).await;
}
#[tokio::test]
@ -93,7 +96,7 @@ async fn test_field_columns_with_pred() {
],
};
run_field_columns_test_case!(TwoMeasurementsManyFields {}, predicate, expected_fields);
run_field_columns_test_case(TwoMeasurementsManyFields {}, predicate, expected_fields).await;
}
#[tokio::test]
@ -112,7 +115,7 @@ async fn test_field_columns_with_ts_pred() {
}],
};
run_field_columns_test_case!(TwoMeasurementsManyFields {}, predicate, expected_fields);
run_field_columns_test_case(TwoMeasurementsManyFields {}, predicate, expected_fields).await;
}
#[tokio::test]

View File

@ -34,33 +34,36 @@ impl DbSetup for TwoMeasurementsMultiSeries {
/// runs read_filter(predicate) and compares it to the expected
/// output
macro_rules! run_read_filter_test_case {
($DB_SETUP:expr, $PREDICATE:expr, $EXPECTED_RESULTS:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let expected_results = $EXPECTED_RESULTS;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
async fn run_read_filter_test_case<D>(
db_setup: D,
predicate: Predicate,
expected_results: Vec<&str>,
) where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plan = planner
.read_filter(db.as_ref(), predicate.clone())
.expect("built plan successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
let string_results = run_series_set_plan(&ctx, plan).await;
let plan = planner
.read_filter(db.as_ref(), predicate.clone())
.expect("built plan successfully");
assert_eq!(
expected_results, string_results,
"Error in scenario '{}'\n\nexpected:\n{:#?}\n\nactual:\n{:#?}",
scenario_name, expected_results, string_results
);
}
};
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
let string_results = run_series_set_plan(&ctx, plan).await;
assert_eq!(
expected_results, string_results,
"Error in scenario '{}'\n\nexpected:\n{:#?}\n\nactual:\n{:#?}",
scenario_name, expected_results, string_results
);
}
}
#[tokio::test]
@ -68,7 +71,7 @@ async fn test_read_filter_no_data_no_pred() {
let predicate = EMPTY_PREDICATE;
let expected_results = vec![] as Vec<&str>;
run_read_filter_test_case!(NoData {}, predicate, expected_results);
run_read_filter_test_case(NoData {}, predicate, expected_results).await;
}
#[tokio::test]
@ -130,7 +133,7 @@ async fn test_read_filter_data_no_pred() {
"+--------+-------+---------+------+--------------------------------+",
];
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurementsMultiSeries {}, predicate, expected_results).await;
}
#[tokio::test]
@ -159,11 +162,12 @@ async fn test_read_filter_data_filter() {
"+------+-------+------+--------------------------------+",
];
run_read_filter_test_case!(
run_read_filter_test_case(
TwoMeasurementsMultiSeries {},
predicate,
expected_results.clone()
);
expected_results.clone(),
)
.await;
// Same results via a != predicate.
let predicate = PredicateBuilder::default()
@ -171,7 +175,7 @@ async fn test_read_filter_data_filter() {
.add_expr(col("state").not_eq(lit("MA"))) // state=CA
.build();
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurementsMultiSeries {}, predicate, expected_results).await;
}
#[tokio::test]
@ -214,7 +218,7 @@ async fn test_read_filter_data_filter_fields() {
"+------+-------+--------------------------------+",
];
run_read_filter_test_case!(TwoMeasurementsManyFields {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurementsManyFields {}, predicate, expected_results).await;
}
#[tokio::test]
@ -225,7 +229,7 @@ async fn test_read_filter_data_pred_refers_to_non_existent_column() {
let expected_results = vec![] as Vec<&str>;
run_read_filter_test_case!(TwoMeasurements {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurements {}, predicate, expected_results).await;
}
#[tokio::test]
@ -267,7 +271,7 @@ async fn test_read_filter_data_pred_no_columns() {
"+--------+-------+--------------------------------+",
];
run_read_filter_test_case!(TwoMeasurements {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurements {}, predicate, expected_results).await;
}
#[tokio::test]
@ -280,7 +284,7 @@ async fn test_read_filter_data_pred_refers_to_good_and_non_existent_columns() {
let expected_results = vec![] as Vec<&str>;
run_read_filter_test_case!(TwoMeasurements {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurements {}, predicate, expected_results).await;
}
#[tokio::test]
@ -309,7 +313,7 @@ async fn test_read_filter_data_pred_using_regex_match() {
"+------+-------+------+--------------------------------+",
];
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurementsMultiSeries {}, predicate, expected_results).await;
}
#[tokio::test]
@ -354,7 +358,7 @@ async fn test_read_filter_data_pred_using_regex_not_match() {
"+--------+-------+---------+------+--------------------------------+",
];
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurementsMultiSeries {}, predicate, expected_results).await;
}
#[tokio::test]
@ -391,7 +395,7 @@ async fn test_read_filter_data_pred_unsupported_in_scan() {
"+--------+-------+---------+------+--------------------------------+",
];
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
run_read_filter_test_case(TwoMeasurementsMultiSeries {}, predicate, expected_results).await;
}
#[derive(Debug)]
@ -523,5 +527,5 @@ async fn test_read_filter_data_plan_order() {
"+----------+-------+--------+-------+------+--------------------------------+",
];
run_read_filter_test_case!(MeasurementsSortableTags {}, predicate, expected_results);
run_read_filter_test_case(MeasurementsSortableTags {}, predicate, expected_results).await;
}

View File

@ -9,70 +9,73 @@ use query::{frontend::influxrpc::InfluxRpcPlanner, group_by::Aggregate};
/// runs read_group(predicate) and compares it to the expected
/// output
macro_rules! run_read_group_test_case {
($DB_SETUP:expr, $PREDICATE:expr, $AGG:expr, $GROUP_COLUMNS:expr, $EXPECTED_RESULTS:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let agg = $AGG;
let group_columns = $GROUP_COLUMNS;
let expected_results = $EXPECTED_RESULTS;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
async fn run_read_group_test_case<D>(
db_setup: D,
predicate: Predicate,
agg: Aggregate,
group_columns: Vec<&str>,
expected_results: Vec<&str>,
) where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plans = planner
.read_group(db.as_ref(), predicate.clone(), agg, &group_columns)
.expect("built plan successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
let plans = plans.into_inner();
let plans = planner
.read_group(db.as_ref(), predicate.clone(), agg, &group_columns)
.expect("built plan successfully");
for (i, plan) in plans.iter().enumerate() {
assert_eq!(
plan.num_prefix_tag_group_columns,
Some(group_columns.len()),
"Mismatch in plan index {}",
i
);
}
let mut string_results = vec![];
for plan in plans.into_iter() {
let batches = ctx
.run_logical_plan(plan.plan)
.await
.expect("ok running plan");
string_results.extend(
pretty_format_batches(&batches)
.expect("formatting results")
.trim()
.split('\n')
.map(|s| s.to_string()),
);
}
let plans = plans.into_inner();
for (i, plan) in plans.iter().enumerate() {
assert_eq!(
expected_results, string_results,
"Error in scenario '{}'\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}",
scenario_name, expected_results, string_results
plan.num_prefix_tag_group_columns,
Some(group_columns.len()),
"Mismatch in plan index {}",
i
);
}
};
let mut string_results = vec![];
for plan in plans.into_iter() {
let batches = ctx
.run_logical_plan(plan.plan)
.await
.expect("ok running plan");
string_results.extend(
pretty_format_batches(&batches)
.expect("formatting results")
.trim()
.split('\n')
.map(|s| s.to_string()),
);
}
assert_eq!(
expected_results, string_results,
"Error in scenario '{}'\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}",
scenario_name, expected_results, string_results
);
}
}
#[tokio::test]
async fn test_read_group_no_data_no_pred() {
let predicate = Predicate::default();
let agg = Aggregate::Mean;
let group_columns = vec![] as Vec<String>;
let group_columns = vec![] as Vec<&str>;
let expected_results = vec![] as Vec<&str>;
run_read_group_test_case!(NoData {}, predicate, agg, group_columns, expected_results);
run_read_group_test_case(NoData {}, predicate, agg, group_columns, expected_results).await;
}
struct OneMeasurementForAggs {}
@ -110,13 +113,14 @@ async fn test_read_group_data_pred() {
"+-------+------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
OneMeasurementForAggs {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
#[tokio::test]
@ -136,13 +140,14 @@ async fn test_read_group_data_field_restriction() {
"+-------+--------+-------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
OneMeasurementForAggs {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
struct AnotherMeasurementForAggs {}
@ -194,13 +199,14 @@ async fn test_grouped_series_set_plan_sum() {
"+-------+-----------+----------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
AnotherMeasurementForAggs {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
#[tokio::test]
@ -228,13 +234,14 @@ async fn test_grouped_series_set_plan_count() {
"+-------+-----------+----------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
AnotherMeasurementForAggs {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
#[tokio::test]
@ -262,13 +269,14 @@ async fn test_grouped_series_set_plan_mean() {
"+-------+-----------+----------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
AnotherMeasurementForAggs {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
struct MeasurementForSelectors {}
@ -306,13 +314,14 @@ async fn test_grouped_series_set_plan_first() {
"+-------+-----------+------+-----------------------------+---+-----------------------------+---+-----------------------------+---+-----------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForSelectors {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
#[tokio::test]
@ -333,13 +342,14 @@ async fn test_grouped_series_set_plan_last() {
"+-------+-----------+-------+-----------------------------+---+-----------------------------+---+-----------------------------+---+-----------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForSelectors {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
struct MeasurementForMin {}
@ -379,13 +389,14 @@ async fn test_grouped_series_set_plan_min() {
"+-------+-----------+-------+-----------------------------+---+-----------------------------+---+-----------------------------+---+-----------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForMin {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
struct MeasurementForMax {}
@ -423,13 +434,14 @@ async fn test_grouped_series_set_plan_max() {
"+-------+-----------+------+-----------------------------+---+-----------------------------+---+-----------------------------+---+-----------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForMax {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
struct MeasurementForGroupKeys {}
@ -472,13 +484,14 @@ async fn test_grouped_series_set_plan_group_by_state_city() {
"+-------+-----------+----------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForGroupKeys {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
#[tokio::test]
@ -500,13 +513,14 @@ async fn test_grouped_series_set_plan_group_by_city_state() {
"+-----------+-------+----------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForGroupKeys {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}
#[tokio::test]
@ -532,11 +546,12 @@ async fn test_grouped_series_set_plan_group_aggregate_none() {
"+-----------+-------+----------+------+--------------------------------+",
];
run_read_group_test_case!(
run_read_group_test_case(
MeasurementForGroupKeys {},
predicate,
agg,
group_columns,
expected_results
);
expected_results,
)
.await;
}

View File

@ -14,58 +14,61 @@ use query::{
/// runs read_window_aggregate(predicate) and compares it to the expected
/// output
macro_rules! run_read_window_aggregate_test_case {
($DB_SETUP:expr, $PREDICATE:expr, $AGG:expr, $EVERY:expr, $OFFSET:expr, $EXPECTED_RESULTS:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let agg = $AGG;
let every = $EVERY;
let offset = $OFFSET;
let expected_results = $EXPECTED_RESULTS;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
async fn run_read_window_aggregate_test_case<D>(
db_setup: D,
predicate: Predicate,
agg: Aggregate,
every: WindowDuration,
offset: WindowDuration,
expected_results: Vec<&str>,
) where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plans = planner
.read_window_aggregate(
db.as_ref(),
predicate.clone(),
agg,
every.clone(),
offset.clone(),
)
.expect("built plan successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
let plans = plans.into_inner();
let plans = planner
.read_window_aggregate(
db.as_ref(),
predicate.clone(),
agg,
every.clone(),
offset.clone(),
)
.expect("built plan successfully");
let mut string_results = vec![];
for plan in plans.into_iter() {
let batches = ctx
.run_logical_plan(plan.plan)
.await
.expect("ok running plan");
let plans = plans.into_inner();
string_results.extend(
pretty_format_batches(&batches)
.expect("formatting results")
.trim()
.split('\n')
.map(|s| s.to_string()),
);
}
let mut string_results = vec![];
for plan in plans.into_iter() {
let batches = ctx
.run_logical_plan(plan.plan)
.await
.expect("ok running plan");
assert_eq!(
expected_results, string_results,
"Error in scenario '{}'\n\nexpected:\n{:#?}\n\nactual:\n{:#?}\n",
scenario_name, expected_results, string_results
string_results.extend(
pretty_format_batches(&batches)
.expect("formatting results")
.trim()
.split('\n')
.map(|s| s.to_string()),
);
}
};
assert_eq!(
expected_results, string_results,
"Error in scenario '{}'\n\nexpected:\n{:#?}\n\nactual:\n{:#?}\n",
scenario_name, expected_results, string_results
);
}
}
#[tokio::test]
@ -76,14 +79,8 @@ async fn test_read_window_aggregate_no_data_no_pred() {
let offset = WindowDuration::from_nanoseconds(0);
let expected_results = vec![] as Vec<&str>;
run_read_window_aggregate_test_case!(
NoData {},
predicate,
agg,
every,
offset,
expected_results
);
run_read_window_aggregate_test_case(NoData {}, predicate, agg, every, offset, expected_results)
.await;
}
struct MeasurementForWindowAggregate {}
@ -142,14 +139,15 @@ async fn test_read_window_aggregate_nanoseconds() {
"+--------+-------+--------------------------------+------+",
];
run_read_window_aggregate_test_case!(
run_read_window_aggregate_test_case(
MeasurementForWindowAggregate {},
predicate,
agg,
every,
offset,
expected_results
);
expected_results,
)
.await;
}
struct MeasurementForWindowAggregateMonths {}
@ -224,12 +222,13 @@ async fn test_read_window_aggregate_months() {
"+--------+-------+----------------------+------+",
];
run_read_window_aggregate_test_case!(
run_read_window_aggregate_test_case(
MeasurementForWindowAggregateMonths {},
predicate,
agg,
every,
offset,
expected_results
);
expected_results,
)
.await;
}

View File

@ -9,68 +9,68 @@ use crate::scenarios::*;
/// runs table_names(predicate) and compares it to the expected
/// output
macro_rules! run_table_names_test_case {
($DB_SETUP:expr, $PREDICATE:expr, $EXPECTED_NAMES:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
async fn run_table_names_test_case<D>(db_setup: D, predicate: Predicate, expected_names: Vec<&str>)
where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plan = planner
.table_names(db.as_ref(), predicate.clone())
.expect("built plan successfully");
let names = ctx
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
let expected_names = $EXPECTED_NAMES;
assert_eq!(
names,
to_stringset(&expected_names),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_names,
names
);
}
};
let plan = planner
.table_names(db.as_ref(), predicate.clone())
.expect("built plan successfully");
let names = ctx
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
assert_eq!(
names,
to_stringset(&expected_names),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_names,
names
);
}
}
#[tokio::test]
async fn list_table_names_no_data_no_pred() {
run_table_names_test_case!(NoData {}, EMPTY_PREDICATE, vec![]);
run_table_names_test_case(NoData {}, EMPTY_PREDICATE, vec![]).await;
}
#[tokio::test]
async fn list_table_names_no_data_pred() {
run_table_names_test_case!(TwoMeasurements {}, EMPTY_PREDICATE, vec!["cpu", "disk"]);
run_table_names_test_case(TwoMeasurements {}, EMPTY_PREDICATE, vec!["cpu", "disk"]).await;
}
#[tokio::test]
async fn list_table_names_data_pred_0_201() {
run_table_names_test_case!(TwoMeasurements {}, tsp(0, 201), vec!["cpu", "disk"]);
run_table_names_test_case(TwoMeasurements {}, tsp(0, 201), vec!["cpu", "disk"]).await;
}
#[tokio::test]
async fn list_table_names_data_pred_0_200() {
run_table_names_test_case!(TwoMeasurements {}, tsp(0, 200), vec!["cpu"]);
run_table_names_test_case(TwoMeasurements {}, tsp(0, 200), vec!["cpu"]).await;
}
#[tokio::test]
async fn list_table_names_data_pred_50_101() {
run_table_names_test_case!(TwoMeasurements {}, tsp(50, 101), vec!["cpu"]);
run_table_names_test_case(TwoMeasurements {}, tsp(50, 101), vec!["cpu"]).await;
}
#[tokio::test]
async fn list_table_names_data_pred_250_300() {
run_table_names_test_case!(TwoMeasurements {}, tsp(250, 300), vec![]);
run_table_names_test_case(TwoMeasurements {}, tsp(250, 300), vec![]).await;
}
// make a single timestamp predicate between r1 and r2

View File

@ -1,5 +1,5 @@
use datafusion::logical_plan::{col, lit};
use predicate::predicate::PredicateBuilder;
use predicate::predicate::{Predicate, PredicateBuilder};
use query::{
exec::stringset::{IntoStringSet, StringSetRef},
frontend::influxrpc::InfluxRpcPlanner,
@ -12,45 +12,45 @@ use crate::scenarios::*;
///
/// runs tag_keys(predicate) and compares it to the expected
/// output
macro_rules! run_tag_keys_test_case {
($DB_SETUP:expr, $PREDICATE:expr, $EXPECTED_NAMES:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let expected_names = $EXPECTED_NAMES;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
async fn run_tag_keys_test_case<D>(db_setup: D, predicate: Predicate, expected_tag_keys: Vec<&str>)
where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plan = planner
.tag_keys(db.as_ref(), predicate.clone())
.expect("built plan successfully");
let names = ctx
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
assert_eq!(
names,
to_stringset(&expected_names),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_names,
names
);
}
};
let plan = planner
.tag_keys(db.as_ref(), predicate.clone())
.expect("built plan successfully");
let names = ctx
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
assert_eq!(
names,
to_stringset(&expected_tag_keys),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_tag_keys,
names
);
}
}
#[tokio::test]
async fn list_tag_columns_no_predicate() {
let predicate = PredicateBuilder::default().build();
let expected_tag_keys = vec!["borough", "city", "county", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
@ -59,7 +59,7 @@ async fn list_tag_columns_timestamp() {
.timestamp_range(150, 201)
.build();
let expected_tag_keys = vec!["city", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
async fn list_tag_columns_predicate() {
@ -67,7 +67,7 @@ async fn list_tag_columns_predicate() {
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["city", "county", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
@ -77,14 +77,14 @@ async fn list_tag_columns_timestamp_and_predicate() {
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["city", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
async fn list_tag_columns_measurement_name() {
let predicate = PredicateBuilder::default().table("o2").build();
let expected_tag_keys = vec!["borough", "city", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
@ -94,7 +94,7 @@ async fn list_tag_columns_measurement_name_and_timestamp() {
.timestamp_range(150, 201)
.build();
let expected_tag_keys = vec!["city", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
@ -104,7 +104,7 @@ async fn list_tag_columns_measurement_name_and_predicate() {
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["borough", "city", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
@ -115,7 +115,7 @@ async fn list_tag_columns_measurement_name_and_predicate_and_timestamp() {
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["city", "state"];
run_tag_keys_test_case!(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys);
run_tag_keys_test_case(TwoMeasurementsManyNulls {}, predicate, expected_tag_keys).await;
}
#[tokio::test]
@ -125,7 +125,7 @@ async fn list_tag_name_end_to_end() {
.add_expr(col("host").eq(lit("server01")))
.build();
let expected_tag_keys = vec!["host", "name", "region"];
run_tag_keys_test_case!(EndToEndTest {}, predicate, expected_tag_keys);
run_tag_keys_test_case(EndToEndTest {}, predicate, expected_tag_keys).await;
}
fn to_stringset(v: &[&str]) -> StringSetRef {

View File

@ -1,5 +1,5 @@
use datafusion::logical_plan::{col, lit};
use predicate::predicate::PredicateBuilder;
use predicate::predicate::{Predicate, PredicateBuilder};
use query::{
exec::stringset::{IntoStringSet, StringSetRef},
frontend::influxrpc::InfluxRpcPlanner,
@ -9,39 +9,42 @@ use crate::scenarios::*;
/// runs tag_values(tag_name, predicate) and compares it to the expected
/// output
macro_rules! run_tag_values_test_case {
($DB_SETUP:expr, $TAG_NAME:expr, $PREDICATE:expr, $EXPECTED_VALUES:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let tag_name = $TAG_NAME;
let expected_values = $EXPECTED_VALUES;
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
async fn run_tag_values_test_case<D>(
db_setup: D,
tag_name: &str,
predicate: Predicate,
expected_tag_values: Vec<&str>,
) where
D: DbSetup,
{
test_helpers::maybe_start_logging();
let plan = planner
.tag_values(db.as_ref(), &tag_name, predicate.clone())
.expect("built plan successfully");
let names = ctx
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRpcPlanner::new();
let ctx = db.executor().new_context(query::exec::ExecutorType::Query);
assert_eq!(
names,
to_stringset(&expected_values),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_values,
names
);
}
};
let plan = planner
.tag_values(db.as_ref(), tag_name, predicate.clone())
.expect("built plan successfully");
let names = ctx
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
assert_eq!(
names,
to_stringset(&expected_tag_values),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_tag_values,
names
);
}
}
#[tokio::test]
@ -50,12 +53,13 @@ async fn list_tag_values_no_tag() {
// If the tag is not present, expect no values back (not error)
let tag_name = "tag_not_in_chunks";
let expected_tag_keys = vec![];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -63,12 +67,13 @@ async fn list_tag_values_no_predicate_state_col() {
let predicate = PredicateBuilder::default().build();
let tag_name = "state";
let expected_tag_keys = vec!["CA", "MA", "NY"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -76,12 +81,13 @@ async fn list_tag_values_no_predicate_city_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default().build();
let expected_tag_keys = vec!["Boston", "LA", "NYC"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -89,12 +95,13 @@ async fn list_tag_values_timestamp_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default().timestamp_range(50, 201).build();
let expected_tag_keys = vec!["CA", "MA"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -104,12 +111,13 @@ async fn list_tag_values_state_pred_state_col() {
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["Boston"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -120,12 +128,13 @@ async fn list_tag_values_timestamp_and_state_pred_state_col() {
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["MA"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -133,12 +142,13 @@ async fn list_tag_values_table_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default().table("h2o").build();
let expected_tag_keys = vec!["CA", "MA"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -146,12 +156,13 @@ async fn list_tag_values_table_pred_city_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default().table("o2").build();
let expected_tag_keys = vec!["Boston", "NYC"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -162,12 +173,13 @@ async fn list_tag_values_table_and_timestamp_and_table_pred_state_col() {
.timestamp_range(50, 201)
.build();
let expected_tag_keys = vec!["MA"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -178,12 +190,13 @@ async fn list_tag_values_table_and_state_pred_state_col() {
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["NY"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -195,12 +208,13 @@ async fn list_tag_values_table_and_timestamp_and_state_pred_state_col() {
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["NY"];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]
@ -213,12 +227,13 @@ async fn list_tag_values_table_and_timestamp_and_state_pred_state_col_no_rows()
.build();
let expected_tag_keys = vec![];
run_tag_values_test_case!(
run_tag_values_test_case(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
expected_tag_keys,
)
.await;
}
#[tokio::test]

View File

@ -72,7 +72,7 @@ async fn chunk_pruning_sql() {
let query = "select * from cpu where bar < 3.0";
let ctx = db.new_query_context(None);
let physical_plan = SqlQueryPlanner::default().query(query, &ctx).unwrap();
let physical_plan = SqlQueryPlanner::default().query(query, &ctx).await.unwrap();
let batches = ctx.collect(physical_plan).await.unwrap();
assert_batches_sorted_eq!(&expected, &batches);

View File

@ -277,7 +277,10 @@ impl<W: Write> Runner<W> {
.with_default_catalog(db)
.build();
let physical_plan = planner.query(sql, &ctx).expect("built plan successfully");
let physical_plan = planner
.query(sql, &ctx)
.await
.expect("built plan successfully");
let results: Vec<RecordBatch> = ctx.collect(physical_plan).await.expect("Running plan");

View File

@ -10,28 +10,33 @@ use arrow::record_batch::RecordBatch;
use arrow_util::assert_batches_sorted_eq;
use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner};
/// runs table_names(predicate) and compares it to the expected
/// output
macro_rules! run_sql_test_case {
($DB_SETUP:expr, $SQL:expr, $EXPECTED_LINES:expr) => {
test_helpers::maybe_start_logging();
let sql = $SQL.to_string();
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
/// Runs the SQL query `sql` against each scenario and compares the
/// sorted results to `expected_lines`.
async fn run_sql_test_case<D>(db_setup: D, sql: &str, expected_lines: &[&str])
where
D: DbSetup,
{
test_helpers::maybe_start_logging();
println!("Running scenario '{}'", scenario_name);
println!("SQL: '{:#?}'", sql);
let planner = SqlQueryPlanner::default();
let ctx = db.new_query_context(None);
let sql = sql.to_string();
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
let physical_plan = planner.query(&sql, &ctx).expect("built plan successfully");
println!("Running scenario '{}'", scenario_name);
println!("SQL: '{:#?}'", sql);
let planner = SqlQueryPlanner::default();
let ctx = db.new_query_context(None);
let results: Vec<RecordBatch> = ctx.collect(physical_plan).await.expect("Running plan");
assert_batches_sorted_eq!($EXPECTED_LINES, &results);
}
};
let physical_plan = planner
.query(&sql, &ctx)
.await
.expect("built plan successfully");
let results: Vec<RecordBatch> = ctx.collect(physical_plan).await.expect("Running plan");
assert_batches_sorted_eq!(expected_lines, &results);
}
}
#[tokio::test]
@ -44,7 +49,7 @@ async fn sql_select_from_cpu() {
"| west | 1970-01-01T00:00:00.000000150Z | 21 |",
"+--------+--------------------------------+------+",
];
run_sql_test_case!(TwoMeasurements {}, "SELECT * from cpu", &expected);
run_sql_test_case(TwoMeasurements {}, "SELECT * from cpu", &expected).await;
}
#[tokio::test]
@ -57,11 +62,12 @@ async fn sql_select_from_cpu_2021() {
"| west | 2021-07-20T19:30:30Z | 21 |",
"+--------+----------------------+------+",
];
run_sql_test_case!(
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -74,26 +80,29 @@ async fn sql_select_from_cpu_with_timestamp_predicate_explicit_utc() {
"+--------+----------------------+------+",
];
run_sql_test_case!(
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00')",
&expected
);
&expected,
)
.await;
// Use RFC3339 format
run_sql_test_case!(
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z')",
&expected
);
&expected,
)
.await;
// use cast workaround
run_sql_test_case!(
run_sql_test_case(
OneMeasurementRealisticTimes {},
"SELECT * FROM cpu WHERE \
CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT)",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -107,11 +116,12 @@ async fn sql_select_from_cpu_with_projection() {
"| 21 | west |",
"+------+--------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurements {},
"SELECT user, region from cpu",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -123,11 +133,12 @@ async fn sql_select_from_cpu_pred() {
"| west | 1970-01-01T00:00:00.000000150Z | 21 |",
"+--------+--------------------------------+------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurements {},
"SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00')",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -140,11 +151,11 @@ async fn sql_select_from_cpu_with_projection_and_pred() {
"| 21 | west |",
"+------+--------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurements {},
"SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00')",
&expected
);
).await;
}
#[tokio::test]
@ -156,11 +167,12 @@ async fn sql_select_from_cpu_group() {
"| 2 |",
"+-----------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurements {},
"SELECT count(*) from cpu group by region",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -172,7 +184,7 @@ async fn sql_select_from_disk() {
"| 99 | east | 1970-01-01T00:00:00.000000200Z |",
"+-------+--------+--------------------------------+",
];
run_sql_test_case!(TwoMeasurements {}, "SELECT * from disk", &expected);
run_sql_test_case(TwoMeasurements {}, "SELECT * from disk", &expected).await;
}
#[tokio::test]
@ -187,7 +199,7 @@ async fn sql_select_with_schema_merge() {
"| foo | east | | 1970-01-01T00:00:00.000000100Z | 23.2 |",
"+------+--------+--------+--------------------------------+------+",
];
run_sql_test_case!(MultiChunkSchemaMerge {}, "SELECT * from cpu", &expected);
run_sql_test_case(MultiChunkSchemaMerge {}, "SELECT * from cpu", &expected).await;
}
#[tokio::test]
@ -200,11 +212,12 @@ async fn sql_select_from_restaurant() {
"| reading | 632 |",
"+---------+-------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsUnsignedType {},
"SELECT town, count from restaurant",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -217,11 +230,12 @@ async fn sql_select_from_school() {
"| andover | 25 |",
"+---------+-------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsUnsignedType {},
"SELECT town, count from school",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -243,12 +257,13 @@ async fn sql_select_from_information_schema_tables() {
"| public | system | persistence_windows | BASE TABLE |",
"+---------------+--------------------+---------------------+------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFields {},
"SELECT * from information_schema.tables",
&expected
);
run_sql_test_case!(TwoMeasurementsManyFields {}, "SHOW TABLES", &expected);
&expected,
)
.await;
run_sql_test_case(TwoMeasurementsManyFields {}, "SHOW TABLES", &expected).await;
}
#[tokio::test]
@ -272,11 +287,12 @@ async fn sql_select_from_information_schema_columns() {
"| public | iox | o2 | time | 4 | | NO | Timestamp(Nanosecond, None) | | | | | | | |",
"+---------------+--------------+------------+-------------+------------------+----------------+-------------+-----------------------------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFields {},
"SELECT * from information_schema.columns where table_name = 'h2o' OR table_name = 'o2'",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -295,11 +311,12 @@ async fn sql_show_columns() {
"| public | iox | h2o | time | Timestamp(Nanosecond, None) | NO |",
"+---------------+--------------+------------+-------------+-----------------------------+-------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFields {},
"SHOW COLUMNS FROM h2o",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -318,11 +335,12 @@ async fn sql_select_from_system_chunks() {
"| 0 | 1970-01-01T00 | o2 | OpenMutableBuffer | 1635 | 2 |",
"+----+---------------+------------+-------------------+--------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFieldsOneChunk {},
"SELECT id, partition_key, table_name, storage, memory_bytes, row_count from system.chunks",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -349,11 +367,12 @@ async fn sql_select_from_system_columns() {
"| 1970-01-01T00 | o2 | time | I64 | Timestamp |",
"+---------------+------------+-------------+-------------+---------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFieldsOneChunk {},
"SELECT * from system.columns",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -381,11 +400,12 @@ async fn sql_select_from_system_chunk_columns() {
"| 1970-01-01T00 | 1 | h2o | time | OpenMutableBuffer | 1 | 0 | 350 | 350 | 297 |",
"+---------------+----------+------------+-------------+-------------------+-----------+------------+-----------+-----------+--------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFieldsTwoChunks {},
"SELECT * from system.chunk_columns",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -403,11 +423,11 @@ async fn sql_select_from_system_operations() {
// Check that the cpu time used reported is greater than zero as it isn't
// repeatable
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFieldsLifecycle {},
"SELECT id, status, CAST(start_time as BIGINT) > 0 as start_time, CAST(cpu_time_used AS BIGINT) > 0 as took_cpu_time, CAST(wall_time_used AS BIGINT) > 0 as took_wall_time, table_name, partition_key, chunk_ids, description from system.operations",
&expected
);
).await;
}
#[tokio::test]
@ -427,11 +447,12 @@ async fn sql_union_all() {
"| Boston |",
"+--------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyFields {},
"select state as name from h2o UNION ALL select city as name from h2o",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -445,11 +466,12 @@ async fn sql_distinct_aggregates() {
"| 2 |",
"+-------------------------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyNulls {},
"select count(distinct city) from o2",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -465,11 +487,12 @@ async fn sql_aggregate_on_tags() {
"| 2 | NYC |",
"+-----------------+--------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsManyNulls {},
"select count(*), city from o2 group by city",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -484,11 +507,12 @@ async fn sql_select_with_schema_merge_subset() {
"| bar | west | |",
"+------+--------+--------+",
];
run_sql_test_case!(
run_sql_test_case(
MultiChunkSchemaMerge {},
"SELECT host, region, system from cpu",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -507,11 +531,12 @@ async fn sql_predicate_pushdown_correctness_1() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -529,11 +554,12 @@ async fn sql_predicate_pushdown_correctness_2() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where count > 200",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -550,11 +576,12 @@ async fn sql_predicate_pushdown_correctness_3() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where count > 200 and town != 'tewsbury'",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -571,11 +598,11 @@ async fn sql_predicate_pushdown_correctness_4() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence')",
&expected
);
).await;
}
#[tokio::test]
@ -590,11 +617,11 @@ async fn sql_predicate_pushdown_correctness_5() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000",
&expected
);
).await;
}
#[tokio::test]
@ -611,11 +638,12 @@ async fn sql_predicate_pushdown_correctness_6() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where count > 200 and count < 40000",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -633,11 +661,12 @@ async fn sql_predicate_pushdown_correctness_7() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+-----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where system > 4.0 and system < 7.0",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -652,11 +681,12 @@ async fn sql_predicate_pushdown_correctness_8() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where system > 5.0 and system < 7.0",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -670,11 +700,12 @@ async fn sql_predicate_pushdown_correctness_9() {
"| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |",
"+-------+--------+--------------------------------+----------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -688,11 +719,11 @@ async fn sql_predicate_pushdown_correctness_10() {
"| 632 | 6 | 1970-01-01T00:00:00.000000130Z | reading |",
"+-------+--------+--------------------------------+---------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading')",
&expected
);
).await;
}
#[tokio::test]
@ -701,11 +732,11 @@ async fn sql_predicate_pushdown_correctness_11() {
// time > to_timestamp('1970-01-01T00:00:00.000000120+00:00') (rewritten to time GT int(130))
//
let expected = vec!["++", "++"];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00')",
&expected
);
).await;
}
#[tokio::test]
@ -740,11 +771,12 @@ async fn sql_predicate_pushdown_correctness_13() {
"| 632 | 6 | 1970-01-01T00:00:00.000000130Z | reading |",
"+-------+--------+--------------------------------+---------+",
];
run_sql_test_case!(
run_sql_test_case(
TwoMeasurementsPredicatePushDown {},
"SELECT * from restaurant where system > 5.0 and system < 7.0 and town = 'reading'",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -776,7 +808,7 @@ async fn sql_deduplicate_1() {
"| 1970-01-01T00:00:00.000000700Z | CA | SJ | 75.5 | 84.08 | |",
"+--------------------------------+-------+---------+----------+----------+------+",
];
run_sql_test_case!(OneMeasurementThreeChunksWithDuplicates {}, sql, &expected);
run_sql_test_case(OneMeasurementThreeChunksWithDuplicates {}, sql, &expected).await;
}
#[tokio::test]
@ -785,11 +817,12 @@ async fn sql_select_non_keys() {
"+------+", "| temp |", "+------+", "| |", "| |", "| 53.4 |", "| 70.4 |",
"+------+",
];
run_sql_test_case!(
run_sql_test_case(
OneMeasurementTwoChunksDifferentTagSet {},
"SELECT temp from h2o",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -804,17 +837,16 @@ async fn sql_select_all_different_tags_chunks() {
"| Boston | 72.4 | | | | 1970-01-01T00:00:00.000000350Z |",
"+--------+------------+---------+-------+------+--------------------------------+",
];
run_sql_test_case!(
run_sql_test_case(
OneMeasurementTwoChunksDifferentTagSet {},
"SELECT * from h2o",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_with_deleted_data_from_one_expr() {
test_helpers::maybe_start_logging();
let expected = vec![
"+-----+--------------------------------+",
"| bar | time |",
@ -824,25 +856,28 @@ async fn sql_select_with_deleted_data_from_one_expr() {
];
// Data deleted when it is in MUB, and then moved to RUB and OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::DeleteFromMubOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
// Data deleted when it is in RUB, and then moved OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::DeleteFromRubOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
// Data deleted when it is in OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::DeleteFromOsOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -857,25 +892,28 @@ async fn sql_select_with_deleted_data_from_multi_exprs() {
];
// Data deleted when it is in MUB, and then moved to RUB and OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::DeleteMultiExprsFromMubOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
// Data deleted when it is in RUB, and then moved OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::DeleteMultiExprsFromRubOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
// Data deleted when it is in OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::DeleteMultiExprsFromOsOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
}
#[tokio::test]
@ -889,25 +927,28 @@ async fn sql_select_with_two_deleted_data_from_multi_exprs() {
];
// Data deleted when it is in MUB, and then moved to RUB and OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::TwoDeleteMultiExprsFromMubOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
// Data deleted when it is in RUB, and then moved OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::TwoDeleteMultiExprsFromRubOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
// Data deleted when it is in OS
run_sql_test_case!(
run_sql_test_case(
scenarios::delete::TwoDeleteMultiExprsFromOsOneMeasurementOneChunk {},
"SELECT * from cpu",
&expected
);
&expected,
)
.await;
}
#[tokio::test]

View File

@ -2,7 +2,7 @@
use arrow::datatypes::DataType;
use internal_types::{
schema::{builder::SchemaBuilder, sort::SortKey, TIME_COLUMN_NAME},
schema::{builder::SchemaBuilder, sort::SortKey, Schema, TIME_COLUMN_NAME},
selection::Selection,
};
use predicate::predicate::PredicateBuilder;
@ -15,48 +15,50 @@ use super::scenarios::*;
///
/// runs table_schema(predicate) and compares it to the expected
/// output
macro_rules! run_table_schema_test_case {
($DB_SETUP:expr, $SELECTION:expr, $TABLE_NAME:expr, $EXPECTED_SCHEMA:expr) => {
test_helpers::maybe_start_logging();
let selection = $SELECTION;
let table_name = $TABLE_NAME;
let expected_schema = $EXPECTED_SCHEMA;
async fn run_table_schema_test_case<D>(
db_setup: D,
selection: Selection<'_>,
table_name: &str,
expected_schema: Schema,
) where
D: DbSetup,
{
test_helpers::maybe_start_logging();
for scenario in $DB_SETUP.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!(
"Getting schema for table '{}', selection {:?}",
table_name, selection
);
for scenario in db_setup.make().await {
let DbScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!(
"Getting schema for table '{}', selection {:?}",
table_name, selection
);
// Make sure at least one table has data
let mut chunks_with_table = 0;
let predicate = PredicateBuilder::new().table(table_name).build();
// Make sure at least one table has data
let mut chunks_with_table = 0;
let predicate = PredicateBuilder::new().table(table_name).build();
for chunk in db.chunks(&predicate) {
if chunk.table_name().as_ref() == table_name {
chunks_with_table += 1;
let actual_schema = chunk.schema().select(selection.clone()).unwrap();
for chunk in db.chunks(&predicate) {
if chunk.table_name().as_ref() == table_name {
chunks_with_table += 1;
let actual_schema = chunk.schema().select(selection).unwrap();
assert_eq!(
expected_schema,
actual_schema,
"Mismatch in chunk {}\nExpected:\n{:#?}\nActual:\n{:#?}\n",
chunk.id(),
expected_schema,
actual_schema
);
}
assert_eq!(
expected_schema,
actual_schema,
"Mismatch in chunk {}\nExpected:\n{:#?}\nActual:\n{:#?}\n",
chunk.id(),
expected_schema,
actual_schema
);
}
assert!(
chunks_with_table > 0,
"Expected at least one chunk to have data, but none did"
);
}
};
assert!(
chunks_with_table > 0,
"Expected at least one chunk to have data, but none did"
);
}
}
#[tokio::test]
@ -69,12 +71,13 @@ async fn list_schema_cpu_all_mub() {
.build()
.unwrap();
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsMubScenario {},
Selection::All,
"cpu",
expected_schema
);
expected_schema,
)
.await;
}
#[tokio::test]
@ -92,12 +95,13 @@ async fn list_schema_cpu_all_rub() {
.build_with_sort_key(&sort_key)
.unwrap();
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsRubScenario {},
Selection::All,
"cpu",
expected_schema
);
expected_schema,
)
.await;
}
#[tokio::test]
@ -114,12 +118,13 @@ async fn list_schema_cpu_all_rub_set_sort_key() {
.build_with_sort_key(&sort_key)
.unwrap();
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsRubScenario {},
Selection::All,
"cpu",
expected_schema
);
expected_schema,
)
.await;
// Now set
}
@ -134,12 +139,13 @@ async fn list_schema_disk_all() {
.build()
.unwrap();
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsMubScenario {},
Selection::All,
"disk",
expected_schema
);
expected_schema,
)
.await;
}
#[tokio::test]
@ -153,12 +159,13 @@ async fn list_schema_cpu_selection() {
// Pick an order that is not lexicographic
let selection = Selection::Some(&["user", "region"]);
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsMubScenario {},
selection,
"cpu",
expected_schema
);
expected_schema,
)
.await;
}
#[tokio::test]
@ -173,12 +180,13 @@ async fn list_schema_disk_selection() {
// Pick an order that is not lexicographic
let selection = Selection::Some(&["time", "bytes"]);
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsMubScenario {},
selection,
"disk",
expected_schema
);
expected_schema,
)
.await;
}
#[tokio::test]
@ -191,12 +199,13 @@ async fn list_schema_location_all() {
.build()
.unwrap();
run_table_schema_test_case!(
run_table_schema_test_case(
TwoMeasurementsUnsignedTypeMubScenario {},
Selection::All,
"restaurant",
expected_schema
);
expected_schema,
)
.await;
}
#[tokio::test]

View File

@ -20,7 +20,7 @@ use snafu::{ensure, OptionExt, ResultExt, Snafu};
use ::lifecycle::{LifecycleChunk, LockableChunk, LockablePartition};
use data_types::{
chunk_metadata::{ChunkId, ChunkOrder, ChunkSummary},
chunk_metadata::{ChunkId, ChunkLifecycleAction, ChunkOrder, ChunkSummary},
database_rules::DatabaseRules,
partition_metadata::{PartitionSummary, TableSummary},
server_id::ServerId,
@ -166,6 +166,14 @@ pub enum Error {
#[snafu(display("Cannot replay: {}", source))]
ReplayError { source: crate::db::replay::Error },
#[snafu(display(
"Error while commiting delete predicate on preserved catalog: {}",
source
))]
CommitDeletePredicateError {
source: parquet_file::catalog::core::Error,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -521,24 +529,53 @@ impl Db {
table_name: &str,
delete_predicate: Arc<Predicate>,
) -> Result<()> {
// collect delete predicates on preserved partitions for a catalog transaction
let mut affected_persisted_chunks = vec![];
// get all partitions of this table
let table = self
.catalog
.table(table_name)
.context(DeleteFromTable { table_name })?;
let partitions = table.partitions();
for partition in partitions {
let partition = partition.write();
let chunks = partition.chunks();
for chunk in chunks {
// save the delete predicate in the chunk
let mut chunk = chunk.write();
chunk
.add_delete_predicate(Arc::clone(&delete_predicate))
.context(AddDeletePredicateError)?;
// Note: we need an additional scope here to convince rustc that the future produced by this function is sendable.
{
let table = self
.catalog
.table(table_name)
.context(DeleteFromTable { table_name })?;
let partitions = table.partitions();
for partition in partitions {
let partition = partition.write();
let chunks = partition.chunks();
for chunk in chunks {
// save the delete predicate in the chunk
let mut chunk = chunk.write();
chunk
.add_delete_predicate(Arc::clone(&delete_predicate))
.context(AddDeletePredicateError)?;
// We should only report persisted chunks or chunks that are currently being persisted, because the
// preserved catalog does not care about purely in-mem chunks.
if matches!(chunk.stage(), ChunkStage::Persisted { .. })
|| chunk.is_in_lifecycle(ChunkLifecycleAction::Persisting)
{
affected_persisted_chunks.push(ChunkAddrWithoutDatabase {
table_name: Arc::clone(&chunk.addr().table_name),
partition_key: Arc::clone(&chunk.addr().partition_key),
chunk_id: chunk.addr().chunk_id,
});
}
}
}
}
if !affected_persisted_chunks.is_empty() {
let mut transaction = self.preserved_catalog.open_transaction().await;
transaction
.delete_predicate(&delete_predicate, &affected_persisted_chunks)
.context(CommitDeletePredicateError)?;
transaction
.commit()
.await
.context(CommitDeletePredicateError)?;
}
Ok(())
}
@ -1219,8 +1256,14 @@ pub(crate) fn checkpoint_data_from_catalog(catalog: &Catalog) -> CheckpointData
};
files.insert(path, m);
}
// capture delete predicates
// capture delete predicates
// We should only report persisted chunks or chunks that are currently being persisted, because the
// preserved catalog does not care about purely in-mem chunks.
if matches!(guard.stage(), ChunkStage::Persisted { .. })
|| guard.is_in_lifecycle(ChunkLifecycleAction::Persisting)
{
for predicate in guard.delete_predicates() {
let predicate_ref: &Predicate = predicate.as_ref();
let addr = (predicate_ref as *const Predicate) as usize;
@ -1331,7 +1374,7 @@ pub mod test_helpers {
pub async fn run_query(db: Arc<Db>, query: &str) -> Vec<RecordBatch> {
let planner = SqlQueryPlanner::default();
let ctx = db.new_query_context(None);
let physical_plan = planner.query(query, &ctx).unwrap();
let physical_plan = planner.query(query, &ctx).await.unwrap();
ctx.collect(physical_plan).await.unwrap()
}
}
@ -1350,10 +1393,12 @@ mod tests {
use arrow::record_batch::RecordBatch;
use bytes::Bytes;
use chrono::{DateTime, TimeZone};
use datafusion::logical_plan::{col, lit};
use futures::{stream, StreamExt, TryStreamExt};
use predicate::predicate::PredicateBuilder;
use tokio_util::sync::CancellationToken;
use ::test_helpers::assert_contains;
use ::test_helpers::{assert_contains, maybe_start_logging};
use arrow_util::{assert_batches_eq, assert_batches_sorted_eq};
use data_types::{
chunk_metadata::{ChunkAddr, ChunkStorage},
@ -3559,6 +3604,239 @@ mod tests {
join_handle.await.unwrap();
}
#[tokio::test]
/// End-to-end check that a delete predicate survives a database restart.
///
/// The test builds four partitions of table `cpu` in different storage
/// states, issues one delete, and then verifies both the per-chunk delete
/// predicates and the query results three times: on the live DB, after
/// re-creating the DB from the same object store, and once more after the
/// catalog checkpoint files have been removed from the object store.
async fn delete_predicate_preservation() {
// Test that delete predicates are stored within the preserved catalog
maybe_start_logging();
// ==================== setup ====================
// The in-memory object store is shared (via `Arc`) by every `TestDb` built
// below, so data persisted by one DB instance is visible to the next one.
let object_store = Arc::new(ObjectStore::new_in_memory());
let server_id = ServerId::try_from(1).unwrap();
let db_name = "delete_predicate_preservation_test";
// ==================== do: create DB ====================
// Create a DB given a server id, an object store and a db name
let test_db = TestDb::builder()
.object_store(Arc::clone(&object_store))
.server_id(server_id)
.db_name(db_name)
.lifecycle_rules(LifecycleRules {
// NOTE(review): presumably a value of 1 makes the catalog write a
// checkpoint after every transaction -- confirm. The test relies on at
// least one checkpoint file existing (see `assert!(deleted_one)` below).
catalog_transactions_until_checkpoint: NonZeroU64::try_from(1).unwrap(),
late_arrive_window_seconds: NonZeroU32::try_from(1).unwrap(),
..Default::default()
})
// Partition on the `part` tag so each write below lands in its own
// partition (`part_a`, `part_b`, ...).
.partition_template(PartitionTemplate {
parts: vec![TemplatePart::Column("part".to_string())],
})
.build()
.await;
let db = Arc::new(test_db.db);
// ==================== do: create chunks ====================
// Every partition gets two rows: one with `selector=0` and one with
// `selector=1`. Only the `selector=1` rows are targeted by the delete below.
let table_name = "cpu";
// 1: preserved
// Rolled over, moved to the read buffer and then persisted.
let partition_key = "part_a";
write_lp(&db, "cpu,part=a row=10,selector=0i 10").await;
write_lp(&db, "cpu,part=a row=11,selector=1i 11").await;
let chunk_id = {
let mb_chunk = db
.rollover_partition(table_name, partition_key)
.await
.unwrap()
.unwrap();
mb_chunk.id()
};
db.move_chunk_to_read_buffer(table_name, partition_key, chunk_id)
.await
.unwrap();
db.persist_partition(
table_name,
partition_key,
Instant::now() + Duration::from_secs(1),
)
.await
.unwrap();
// 2: RUB
// Moved to the read buffer but NOT persisted before the delete; it is
// persisted only afterwards (see "preserve another partition" below).
let partition_key = "part_b";
write_lp(&db, "cpu,part=b row=20,selector=0i 20").await;
write_lp(&db, "cpu,part=b row=21,selector=1i 21").await;
let chunk_id = {
let mb_chunk = db
.rollover_partition(table_name, partition_key)
.await
.unwrap()
.unwrap();
mb_chunk.id()
};
db.move_chunk_to_read_buffer(table_name, partition_key, chunk_id)
.await
.unwrap();
// 3: MUB
// Written only; never rolled over or persisted in this test, which is why
// partition "c" is absent from the query results after the DB is re-loaded.
let _partition_key = "part_c";
write_lp(&db, "cpu,part=c row=30,selector=0i 30").await;
write_lp(&db, "cpu,part=c row=31,selector=1i 31").await;
// 4: preserved and unloaded
// Same as partition 1, but the read-buffer copy is unloaded afterwards.
let partition_key = "part_d";
write_lp(&db, "cpu,part=d row=40,selector=0i 40").await;
write_lp(&db, "cpu,part=d row=41,selector=1i 41").await;
let chunk_id = {
let mb_chunk = db
.rollover_partition(table_name, partition_key)
.await
.unwrap()
.unwrap();
mb_chunk.id()
};
db.move_chunk_to_read_buffer(table_name, partition_key, chunk_id)
.await
.unwrap();
// Persisting yields a chunk whose id is used for the unload call below.
let chunk_id = {
let chunk = db
.persist_partition(
table_name,
partition_key,
Instant::now() + Duration::from_secs(1),
)
.await
.unwrap();
chunk.id()
};
db.unload_read_buffer(table_name, partition_key, chunk_id)
.unwrap();
// ==================== do: delete ====================
// Delete predicate: table `cpu`, timestamp range (0, 1_000), `selector = 1`.
// All rows written above have timestamps between 10 and 41.
let expr = col("selector").eq(lit(1i64));
let pred = Arc::new(
PredicateBuilder::new()
.table("cpu")
.timestamp_range(0, 1_000)
.add_expr(expr)
.build(),
);
db.delete("cpu", Arc::clone(&pred)).await.unwrap();
// ==================== do: preserve another partition ====================
// Persist `part_b` AFTER the delete was registered.
let partition_key = "part_b";
db.persist_partition(
table_name,
partition_key,
Instant::now() + Duration::from_secs(1),
)
.await
.unwrap();
// ==================== check: delete predicates ====================
// Asserts that every chunk in the catalog (except those of `part_b`) carries
// exactly one delete predicate and that it equals `pred`. Reused after each
// re-load of the DB below.
let closure_check_delete_predicates = |db: &Db| {
for chunk in db.catalog.chunks() {
let chunk = chunk.read();
if chunk.addr().partition_key.as_ref() == "part_b" {
// Strictly speaking not required because the chunk was persisted AFTER the delete predicate was
// registered so we can get away with materializing it during persistence.
continue;
}
let predicates = chunk.delete_predicates();
assert_eq!(predicates.len(), 1);
assert_eq!(predicates[0].as_ref(), pred.as_ref());
}
};
closure_check_delete_predicates(&db);
// ==================== check: query ====================
// Only the `selector=0` row of each partition remains visible.
let expected = vec![
"+------+-----+----------+--------------------------------+",
"| part | row | selector | time |",
"+------+-----+----------+--------------------------------+",
"| a | 10 | 0 | 1970-01-01T00:00:00.000000010Z |",
"| b | 20 | 0 | 1970-01-01T00:00:00.000000020Z |",
"| c | 30 | 0 | 1970-01-01T00:00:00.000000030Z |",
"| d | 40 | 0 | 1970-01-01T00:00:00.000000040Z |",
"+------+-----+----------+--------------------------------+",
];
let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await;
assert_batches_sorted_eq!(&expected, &batches);
// ==================== do: re-load DB ====================
// Re-create database with same store, serverID, and DB name
drop(db);
let test_db = TestDb::builder()
.object_store(Arc::clone(&object_store))
.server_id(server_id)
.db_name(db_name)
.build()
.await;
let db = Arc::new(test_db.db);
// ==================== check: delete predicates ====================
closure_check_delete_predicates(&db);
// ==================== check: query ====================
// NOTE: partition "c" is gone here because it was not written to object store
let expected = vec![
"+------+-----+----------+--------------------------------+",
"| part | row | selector | time |",
"+------+-----+----------+--------------------------------+",
"| a | 10 | 0 | 1970-01-01T00:00:00.000000010Z |",
"| b | 20 | 0 | 1970-01-01T00:00:00.000000020Z |",
"| d | 40 | 0 | 1970-01-01T00:00:00.000000040Z |",
"+------+-----+----------+--------------------------------+",
];
let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await;
assert_batches_sorted_eq!(&expected, &batches);
// ==================== do: remove checkpoint files ====================
// NOTE(review): presumably this forces the next load to rebuild state from
// the non-checkpoint transaction files, exercising that code path for delete
// predicates as well -- confirm against the preserved catalog loader.
let files = db
.iox_object_store
.catalog_transaction_files()
.await
.unwrap()
.try_concat()
.await
.unwrap();
let mut deleted_one = false;
for file in files {
if file.is_checkpoint() {
db.iox_object_store
.delete_catalog_transaction_file(&file)
.await
.unwrap();
deleted_one = true;
}
}
// Guard against the test silently passing without removing any checkpoint.
assert!(deleted_one);
// ==================== do: re-load DB ====================
// Re-create database with same store, serverID, and DB name
drop(db);
let test_db = TestDb::builder()
.object_store(Arc::clone(&object_store))
.server_id(server_id)
.db_name(db_name)
.build()
.await;
let db = Arc::new(test_db.db);
// ==================== check: delete predicates ====================
closure_check_delete_predicates(&db);
// ==================== check: query ====================
// NOTE: partition "c" is gone here because it was not written to object store
let expected = vec![
"+------+-----+----------+--------------------------------+",
"| part | row | selector | time |",
"+------+-----+----------+--------------------------------+",
"| a | 10 | 0 | 1970-01-01T00:00:00.000000010Z |",
"| b | 20 | 0 | 1970-01-01T00:00:00.000000020Z |",
"| d | 40 | 0 | 1970-01-01T00:00:00.000000040Z |",
"+------+-----+----------+--------------------------------+",
];
let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await;
assert_batches_sorted_eq!(&expected, &batches);
}
#[tokio::test]
async fn table_wide_schema_enforcement() {
// need a table with a partition template that uses a tag column, so that we can easily write to different partitions

View File

@ -101,7 +101,7 @@ pub(crate) fn compact_chunks(
let (schema, plan) =
ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?;
let physical_plan = ctx.prepare_plan(&plan)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
let stream = ctx.execute_stream(physical_plan).await?;
let rb_chunk = collect_rub(stream, &addr, metric_registry.as_ref())
.await?

View File

@ -53,7 +53,7 @@ pub fn move_chunk_to_read_buffer(
let (schema, plan) =
ReorgPlanner::new().compact_plan(schema, query_chunks.iter().map(Arc::clone), key)?;
let physical_plan = ctx.prepare_plan(&plan)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
let stream = ctx.execute_stream(physical_plan).await?;
let rb_chunk = collect_rub(
stream,

View File

@ -108,7 +108,7 @@ where
flush_timestamp,
)?;
let physical_plan = ctx.prepare_plan(&plan)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
assert_eq!(
physical_plan.output_partitioning().partition_count(),
2,

View File

@ -165,6 +165,10 @@ where
};
transaction.add_parquet(&info);
// We do NOT need to report delete predicates here because they were either materialized during the write
// query above or, if they were added after the query, they were added to a transaction (or checkpoint)
// because the chunk here was marked as "persisting".
// preserved commit
let ckpt_handle = transaction.commit().await.context(CommitError)?;

View File

@ -8,9 +8,8 @@ use parquet_file::{
catalog::{
core::PreservedCatalog,
interface::{
CatalogParquetInfo, CatalogState, CatalogStateAddError,
CatalogStateDeletePredicateError, CatalogStateRemoveError, ChunkAddrWithoutDatabase,
ChunkCreationFailed,
CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError,
ChunkAddrWithoutDatabase, ChunkCreationFailed,
},
},
chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk},
@ -227,7 +226,10 @@ impl CatalogState for Loader {
.map_err(|e| Box::new(e) as _)
.context(SchemaError { path: info.path })?;
// Delete predicates are loaded explicitly via `CatalogState::delete_predicates` AFTER the chunk is added, so
// we leave this list empty (for now).
let delete_predicates: Vec<Arc<Predicate>> = vec![];
partition.insert_object_store_only_chunk(
iox_md.chunk_id,
parquet_chunk,
@ -276,21 +278,20 @@ impl CatalogState for Loader {
&mut self,
predicate: Arc<Predicate>,
chunks: Vec<ChunkAddrWithoutDatabase>,
) -> Result<(), CatalogStateDeletePredicateError> {
) {
for addr in chunks {
let (chunk, _order) = self
.catalog
.chunk(&addr.table_name, &addr.partition_key, addr.chunk_id)
.map_err(|_| CatalogStateDeletePredicateError::ChunkDoesNotExist {
chunk: addr.clone(),
})?;
let mut chunk = chunk.write();
chunk
.add_delete_predicate(Arc::clone(&predicate))
.expect("this should not fail for persisted chunks");
// The chunk might not exist because it might have been marked as "persisting" but either the persistence
// action never finished before the server restarted or failed.
if let Ok((chunk, _order)) =
self.catalog
.chunk(&addr.table_name, &addr.partition_key, addr.chunk_id)
{
let mut chunk = chunk.write();
chunk
.add_delete_predicate(Arc::clone(&predicate))
.expect("this should not fail for persisted chunks");
}
}
Ok(())
}
}

View File

@ -827,7 +827,7 @@ mod tests {
let planner = SqlQueryPlanner::default();
let ctx = db.new_query_context(None);
let physical_plan = match planner.query(query, &ctx) {
let physical_plan = match planner.query(query, &ctx).await {
Ok(physical_plan) => physical_plan,
err if use_assert => {
err.unwrap();

View File

@ -14,6 +14,7 @@ use arrow::{
error::Result,
record_batch::RecordBatch,
};
use async_trait::async_trait;
use datafusion::{
catalog::schema::SchemaProvider,
datasource::TableProvider,
@ -124,6 +125,7 @@ where
inner: T,
}
#[async_trait]
impl<T> TableProvider for SystemTableProvider<T>
where
T: IoxSystemTable + 'static,
@ -136,7 +138,7 @@ where
self.inner.schema()
}
fn scan(
async fn scan(
&self,
projection: &Option<Vec<usize>>,
_batch_size: usize,

View File

@ -2381,7 +2381,7 @@ mod tests {
let planner = SqlQueryPlanner::default();
let ctx = db.new_query_context(None);
let physical_plan = planner.query(query, &ctx).unwrap();
let physical_plan = planner.query(query, &ctx).await.unwrap();
ctx.collect(physical_plan).await.unwrap()
}

View File

@ -328,7 +328,7 @@ mod tests {
loop {
let planner = SqlQueryPlanner::default();
let ctx = db.new_query_context(None);
let physical_plan = planner.query(query, &ctx);
let physical_plan = planner.query(query, &ctx).await;
if physical_plan.is_ok() {
break;

View File

@ -14,6 +14,7 @@ use server::{
use snafu::{ResultExt, Snafu};
use std::{convert::TryFrom, net::SocketAddr, sync::Arc};
use trace::TraceCollector;
use trace_http::ctx::TraceHeaderParser;
mod http;
mod jemalloc;
@ -188,9 +189,12 @@ pub async fn main(config: Config) -> Result<()> {
let grpc_listener = grpc_listener(config.grpc_bind_address).await?;
let http_listener = http_listener(config.http_bind_address).await?;
let trace_collector = config.tracing_config.build().context(Tracing)?;
let async_exporter = config.tracing_config.build().context(Tracing)?;
let trace_collector = async_exporter
.clone()
.map(|x| -> Arc<dyn TraceCollector> { x });
serve(
let r = serve(
config,
application,
grpc_listener,
@ -198,7 +202,14 @@ pub async fn main(config: Config) -> Result<()> {
trace_collector,
app_server,
)
.await
.await;
if let Some(async_exporter) = async_exporter {
if let Err(e) = async_exporter.drain().await {
error!(%e, "error draining trace exporter");
}
}
r
}
async fn grpc_listener(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
@ -236,12 +247,21 @@ async fn serve(
// Construct a token to trigger shutdown of API services
let frontend_shutdown = tokio_util::sync::CancellationToken::new();
let trace_header_parser = TraceHeaderParser::new()
.with_jaeger_trace_context_header_name(
config
.tracing_config
.traces_jaeger_trace_context_header_name,
)
.with_jaeger_debug_name(config.tracing_config.traces_jaeger_debug_name);
// Construct and start up gRPC server
let grpc_server = rpc::serve(
grpc_listener,
Arc::clone(&application),
Arc::clone(&app_server),
trace_header_parser.clone(),
trace_collector.clone(),
frontend_shutdown.clone(),
config.initial_serving_state.into(),
@ -258,6 +278,7 @@ async fn serve(
Arc::clone(&app_server),
frontend_shutdown.clone(),
max_http_request_size,
trace_header_parser,
trace_collector,
)
.fuse();
@ -368,7 +389,7 @@ mod tests {
use tokio::task::JoinHandle;
use trace::span::{Span, SpanStatus};
use trace::RingBufferTraceCollector;
use trace_exporters::otel::{OtelExporter, TestOtelExporter};
use trace_exporters::export::{AsyncExporter, TestAsyncExporter};
fn test_config(server_id: Option<u32>) -> Config {
let mut config = Config::from_iter(&[
@ -783,9 +804,9 @@ mod tests {
}
#[tokio::test]
async fn test_otel_exporter() {
async fn test_async_exporter() {
let (sender, mut receiver) = tokio::sync::mpsc::channel(20);
let collector = Arc::new(OtelExporter::new(TestOtelExporter::new(sender)));
let collector = Arc::new(AsyncExporter::new(TestAsyncExporter::new(sender)));
let (addr, server, join) = tracing_server(&collector).await;
let conn = jaeger_client(addr, "34f8495:30e34:0:1").await;
@ -794,15 +815,14 @@ mod tests {
.await
.unwrap();
collector.shutdown();
collector.join().await.unwrap();
collector.drain().await.unwrap();
server.shutdown();
join.await.unwrap().unwrap();
let span = receiver.recv().await.unwrap();
assert_eq!(span.span_context.trace_id().to_u128(), 0x34f8495);
assert_eq!(span.parent_span_id.to_u64(), 0x30e34);
assert_eq!(span.ctx.trace_id.get(), 0x34f8495);
assert_eq!(span.ctx.parent_span_id.unwrap().get(), 0x30e34);
}
fn make_rules(db_name: impl Into<String>) -> ProvidedDatabaseRules {

View File

@ -38,6 +38,7 @@ use hyper::{http::HeaderValue, Body, Method, Request, Response, StatusCode};
use observability_deps::tracing::{self, debug, error};
use serde::Deserialize;
use snafu::{OptionExt, ResultExt, Snafu};
use trace_http::ctx::TraceHeaderParser;
use crate::influxdb_ioxd::http::metrics::LineProtocolMetrics;
use hyper::server::conn::{AddrIncoming, AddrStream};
@ -777,14 +778,15 @@ pub async fn serve<M>(
app_server: Arc<AppServer<M>>,
shutdown: CancellationToken,
max_request_size: usize,
trace_header_parser: TraceHeaderParser,
trace_collector: Option<Arc<dyn TraceCollector>>,
) -> Result<(), hyper::Error>
where
M: ConnectionManager + Send + Sync + Debug + 'static,
{
let metric_registry = Arc::clone(application.metric_registry());
let trace_layer = TraceLayer::new(metric_registry, trace_collector, false);
let trace_layer = TraceLayer::new(trace_header_parser, metric_registry, trace_collector, false);
let lp_metrics = Arc::new(LineProtocolMetrics::new(
application.metric_registry().as_ref(),
));
@ -1482,12 +1484,16 @@ mod tests {
let addr = AddrIncoming::bind(&bind_addr).expect("failed to bind server");
let server_url = format!("http://{}", addr.local_addr());
let trace_header_parser = trace_http::ctx::TraceHeaderParser::new()
.with_jaeger_trace_context_header_name("uber-trace-id");
tokio::task::spawn(serve(
addr,
application,
server,
CancellationToken::new(),
TEST_MAX_REQUEST_SIZE,
trace_header_parser,
trace_collector,
));
println!("Started server at {}", server_url);

View File

@ -40,7 +40,7 @@ impl Planner {
let ctx = self.ctx.child_ctx("sql");
self.ctx
.run(async move { planner.query(&query, &ctx) })
.run(async move { planner.query(&query, &ctx).await })
.await
}

View File

@ -7,6 +7,7 @@ use tokio_stream::wrappers::TcpListenerStream;
use tokio_util::sync::CancellationToken;
use tonic::codegen::InterceptedService;
use tonic::transport::NamedService;
use trace_http::ctx::TraceHeaderParser;
use crate::influxdb_ioxd::serving_readiness::ServingReadiness;
use server::{ApplicationState, ConnectionManager, Server};
@ -88,6 +89,7 @@ pub async fn serve<M>(
socket: TcpListener,
application: Arc<ApplicationState>,
server: Arc<Server<M>>,
trace_header_parser: TraceHeaderParser,
trace_collector: Option<Arc<dyn TraceCollector>>,
shutdown: CancellationToken,
serving_readiness: ServingReadiness,
@ -105,6 +107,7 @@ where
let builder = tonic::transport::Server::builder();
let mut builder = builder.layer(trace_http::tower::TraceLayer::new(
trace_header_parser,
Arc::clone(application.metric_registry()),
trace_collector,
true,

View File

@ -2277,8 +2277,11 @@ mod tests {
bind_addr
);
let trace_header_parser = trace_http::ctx::TraceHeaderParser::new();
let router = tonic::transport::Server::builder()
.layer(trace_http::tower::TraceLayer::new(
trace_header_parser,
Arc::clone(&test_storage.metric_registry),
None,
true,

View File

@ -6,18 +6,6 @@ use crate::common::{
use futures::TryStreamExt;
use generated_types::{storage_client::StorageClient, ReadFilterRequest};
// cfg at this level so IDE can resolve code even when jaeger feature is not active
#[cfg(feature = "jaeger")]
fn run_test() -> bool {
true
}
#[cfg(not(feature = "jaeger"))]
fn run_test() -> bool {
println!("Skipping test because jaeger feature not enabled");
false
}
async fn setup() -> (UdpCapture, ServerFixture) {
let udp_capture = UdpCapture::new().await;
@ -25,7 +13,8 @@ async fn setup() -> (UdpCapture, ServerFixture) {
.with_env("TRACES_EXPORTER", "jaeger")
.with_env("TRACES_EXPORTER_JAEGER_AGENT_HOST", udp_capture.ip())
.with_env("TRACES_EXPORTER_JAEGER_AGENT_PORT", udp_capture.port())
.with_client_header("uber-trace-id", "4:3:2:1");
.with_env("JAEGER_TRACE_CONTEXT_HEADER_NAME", "custom-trace-header")
.with_client_header("custom-trace-header", "4:3:2:1");
let server_fixture = ServerFixture::create_single_use_with_config(test_config).await;
@ -37,14 +26,7 @@ async fn setup() -> (UdpCapture, ServerFixture) {
(udp_capture, server_fixture)
}
#[tokio::test]
pub async fn test_tracing_sql() {
if !run_test() {
return;
}
let (udp_capture, server_fixture) = setup().await;
async fn run_sql_query(server_fixture: &ServerFixture) {
let scenario = Scenario::new();
scenario
.create_database(&mut server_fixture.management_client())
@ -61,6 +43,12 @@ pub async fn test_tracing_sql() {
.unwrap();
collect_query(query_results).await;
}
#[tokio::test]
pub async fn test_tracing_sql() {
let (udp_capture, server_fixture) = setup().await;
run_sql_query(&server_fixture).await;
// "shallow" packet inspection and verify the UDP server got
// something that had some expected results (maybe we could
@ -76,10 +64,6 @@ pub async fn test_tracing_sql() {
#[tokio::test]
pub async fn test_tracing_storage_api() {
if !run_test() {
return;
}
let (udp_capture, server_fixture) = setup().await;
let scenario = Scenario::new();
@ -120,3 +104,36 @@ pub async fn test_tracing_storage_api() {
// wait for the UDP server to shutdown
udp_capture.stop().await
}
#[tokio::test]
pub async fn test_tracing_create_trace() {
let udp_capture = UdpCapture::new().await;
let test_config = TestConfig::new()
.with_env("TRACES_EXPORTER", "jaeger")
.with_env("TRACES_EXPORTER_JAEGER_AGENT_HOST", udp_capture.ip())
.with_env("TRACES_EXPORTER_JAEGER_AGENT_PORT", udp_capture.port())
// setup a custom debug name (to ensure it gets plumbed through)
.with_env("JAEGER_DEBUG_NAME", "force-trace")
.with_client_header("force-trace", "some-debug-id");
let server_fixture = ServerFixture::create_single_use_with_config(test_config).await;
let mut management_client = server_fixture.management_client();
management_client.update_server_id(1).await.unwrap();
server_fixture.wait_server_initialized().await;
run_sql_query(&server_fixture).await;
// "shallow" packet inspection and verify the UDP server got
// something that had some expected results (maybe we could
// eventually verify the payload here too)
udp_capture.wait_for(|m| m.to_string().contains("IOxReadFilterNode"));
// debugging assistance
//println!("Traces received (1):\n\n{:#?}", udp_capture.messages());
// wait for the UDP server to shutdown
udp_capture.stop().await
}

View File

@ -20,6 +20,7 @@ pub mod span;
/// A TraceCollector is a sink for completed `Span`
pub trait TraceCollector: std::fmt::Debug + Send + Sync {
/// Exports the specified `Span` for collection by the sink
fn export(&self, span: Span);
}

View File

@ -87,6 +87,7 @@ pub enum MetaValue {
String(Cow<'static, str>),
Float(f64),
Int(i64),
Bool(bool),
}
impl From<&'static str> for MetaValue {

View File

@ -11,16 +11,10 @@ async-trait = "0.1"
chrono = { version = "0.4" }
futures = "0.3"
observability_deps = { path = "../observability_deps" }
opentelemetry = { version = "0.16" }
opentelemetry-jaeger = { version = "0.15", optional = true }
snafu = "0.6"
structopt = { version = "0.3.23" }
thrift = { version = "0.13.0" }
tokio = { version = "1.11", features = ["macros", "time", "sync", "rt"] }
tokio-util = { version = "0.6.3" }
trace = { path = "../trace" }
[dev-dependencies]
[features]
default = []
jaeger = ["opentelemetry-jaeger"]

54
trace_exporters/README.md Normal file
View File

@ -0,0 +1,54 @@
# Trace Exporters
## Regenerating Jaeger Thrift
_The instructions below use docker, but this is optional._
_Depending on your setup there may be permissions complications that require using `-u`_
Start up a Debian bullseye image
```
docker run -it -v $PWD:/out debian:bullseye-slim
```
Install the thrift-compiler
```
$ apt-get update
$ apt-get install thrift-compiler wget
```
Verify the version of the compiler matches the version of `thrift` in [Cargo.toml](./Cargo.toml)
```
$ thrift --version
Thrift version 0.13.0
```
Get the IDL definition
```
$ wget https://raw.githubusercontent.com/jaegertracing/jaeger-idl/master/thrift/jaeger.thrift https://raw.githubusercontent.com/jaegertracing/jaeger-idl/master/thrift/zipkincore.thrift https://raw.githubusercontent.com/jaegertracing/jaeger-idl/master/thrift/agent.thrift
```
Generate the code
```
$ thrift --out /out/src/thrift --gen rs agent.thrift
$ thrift --out /out/src/thrift --gen rs jaeger.thrift
$ thrift --out /out/src/thrift --gen rs zipkincore.thrift
```
Patch up imports
```
sed -i 's/use jaeger;/use super::jaeger;/g' /out/src/thrift/agent.rs
sed -i 's/use zipkincore;/use super::zipkincore;/g' /out/src/thrift/agent.rs
```
Remove the clippy line
```
#![cfg_attr(feature = "cargo-clippy", allow(too_many_arguments, type_complexity))]
```

View File

@ -0,0 +1,162 @@
use std::sync::Arc;
use async_trait::async_trait;
use futures::{
future::{BoxFuture, Shared},
FutureExt, TryFutureExt,
};
use tokio::sync::mpsc;
use tokio::task::JoinError;
use observability_deps::tracing::{error, info, warn};
use trace::{span::Span, TraceCollector};
/// Size of the exporter buffer
const CHANNEL_SIZE: usize = 1000;
/// An `AsyncExport` is a batched async version of `trace::TraceCollector`
///
/// Implementations receive completed spans as a `Vec<Span>` and are called
/// through `&mut self`, so a given exporter value processes one batch at a time.
#[async_trait]
pub trait AsyncExport: Send + 'static {
/// Exports the provided batch of spans.
///
/// The method has no return value, so implementations have to handle
/// (for example log) any failure themselves.
async fn export(&mut self, span: Vec<Span>);
}
/// `AsyncExporter` wraps an `AsyncExport` and sinks spans to it
///
/// In order to do this it spawns a background worker that pulls messages
/// off a queue and writes them to the `AsyncExport`.
///
/// If this worker cannot keep up, and this queue fills up, spans will
/// be dropped and warnings logged
///
/// Note: Currently this does not batch spans (#2392)
#[derive(Debug)]
pub struct AsyncExporter {
/// Completion of the spawned background worker task
///
/// Wrapped in `Shared` so that it can be cloned and awaited through `&self`;
/// a failure to join the task is reported as `Arc<JoinError>`.
join: Shared<BoxFuture<'static, Result<(), Arc<JoinError>>>>,
/// Communication queue with the background worker
///
/// Sending None triggers termination
sender: tokio::sync::mpsc::Sender<Option<Span>>,
}
impl AsyncExporter {
    /// Builds an `AsyncExporter` that forwards every span it is given to `collector`
    ///
    /// A worker task is spawned with `tokio::spawn`; it owns `collector` and the
    /// receiving half of a channel holding at most `CHANNEL_SIZE` messages.
    pub fn new<T: AsyncExport>(collector: T) -> Self {
        let (sender, receiver) = mpsc::channel(CHANNEL_SIZE);

        let worker = background_worker(collector, receiver);
        let join = tokio::spawn(worker).map_err(Arc::new).boxed().shared();

        Self { join, sender }
    }

    /// Asks the background worker to terminate and waits for it to finish
    ///
    /// The termination marker is queued behind any spans already accepted, so
    /// those spans are handed to the `AsyncExport` before this returns. The
    /// returned error is the worker task's `JoinError`, if joining it failed.
    pub async fn drain(&self) -> Result<(), Arc<JoinError>> {
        info!("batched exporter shutting down");

        // A failed send is deliberately ignored: the worker's exit is
        // observed through `join` below either way.
        let _ = self.sender.send(None).await;

        let worker_finished = self.join.clone();
        worker_finished.await
    }
}
impl TraceCollector for AsyncExporter {
    /// Queues `span` for the background worker without blocking
    ///
    /// When the queue is full or the worker has gone away the span is
    /// discarded and a warning is logged instead.
    fn export(&self, span: Span) {
        use mpsc::error::TrySendError;

        let rejected = match self.sender.try_send(Some(span)) {
            Ok(()) => {
                //TODO: Increment some metric (#2613)
                return;
            }
            Err(rejected) => rejected,
        };

        match rejected {
            TrySendError::Full(_) => warn!("exporter cannot keep up, dropping spans"),
            TrySendError::Closed(_) => warn!("background worker shutdown"),
        }
    }
}
/// Worker loop behind `AsyncExporter`
///
/// Forwards each received span to `exporter` as a single-element batch.
/// Returns once a `None` message (the termination marker) is received, or
/// once every sender has been dropped.
async fn background_worker<T: AsyncExport>(
    mut exporter: T,
    mut receiver: mpsc::Receiver<Option<Span>>,
) {
    while let Some(message) = receiver.recv().await {
        match message {
            Some(span) => exporter.export(vec![span]).await,
            None => {
                info!("async exporter shut down");
                return;
            }
        }
    }

    // `recv` only yields `None` when the channel is closed, i.e. no
    // termination marker was sent before the senders went away.
    error!("sender-side of async exporter dropped without waiting for shut down");
}
/// An `AsyncExport` implementation that forwards every span to a tokio mpsc channel.
///
/// Intended for testing ONLY
#[derive(Debug)]
pub struct TestAsyncExporter {
    channel: mpsc::Sender<Span>,
}

impl TestAsyncExporter {
    /// Creates an exporter that publishes spans on `channel`
    pub fn new(channel: mpsc::Sender<Span>) -> Self {
        Self { channel }
    }
}

#[async_trait]
impl AsyncExport for TestAsyncExporter {
    /// Sends the spans of `batch` one by one, in order
    ///
    /// Panics with "channel closed" if the receiving side has been dropped.
    async fn export(&mut self, batch: Vec<Span>) {
        for span in batch {
            let delivery = self.channel.send(span).await;
            delivery.expect("channel closed");
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use trace::ctx::SpanContext;
/// Spans exported before `drain` are all delivered, in the order they were
/// exported, and exporting after shutdown does not panic.
#[tokio::test]
async fn test_exporter() {
// Capacity 10 is enough to hold the three spans sent below, so the worker
// can finish even though nothing is received until after `drain`
let (sender, mut receiver) = mpsc::channel(10);
let exporter = AsyncExporter::new(TestAsyncExporter::new(sender));
let root = SpanContext::new(Arc::new(trace::LogTraceCollector::new()));
let s1 = root.child("foo");
let s2 = root.child("bar");
exporter.export(s1.clone());
exporter.export(s2.clone());
exporter.export(s2.clone());
// Drain should wait for all published spans to be flushed
exporter.drain().await.unwrap();
let r1 = receiver.recv().await.unwrap();
let r2 = receiver.recv().await.unwrap();
let r3 = receiver.recv().await.unwrap();
// Should not be fatal despite exporter having been shutdown
exporter.export(s2.clone());
// r1 must be s1
assert_eq!(root.span_id.get(), r1.ctx.parent_span_id.unwrap().get());
assert_eq!(s1.ctx.span_id.get(), r1.ctx.span_id.get());
assert_eq!(s1.ctx.trace_id.get(), r1.ctx.trace_id.get());
// r2 must be the first copy of s2
assert_eq!(root.span_id.get(), r2.ctx.parent_span_id.unwrap().get());
assert_eq!(s2.ctx.span_id.get(), r2.ctx.span_id.get());
assert_eq!(s2.ctx.trace_id.get(), r2.ctx.trace_id.get());
// r3 must be the second copy of s2
assert_eq!(root.span_id.get(), r3.ctx.parent_span_id.unwrap().get());
assert_eq!(s2.ctx.span_id.get(), r3.ctx.span_id.get());
assert_eq!(s2.ctx.trace_id.get(), r3.ctx.trace_id.get());
}
}

View File

@ -0,0 +1,307 @@
use std::net::{SocketAddr, ToSocketAddrs, UdpSocket};
use async_trait::async_trait;
use observability_deps::tracing::{error, info};
use trace::span::Span;
use crate::export::AsyncExport;
use crate::thrift::agent::{AgentSyncClient, TAgentSyncClient};
use crate::thrift::jaeger;
use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol};
mod span;
/// `JaegerAgentExporter` receives span data and writes it over UDP to a local jaeger agent
///
/// Note: will drop data if the UDP socket would block
pub struct JaegerAgentExporter {
/// The name of the service
///
/// Reported as the `service_name` of the `jaeger::Process` in every batch
service_name: String,
/// The agent client that encodes messages
///
/// Its input side is a `NoopReader` and its output side a `MessageWriter`,
/// both using the thrift compact protocol
client:
AgentSyncClient<TCompactInputProtocol<NoopReader>, TCompactOutputProtocol<MessageWriter>>,
/// Sequence number to assign to the next batch
///
/// Batches are assigned sequential numbers
/// to allow jaeger to better detect dropped spans
next_sequence: i64,
}
impl JaegerAgentExporter {
pub fn new<E: ToSocketAddrs + std::fmt::Display>(
service_name: String,
agent_endpoint: E,
) -> super::Result<Self> {
info!(%agent_endpoint, %service_name, "Creating jaeger tracing exporter");
let remote_addr = agent_endpoint.to_socket_addrs()?.next().ok_or_else(|| {
super::Error::ResolutionError {
address: agent_endpoint.to_string(),
}
})?;
let local_addr: SocketAddr = if remote_addr.is_ipv4() {
"0.0.0.0:0"
} else {
"[::]:0"
}
.parse()
.unwrap();
let socket = UdpSocket::bind(local_addr)?;
socket.set_nonblocking(true)?;
socket.connect(remote_addr)?;
let client = AgentSyncClient::new(
TCompactInputProtocol::new(NoopReader::default()),
TCompactOutputProtocol::new(MessageWriter::new(socket)),
);
Ok(Self {
service_name,
client,
next_sequence: 0,
})
}
fn make_batch(&mut self, spans: Vec<Span>) -> jaeger::Batch {
let seq_no = Some(self.next_sequence);
self.next_sequence += 1;
jaeger::Batch {
process: jaeger::Process {
service_name: self.service_name.clone(),
tags: None,
},
spans: spans.into_iter().map(Into::into).collect(),
seq_no,
stats: None,
}
}
}
#[async_trait]
impl AsyncExport for JaegerAgentExporter {
async fn export(&mut self, spans: Vec<Span>) {
let batch = self.make_batch(spans);
if let Err(e) = self.client.emit_batch(batch) {
error!(%e, "error writing batch to jaeger agent")
}
}
}
/// A `std::io::Read` implementation that never yields any data
#[derive(Debug, Default)]
struct NoopReader {}

impl std::io::Read for NoopReader {
    /// Always reports that zero bytes were read and leaves the buffer untouched
    fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
        let bytes_read = 0;
        Ok(bytes_read)
    }
}
/// A `MessageWriter` buffers written bytes and only sends them to the UDP
/// socket, as one payload, when flushed
///
/// If the UDP socket would block, drops the packet
struct MessageWriter {
    buf: Vec<u8>,
    socket: UdpSocket,
}

impl MessageWriter {
    /// Creates a writer with an empty buffer that sends on `socket`
    fn new(socket: UdpSocket) -> Self {
        Self {
            buf: Vec::new(),
            socket,
        }
    }
}

impl std::io::Write for MessageWriter {
    /// Appends `buf` to the pending message; nothing is sent yet
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let accepted = buf.len();
        self.buf.extend_from_slice(buf);
        Ok(accepted)
    }

    /// Sends the pending message with a single `send` call and clears the buffer
    ///
    /// The buffer is cleared whatever the outcome. A `WouldBlock` error is
    /// logged and reported as success; any other error is returned.
    fn flush(&mut self) -> std::io::Result<()> {
        let message_len = self.buf.len();
        let outcome = self.socket.send(&self.buf);
        self.buf.clear();

        let written = match outcome {
            Ok(written) => written,
            Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => {
                error!("jaeger agent exporter would have blocked - dropping message");
                return Ok(());
            }
            Err(e) => return Err(e),
        };

        if written != message_len {
            // In the event a message is truncated, there isn't an obvious way to recover
            //
            // The Thrift protocol is normally used on top of a reliable stream,
            // e.g. TCP, and it is a bit of a hack to send it over UDP
            //
            // Jaeger requires that each thrift Message is encoded in exactly one UDP
            // packet, as this ensures it either arrives in its entirety or not at all
            //
            // If for whatever reason the packet is truncated, the agent will fail
            // to decode it, likely due to a missing stop-field, and discard it
            error!(%written, %message_len, "jaeger agent exporter failed to write message as single UDP packet");
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::thrift::agent::{AgentSyncHandler, AgentSyncProcessor};
use chrono::{TimeZone, Utc};
use std::sync::{Arc, Mutex};
use thrift::server::TProcessor;
use thrift::transport::TBufferChannel;
use trace::ctx::{SpanContext, SpanId, TraceId};
use trace::span::{SpanEvent, SpanStatus};
/// Agent handler that records every received jaeger batch
struct TestHandler {
/// Received batches, shared with the test body so it can inspect them
batches: Arc<Mutex<Vec<jaeger::Batch>>>,
}
impl AgentSyncHandler for TestHandler {
/// Zipkin batches are not expected in these tests
fn handle_emit_zipkin_batch(
&self,
_spans: Vec<crate::thrift::zipkincore::Span>,
) -> thrift::Result<()> {
unimplemented!()
}
/// Appends `batch` to the shared list
fn handle_emit_batch(&self, batch: jaeger::Batch) -> thrift::Result<()> {
self.batches.lock().unwrap().push(batch);
Ok(())
}
}
/// Wraps a UdpSocket and a buffer the size of the max UDP datagram and provides
/// `std::io::Read` on this buffer's contents, ensuring that reads are not truncated
struct Reader {
    socket: UdpSocket,
    /// Holds the most recently received datagram
    buffer: Box<[u8; 65535]>,
    /// Offset of the next unread byte within `buffer`
    idx: usize,
    /// Number of valid bytes in `buffer`
    len: usize,
}

impl Reader {
    /// Creates a reader with no buffered data that receives from `socket`
    pub fn new(socket: UdpSocket) -> Self {
        Self {
            socket,
            buffer: Box::new([0; 65535]),
            idx: 0,
            len: 0,
        }
    }
}

impl std::io::Read for Reader {
    /// Copies buffered datagram bytes into `buf`, receiving a new datagram
    /// first if the previous one has been fully consumed
    ///
    /// Returns the number of bytes copied, which may be less than `buf.len()`.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        if self.idx == self.len {
            // Only update the cursor once the receive has succeeded, so that a
            // failed receive (e.g. a timeout) does not expose stale bytes
            let received = self.socket.recv(&mut self.buffer[..])?;
            self.idx = 0;
            self.len = received;
        }

        let to_read = buf.len().min(self.len - self.idx);
        // `copy_from_slice` panics unless both slices have the same length, so
        // only the first `to_read` bytes of the destination may be targeted
        buf[..to_read].copy_from_slice(&self.buffer[self.idx..self.idx + to_read]);
        self.idx += to_read;
        Ok(to_read)
    }
}
/// Exports two batches over a local UDP socket, decodes them with the
/// generated thrift agent processor and checks the decoded contents
#[tokio::test]
async fn test_jaeger() {
let server = UdpSocket::bind("0.0.0.0:0").unwrap();
// Bound the wait for a datagram so a missing packet fails the test instead of hanging
server
.set_read_timeout(Some(std::time::Duration::from_secs(1)))
.unwrap();
let address = server.local_addr().unwrap();
let mut exporter = JaegerAgentExporter::new("service_name".to_string(), address).unwrap();
let batches = Arc::new(Mutex::new(vec![]));
let mut processor_input = TCompactInputProtocol::new(Reader::new(server));
let mut processor_output = TCompactOutputProtocol::new(TBufferChannel::with_capacity(0, 0));
let processor = AgentSyncProcessor::new(TestHandler {
batches: Arc::clone(&batches),
});
let ctx = SpanContext {
trace_id: TraceId::new(43434).unwrap(),
parent_span_id: None,
span_id: SpanId::new(3495993).unwrap(),
collector: None,
};
let mut span = ctx.child("foo");
span.status = SpanStatus::Ok;
span.events = vec![SpanEvent {
time: Utc.timestamp_nanos(200000),
msg: "hello".into(),
}];
span.start = Some(Utc.timestamp_nanos(100000));
span.end = Some(Utc.timestamp_nanos(300000));
// First batch carries two spans, second batch carries one
exporter.export(vec![span.clone(), span.clone()]).await;
exporter.export(vec![span.clone()]).await;
// One `process` call per exported batch
processor
.process(&mut processor_input, &mut processor_output)
.unwrap();
processor
.process(&mut processor_input, &mut processor_output)
.unwrap();
let batches = batches.lock().unwrap();
assert_eq!(batches.len(), 2);
let b1 = &batches[0];
assert_eq!(b1.spans.len(), 2);
assert_eq!(b1.process.service_name.as_str(), "service_name");
// Sequence numbers start at 0 and increase by one per batch
assert_eq!(b1.seq_no.unwrap(), 0);
let b2 = &batches[1];
assert_eq!(b2.spans.len(), 1);
assert_eq!(b2.process.service_name.as_str(), "service_name");
assert_eq!(b2.seq_no.unwrap(), 1);
// All three exported spans were clones, so they must decode identically
let b1_s0 = &b1.spans[0];
assert_eq!(b1_s0, &b1.spans[1]);
assert_eq!(b1_s0, &b2.spans[0]);
assert_eq!(b1_s0.span_id, span.ctx.span_id.get() as i64);
assert_eq!(
b1_s0.parent_span_id,
span.ctx.parent_span_id.unwrap().get() as i64
);
// microseconds not nanoseconds
assert_eq!(b1_s0.start_time, 100);
assert_eq!(b1_s0.duration, 200);
// The single span event is exported as a log with one "event" field
let logs = b1_s0.logs.as_ref().unwrap();
assert_eq!(logs.len(), 1);
assert_eq!(logs[0].timestamp, 200);
assert_eq!(logs[0].fields.len(), 1);
assert_eq!(logs[0].fields[0].key.as_str(), "event");
assert_eq!(logs[0].fields[0].v_str.as_ref().unwrap().as_str(), "hello");
// `SpanStatus::Ok` is exported as a boolean "ok" tag
let tags = b1_s0.tags.as_ref().unwrap();
assert_eq!(tags.len(), 1);
assert_eq!(tags[0].key.as_str(), "ok");
assert!(tags[0].v_bool.unwrap());
}
/// An endpoint given as `host:port` with a hostname (rather than a socket
/// address value) is accepted by `JaegerAgentExporter::new`
#[test]
fn test_resolve() {
JaegerAgentExporter::new("service_name".to_string(), "localhost:8082").unwrap();
}
}

View File

@ -0,0 +1,117 @@
//! Contains the conversion logic from a `trace::span::Span` to `thrift::jaeger::Span`
use crate::thrift::jaeger;
use trace::span::{MetaValue, Span, SpanEvent, SpanStatus};
impl From<Span> for jaeger::Span {
fn from(mut s: Span) -> Self {
let trace_id = s.ctx.trace_id.get();
let trace_id_high = (trace_id >> 64) as i64;
let trace_id_low = trace_id as i64;
// A parent span id of 0 indicates no parent span ID (span IDs are non-zero)
let parent_span_id = s.ctx.parent_span_id.map(|id| id.get()).unwrap_or_default() as i64;
let (start_time, duration) = match (s.start, s.end) {
(Some(start), Some(end)) => (
start.timestamp_nanos() / 1000,
(end - start).num_microseconds().expect("no overflow"),
),
(Some(start), _) => (start.timestamp_nanos() / 1000, 0),
_ => (0, 0),
};
// These don't appear to be standardised, however, the jaeger UI treats
// the presence of an "error" tag as indicating an error
match s.status {
SpanStatus::Ok => {
s.metadata
.entry("ok".into())
.or_insert(MetaValue::Bool(true));
}
SpanStatus::Err => {
s.metadata
.entry("error".into())
.or_insert(MetaValue::Bool(true));
}
SpanStatus::Unknown => {}
}
let tags = match s.metadata.is_empty() {
true => None,
false => Some(
s.metadata
.into_iter()
.map(|(name, value)| tag_from_meta(name.to_string(), value))
.collect(),
),
};
let logs = match s.events.is_empty() {
true => None,
false => Some(s.events.into_iter().map(Into::into).collect()),
};
Self {
trace_id_low,
trace_id_high,
span_id: s.ctx.span_id.get() as i64,
parent_span_id,
operation_name: s.name.to_string(),
references: None,
flags: 0,
start_time,
duration,
tags,
logs,
}
}
}
impl From<SpanEvent> for jaeger::Log {
    /// Converts a span event into a jaeger log entry
    ///
    /// The log carries the event time in microseconds and a single string
    /// field named `event` holding the event message.
    fn from(event: SpanEvent) -> Self {
        let timestamp = event.time.timestamp_nanos() / 1000;

        let message = jaeger::Tag {
            key: "event".to_string(),
            v_type: jaeger::TagType::String,
            v_str: Some(event.msg.to_string()),
            v_double: None,
            v_bool: None,
            v_long: None,
            v_binary: None,
        };

        Self {
            timestamp,
            fields: vec![message],
        }
    }
}
/// Builds the jaeger tag named `key` for a metadata value
///
/// Exactly one of the typed value fields is populated, and `v_type` is set to
/// the matching `jaeger::TagType`.
fn tag_from_meta(key: String, value: MetaValue) -> jaeger::Tag {
    // Template with no value set; each arm fills in its own type and value
    let unset = jaeger::Tag {
        key,
        v_type: jaeger::TagType::String,
        v_str: None,
        v_double: None,
        v_bool: None,
        v_long: None,
        v_binary: None,
    };

    match value {
        MetaValue::String(v) => jaeger::Tag {
            v_type: jaeger::TagType::String,
            v_str: Some(v.to_string()),
            ..unset
        },
        MetaValue::Float(v) => jaeger::Tag {
            v_type: jaeger::TagType::Double,
            v_double: Some(v.into()),
            ..unset
        },
        MetaValue::Int(v) => jaeger::Tag {
            v_type: jaeger::TagType::Long,
            v_long: Some(v),
            ..unset
        },
        MetaValue::Bool(v) => jaeger::Tag {
            v_type: jaeger::TagType::Bool,
            v_bool: Some(v),
            ..unset
        },
    }
}

View File

@ -7,13 +7,34 @@
clippy::future_not_send
)]
use crate::export::AsyncExporter;
use crate::jaeger::JaegerAgentExporter;
use snafu::Snafu;
use std::num::NonZeroU16;
use std::sync::Arc;
use structopt::StructOpt;
use trace::TraceCollector;
pub mod otel;
pub mod export;
mod jaeger;
/// Auto-generated thrift code
///
/// The lints below are silenced for the whole module because its contents
/// are generated rather than hand-written.
#[allow(
dead_code,
deprecated,
clippy::redundant_field_names,
clippy::unused_unit,
clippy::use_self,
clippy::too_many_arguments,
clippy::type_complexity
)]
mod thrift {
// Each submodule lives in its own file under `thrift/`
pub mod agent;
pub mod zipkincore;
pub mod jaeger;
}
/// CLI config for distributed tracing options
#[derive(Debug, StructOpt, Clone)]
@ -61,10 +82,30 @@ pub struct TracingConfig {
default_value = "iox-conductor"
)]
pub traces_exporter_jaeger_service_name: String,
/// Tracing: specifies the header name used for passing trace context
///
/// Only used if `--traces-exporter` is "jaeger".
#[structopt(
long = "--traces-exporter-jaeger-trace-context-header-name",
env = "JAEGER_TRACE_CONTEXT_HEADER_NAME",
default_value = "uber-trace-id"
)]
pub traces_jaeger_trace_context_header_name: String,
/// Tracing: specifies the header name used for force sampling
///
/// Only used if `--traces-exporter` is "jaeger".
#[structopt(
long = "--traces-jaeger-debug-name",
env = "JAEGER_DEBUG_NAME",
default_value = "jaeger-debug-id"
)]
pub traces_jaeger_debug_name: String,
}
impl TracingConfig {
pub fn build(&self) -> Result<Option<Arc<dyn TraceCollector>>> {
pub fn build(&self) -> Result<Option<Arc<AsyncExporter>>> {
match self.traces_exporter {
TracesExporter::None => Ok(None),
TracesExporter::Jaeger => Ok(Some(jaeger_exporter(self)?)),
@ -95,23 +136,16 @@ impl std::str::FromStr for TracesExporter {
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("failed to construct trace exporter: {}", source))]
TraceExporter {
source: opentelemetry::trace::TraceError,
},
#[snafu(display("Failed to resolve address: {}", address))]
ResolutionError { address: String },
#[snafu(display(
"'jaeger' not supported with this build. Hint: recompile with appropriate features"
))]
JaegerNotBuilt {},
#[snafu(context(false))]
IOError { source: std::io::Error },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[cfg(feature = "jaeger")]
fn jaeger_exporter(config: &TracingConfig) -> Result<Arc<dyn TraceCollector>> {
use observability_deps::tracing::info;
fn jaeger_exporter(config: &TracingConfig) -> Result<Arc<AsyncExporter>> {
let agent_endpoint = format!(
"{}:{}",
config.traces_exporter_jaeger_agent_host.trim(),
@ -119,18 +153,7 @@ fn jaeger_exporter(config: &TracingConfig) -> Result<Arc<dyn TraceCollector>> {
);
let service_name = &config.traces_exporter_jaeger_service_name;
info!(%agent_endpoint, %service_name, "Creating jaeger tracing exporter");
let jaeger = JaegerAgentExporter::new(service_name.clone(), agent_endpoint)?;
let exporter = opentelemetry_jaeger::new_pipeline()
.with_agent_endpoint(agent_endpoint)
.with_service_name(&config.traces_exporter_jaeger_service_name)
.init_sync_exporter()
.map_err(|source| Error::TraceExporter { source })?;
Ok(Arc::new(otel::OtelExporter::new(exporter)))
}
#[cfg(not(feature = "jaeger"))]
fn jaeger_exporter(_config: &TracingConfig) -> Result<Arc<dyn TraceCollector>> {
Err(Error::JaegerNotBuilt {})
Ok(Arc::new(AsyncExporter::new(jaeger)))
}

View File

@ -1,356 +0,0 @@
use std::borrow::Cow;
use std::future::Future;
use std::sync::Arc;
use async_trait::async_trait;
use futures::{
future::{BoxFuture, Shared},
FutureExt, TryFutureExt,
};
use tokio::sync::mpsc;
use tokio::task::JoinError;
use tokio_util::sync::CancellationToken;
use observability_deps::tracing::{error, info, warn};
use opentelemetry::sdk::export::trace::{ExportResult, SpanData, SpanExporter};
use trace::ctx::{SpanContext, SpanId, TraceId};
use trace::span::{MetaValue, SpanEvent, SpanStatus};
use trace::{span::Span, TraceCollector};
/// Size of the exporter buffer
///
/// Used as the capacity of the bounded channel between `OtelExporter`
/// and its background worker
const CHANNEL_SIZE: usize = 1000;
/// Maximum number of events that can be associated with a span
///
/// Passed to the `EvictedQueue` that holds a converted span's events
const MAX_EVENTS: u32 = 100;
/// Maximum number of attributes that can be associated with a span
///
/// Passed to the `EvictedHashMap` that holds a converted span's attributes
const MAX_ATTRIBUTES: u32 = 100;
/// `OtelExporter` wraps an opentelemetry SpanExporter and sinks spans to it
///
/// In order to do this it spawns a background worker that pulls messages
/// off a queue and writes them to opentelemetry. If this worker cannot keep
/// up, and this queue fills up, spans will be dropped and warnings logged
#[derive(Debug)]
pub struct OtelExporter {
// Shared, cloneable handle on the outcome of the background worker task
join: Shared<BoxFuture<'static, Result<(), Arc<JoinError>>>>,
// Sending half of the queue drained by the background worker
sender: tokio::sync::mpsc::Sender<SpanData>,
// Cancelled by `shutdown()` to stop the background worker
shutdown: CancellationToken,
}
impl OtelExporter {
/// Creates a new `OtelExporter`
pub fn new<T: SpanExporter + 'static>(exporter: T) -> Self {
let shutdown = CancellationToken::new();
let (sender, receiver) = mpsc::channel(CHANNEL_SIZE);
let handle = tokio::spawn(background_worker(shutdown.clone(), exporter, receiver));
let join = handle.map_err(Arc::new).boxed().shared();
Self {
join,
shutdown,
sender,
}
}
/// Triggers shutdown of this `OtelExporter`
pub fn shutdown(&self) {
info!("otel exporter shutting down");
self.shutdown.cancel()
}
/// Waits for the background worker of OtelExporter to finish
pub fn join(&self) -> impl Future<Output = Result<(), Arc<JoinError>>> {
self.join.clone()
}
}
impl TraceCollector for OtelExporter {
    /// Converts `span` and queues it for the background worker without
    /// blocking; if the queue is full or the worker has gone away the span
    /// is dropped and a warning is logged.
    fn export(&self, span: Span) {
        use mpsc::error::TrySendError;

        let outcome = self.sender.try_send(convert_span(span));

        //TODO: Increment some metric on success
        if let Err(failure) = outcome {
            match failure {
                TrySendError::Full(_) => {
                    warn!("exporter cannot keep up, dropping spans")
                }
                TrySendError::Closed(_) => {
                    warn!("background worker shutdown")
                }
            }
        }
    }
}
/// Task body spawned by `OtelExporter::new`
///
/// Runs `exporter_loop` until either it returns or `shutdown` is
/// cancelled, whichever happens first, then logs that the exporter has
/// shut down.
async fn background_worker<T: SpanExporter + 'static>(
shutdown: CancellationToken,
exporter: T,
receiver: mpsc::Receiver<SpanData>,
) {
tokio::select! {
_ = exporter_loop(exporter, receiver) => {
// Don't expect this future to complete
error!("otel exporter loop completed")
}
// Normal termination path: shutdown was requested
_ = shutdown.cancelled() => {}
}
info!("otel exporter shut down")
}
/// An opentelemetry::SpanExporter that sinks writes to a tokio mpsc channel.
///
/// Intended for testing ONLY
///
/// Note: There is a similar construct in opentelemetry behind the testing feature
/// flag, but enabling this brings in a large number of additional dependencies and
/// so we just implement our own version
#[derive(Debug)]
pub struct TestOtelExporter {
// Every exported span is forwarded to this channel
channel: mpsc::Sender<SpanData>,
}
impl TestOtelExporter {
/// Creates a `TestOtelExporter` that forwards every exported span to `channel`
pub fn new(channel: mpsc::Sender<SpanData>) -> Self {
Self { channel }
}
}
#[async_trait]
impl SpanExporter for TestOtelExporter {
/// Forwards each span in `batch`, in order, to the channel, waiting for
/// capacity as needed.
///
/// # Panics
///
/// Panics with "channel closed" if sending on the channel fails.
async fn export(&mut self, batch: Vec<SpanData>) -> ExportResult {
for span in batch {
self.channel.send(span).await.expect("channel closed")
}
Ok(())
}
}
/// Drains `receiver`, handing each span to `exporter` as a single-element
/// batch. Export failures are logged and do not stop the loop.
///
/// Only returns once `receiver.recv()` yields `None`, at which point a
/// warning is logged.
async fn exporter_loop<T: SpanExporter + 'static>(
    mut exporter: T,
    mut receiver: tokio::sync::mpsc::Receiver<SpanData>,
) {
    loop {
        let span = match receiver.recv().await {
            Some(span) => span,
            None => break,
        };

        // TODO: Batch export spans
        if let Err(e) = exporter.export(vec![span]).await {
            error!(%e, "error exporting span")
        }
    }

    warn!("sender-side of jaeger exporter dropped without waiting for shut down")
}
fn convert_span(span: Span) -> SpanData {
use opentelemetry::sdk::trace::{EvictedHashMap, EvictedQueue};
use opentelemetry::sdk::InstrumentationLibrary;
use opentelemetry::trace::{SpanId, SpanKind};
use opentelemetry::{Key, KeyValue};
let parent_span_id = match span.ctx.parent_span_id {
Some(id) => convert_span_id(id),
None => SpanId::invalid(),
};
let mut ret = SpanData {
span_context: convert_ctx(&span.ctx),
parent_span_id,
span_kind: SpanKind::Server,
name: span.name,
start_time: span.start.map(Into::into).unwrap_or(std::time::UNIX_EPOCH),
end_time: span.end.map(Into::into).unwrap_or(std::time::UNIX_EPOCH),
attributes: EvictedHashMap::new(MAX_ATTRIBUTES, 0),
events: EvictedQueue::new(MAX_EVENTS),
links: EvictedQueue::new(0),
status_code: convert_status(span.status),
status_message: Default::default(),
resource: None,
instrumentation_lib: InstrumentationLibrary::new("iox-trace", None),
};
ret.events
.extend(span.events.into_iter().map(convert_event));
for (key, value) in span.metadata {
let key = match key {
Cow::Owned(key) => Key::new(key),
Cow::Borrowed(key) => Key::new(key),
};
let value = convert_meta_value(value);
ret.attributes.insert(KeyValue::new(key, value))
}
ret
}
/// Builds an opentelemetry span context from the trace id and span id of
/// `ctx`; the remaining constructor arguments are left at their default
/// values (`false` for the boolean one).
fn convert_ctx(ctx: &SpanContext) -> opentelemetry::trace::SpanContext {
    let trace_id = convert_trace_id(ctx.trace_id);
    let span_id = convert_span_id(ctx.span_id);

    opentelemetry::trace::SpanContext::new(
        trace_id,
        span_id,
        Default::default(),
        false,
        Default::default(),
    )
}
fn convert_event(event: SpanEvent) -> opentelemetry::trace::Event {
opentelemetry::trace::Event {
name: event.msg,
timestamp: event.time.into(),
attributes: vec![],
dropped_attributes_count: 0,
}
}
fn convert_status(status: SpanStatus) -> opentelemetry::trace::StatusCode {
use opentelemetry::trace::StatusCode;
match status {
SpanStatus::Unknown => StatusCode::Unset,
SpanStatus::Ok => StatusCode::Ok,
SpanStatus::Err => StatusCode::Error,
}
}
/// Converts an IOx span id into an opentelemetry span id with the same
/// 64-bit value.
fn convert_span_id(id: SpanId) -> opentelemetry::trace::SpanId {
    let raw = id.0.get();
    opentelemetry::trace::SpanId::from_u64(raw)
}
/// Converts an IOx trace id into an opentelemetry trace id with the same
/// 128-bit value.
fn convert_trace_id(id: TraceId) -> opentelemetry::trace::TraceId {
    let raw = id.0.get();
    opentelemetry::trace::TraceId::from_u128(raw)
}
/// Converts a span metadata value into the equivalent opentelemetry
/// attribute value.
///
/// Every `MetaValue` variant is handled, including `Bool`, which other
/// converters of `MetaValue` in this crate (e.g. the jaeger tag
/// conversion) also support.
fn convert_meta_value(v: MetaValue) -> opentelemetry::Value {
    match v {
        MetaValue::String(v) => opentelemetry::Value::String(v),
        MetaValue::Float(v) => opentelemetry::Value::F64(v),
        MetaValue::Int(v) => opentelemetry::Value::I64(v),
        MetaValue::Bool(v) => opentelemetry::Value::Bool(v),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::{TimeZone, Utc};
use opentelemetry::{Key, Value};
use std::time::{Duration, UNIX_EPOCH};
/// Checks that `convert_span` carries ids, timestamps, events and
/// metadata over to the opentelemetry `SpanData` unchanged
#[test]
fn test_conversion() {
let root = SpanContext {
trace_id: TraceId::new(232345).unwrap(),
parent_span_id: Some(SpanId::new(2484).unwrap()),
span_id: SpanId::new(2343).unwrap(),
collector: None,
};
let mut span = root.child("foo");
// One metadata entry per supported value type exercised here
span.metadata.insert("string".into(), "bar".into());
span.metadata.insert("float".into(), 3.32.into());
span.metadata.insert("int".into(), 5.into());
span.events.push(SpanEvent {
time: Utc.timestamp_nanos(1230),
msg: "event".into(),
});
span.status = SpanStatus::Ok;
span.start = Some(Utc.timestamp_nanos(1000));
span.end = Some(Utc.timestamp_nanos(2000));
let span_data: SpanData = convert_span(span.clone());
// Identifiers
assert_eq!(
span_data.span_context.span_id().to_u64(),
span.ctx.span_id.get()
);
assert_eq!(
span_data.span_context.trace_id().to_u128(),
span.ctx.trace_id.get()
);
assert_eq!(
span_data.parent_span_id.to_u64(),
span.ctx.parent_span_id.unwrap().get()
);
// Timestamps
assert_eq!(
span_data.start_time,
UNIX_EPOCH + Duration::from_nanos(1000)
);
assert_eq!(span_data.end_time, UNIX_EPOCH + Duration::from_nanos(2000));
// Events
let events: Vec<_> = span_data.events.iter().collect();
assert_eq!(events.len(), 1);
assert_eq!(events[0].name.as_ref(), "event");
assert_eq!(events[0].timestamp, UNIX_EPOCH + Duration::from_nanos(1230));
assert_eq!(events[0].attributes.len(), 0);
// Metadata -> attributes
assert_eq!(
span_data
.attributes
.get(&Key::from_static_str("string"))
.unwrap()
.clone(),
Value::String("bar".into())
);
assert_eq!(
span_data
.attributes
.get(&Key::from_static_str("float"))
.unwrap()
.clone(),
Value::F64(3.32)
);
assert_eq!(
span_data
.attributes
.get(&Key::from_static_str("int"))
.unwrap()
.clone(),
Value::I64(5)
);
}
/// Checks that exported spans reach the wrapped exporter in order, and
/// that exporting after shutdown does not panic
#[tokio::test]
async fn test_exporter() {
let (sender, mut receiver) = mpsc::channel(10);
let exporter = OtelExporter::new(TestOtelExporter::new(sender));
// The background worker must still be running at this point
assert!(exporter.join().now_or_never().is_none());
let root = SpanContext {
trace_id: TraceId::new(232345).unwrap(),
parent_span_id: None,
span_id: SpanId::new(2343).unwrap(),
collector: None,
};
let s1 = root.child("foo");
let s2 = root.child("bar");
exporter.export(s1.clone());
exporter.export(s2.clone());
exporter.export(s2.clone());
let r1 = receiver.recv().await.unwrap();
let r2 = receiver.recv().await.unwrap();
let r3 = receiver.recv().await.unwrap();
exporter.shutdown();
exporter.join().await.unwrap();
// Should not be fatal despite exporter having been shutdown
exporter.export(s2.clone());
assert_eq!(root.span_id.get(), r1.parent_span_id.to_u64());
assert_eq!(s1.ctx.span_id.get(), r1.span_context.span_id().to_u64());
assert_eq!(s1.ctx.trace_id.get(), r1.span_context.trace_id().to_u128());
assert_eq!(root.span_id.get(), r2.parent_span_id.to_u64());
assert_eq!(s2.ctx.span_id.get(), r2.span_context.span_id().to_u64());
assert_eq!(s2.ctx.trace_id.get(), r2.span_context.trace_id().to_u128());
assert_eq!(root.span_id.get(), r3.parent_span_id.to_u64());
assert_eq!(s2.ctx.span_id.get(), r3.span_context.span_id().to_u64());
assert_eq!(s2.ctx.trace_id.get(), r3.span_context.trace_id().to_u128());
}
}

View File

@ -0,0 +1,305 @@
// Autogenerated by Thrift Compiler (0.13.0)
// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#![allow(unused_imports)]
#![allow(unused_extern_crates)]
#![cfg_attr(rustfmt, rustfmt_skip)]
extern crate thrift;
use thrift::OrderedFloat;
use std::cell::RefCell;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::{From, TryFrom};
use std::default::Default;
use std::error::Error;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::rc::Rc;
use thrift::{ApplicationError, ApplicationErrorKind, ProtocolError, ProtocolErrorKind, TThriftClient};
use thrift::protocol::{TFieldIdentifier, TListIdentifier, TMapIdentifier, TMessageIdentifier, TMessageType, TInputProtocol, TOutputProtocol, TSetIdentifier, TStructIdentifier, TType};
use thrift::protocol::field_id;
use thrift::protocol::verify_expected_message_type;
use thrift::protocol::verify_expected_sequence_number;
use thrift::protocol::verify_expected_service_call;
use thrift::protocol::verify_required_field_exists;
use thrift::server::TProcessor;
use super::jaeger;
use super::zipkincore;
//
// Agent service client
//
/// Client-side interface of the thrift `Agent` service: one method per
/// service call.
pub trait TAgentSyncClient {
/// Issues the `emitZipkinBatch` call carrying `spans`
fn emit_zipkin_batch(&mut self, spans: Vec<zipkincore::Span>) -> thrift::Result<()>;
/// Issues the `emitBatch` call carrying `batch`
fn emit_batch(&mut self, batch: jaeger::Batch) -> thrift::Result<()>;
}
/// Marker trait: any `TThriftClient` that also implements this marker
/// receives the blanket `TAgentSyncClient` implementation.
pub trait TAgentSyncClientMarker {}
/// Concrete `Agent` service client that owns an input protocol, an
/// output protocol and the current message sequence number.
pub struct AgentSyncClient<IP, OP> where IP: TInputProtocol, OP: TOutputProtocol {
_i_prot: IP,
_o_prot: OP,
_sequence_number: i32,
}
impl <IP, OP> AgentSyncClient<IP, OP> where IP: TInputProtocol, OP: TOutputProtocol {
/// Creates a client over the given protocols with the sequence number
/// starting at 0
pub fn new(input_protocol: IP, output_protocol: OP) -> AgentSyncClient<IP, OP> {
AgentSyncClient { _i_prot: input_protocol, _o_prot: output_protocol, _sequence_number: 0 }
}
}
/// Exposes the client's protocols and sequence counter through the
/// generic `TThriftClient` accessors.
impl <IP, OP> TThriftClient for AgentSyncClient<IP, OP> where IP: TInputProtocol, OP: TOutputProtocol {
fn i_prot_mut(&mut self) -> &mut dyn TInputProtocol { &mut self._i_prot }
fn o_prot_mut(&mut self) -> &mut dyn TOutputProtocol { &mut self._o_prot }
fn sequence_number(&self) -> i32 { self._sequence_number }
// Increments first, then returns the new value
fn increment_sequence_number(&mut self) -> i32 { self._sequence_number += 1; self._sequence_number }
}
// Opts `AgentSyncClient` into the blanket `TAgentSyncClient` implementation
impl <IP, OP> TAgentSyncClientMarker for AgentSyncClient<IP, OP> where IP: TInputProtocol, OP: TOutputProtocol {}
/// Blanket implementation of the `Agent` service calls for every marked
/// thrift client.
///
/// Both calls are written as `OneWay` messages: the arguments are
/// serialised to the output protocol, which is then flushed, and nothing
/// is read back from the input protocol.
impl <C: TThriftClient + TAgentSyncClientMarker> TAgentSyncClient for C {
fn emit_zipkin_batch(&mut self, spans: Vec<zipkincore::Span>) -> thrift::Result<()> {
(
{
// Each call gets a fresh sequence number
self.increment_sequence_number();
let message_ident = TMessageIdentifier::new("emitZipkinBatch", TMessageType::OneWay, self.sequence_number());
let call_args = AgentEmitZipkinBatchArgs { spans: spans };
self.o_prot_mut().write_message_begin(&message_ident)?;
call_args.write_to_out_protocol(self.o_prot_mut())?;
self.o_prot_mut().write_message_end()?;
self.o_prot_mut().flush()
}
)?;
Ok(())
}
fn emit_batch(&mut self, batch: jaeger::Batch) -> thrift::Result<()> {
(
{
// Each call gets a fresh sequence number
self.increment_sequence_number();
let message_ident = TMessageIdentifier::new("emitBatch", TMessageType::OneWay, self.sequence_number());
let call_args = AgentEmitBatchArgs { batch: batch };
self.o_prot_mut().write_message_begin(&message_ident)?;
call_args.write_to_out_protocol(self.o_prot_mut())?;
self.o_prot_mut().write_message_end()?;
self.o_prot_mut().flush()
}
)?;
Ok(())
}
}
//
// Agent service processor
//
/// Server-side interface of the thrift `Agent` service: implemented by
/// the type that actually handles each decoded call.
pub trait AgentSyncHandler {
/// Handles a decoded `emitZipkinBatch` call
fn handle_emit_zipkin_batch(&self, spans: Vec<zipkincore::Span>) -> thrift::Result<()>;
/// Handles a decoded `emitBatch` call
fn handle_emit_batch(&self, batch: jaeger::Batch) -> thrift::Result<()>;
}
/// Thrift processor that decodes incoming `Agent` calls and forwards
/// them to the wrapped handler.
pub struct AgentSyncProcessor<H: AgentSyncHandler> {
handler: H,
}
impl <H: AgentSyncHandler> AgentSyncProcessor<H> {
/// Wraps `handler` in a processor
pub fn new(handler: H) -> AgentSyncProcessor<H> {
AgentSyncProcessor {
handler,
}
}
// Thin wrappers delegating to the matching `TAgentProcessFunctions` function
fn process_emit_zipkin_batch(&self, incoming_sequence_number: i32, i_prot: &mut dyn TInputProtocol, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
TAgentProcessFunctions::process_emit_zipkin_batch(&self.handler, incoming_sequence_number, i_prot, o_prot)
}
fn process_emit_batch(&self, incoming_sequence_number: i32, i_prot: &mut dyn TInputProtocol, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
TAgentProcessFunctions::process_emit_batch(&self.handler, incoming_sequence_number, i_prot, o_prot)
}
}
/// Namespace for the per-call processing functions of the `Agent` service.
///
/// Each function reads the call arguments from the input protocol and
/// invokes the handler. The sequence number and output protocol
/// parameters are ignored. Handler errors that are not already
/// `thrift::Error::Application` are wrapped in an `ApplicationError` of
/// kind `Unknown`.
pub struct TAgentProcessFunctions;
impl TAgentProcessFunctions {
/// Decodes and dispatches an `emitZipkinBatch` call
pub fn process_emit_zipkin_batch<H: AgentSyncHandler>(handler: &H, _: i32, i_prot: &mut dyn TInputProtocol, _: &mut dyn TOutputProtocol) -> thrift::Result<()> {
let args = AgentEmitZipkinBatchArgs::read_from_in_protocol(i_prot)?;
match handler.handle_emit_zipkin_batch(args.spans) {
Ok(_) => {
Ok(())
},
Err(e) => {
match e {
// Application errors are passed through untouched
thrift::Error::Application(app_err) => {
Err(thrift::Error::Application(app_err))
},
// Anything else is wrapped as an "unknown" application error
_ => {
let ret_err = {
ApplicationError::new(
ApplicationErrorKind::Unknown,
e.description()
)
};
Err(thrift::Error::Application(ret_err))
},
}
},
}
}
/// Decodes and dispatches an `emitBatch` call
pub fn process_emit_batch<H: AgentSyncHandler>(handler: &H, _: i32, i_prot: &mut dyn TInputProtocol, _: &mut dyn TOutputProtocol) -> thrift::Result<()> {
let args = AgentEmitBatchArgs::read_from_in_protocol(i_prot)?;
match handler.handle_emit_batch(args.batch) {
Ok(_) => {
Ok(())
},
Err(e) => {
match e {
// Application errors are passed through untouched
thrift::Error::Application(app_err) => {
Err(thrift::Error::Application(app_err))
},
// Anything else is wrapped as an "unknown" application error
_ => {
let ret_err = {
ApplicationError::new(
ApplicationErrorKind::Unknown,
e.description()
)
};
Err(thrift::Error::Application(ret_err))
},
}
},
}
}
}
/// Entry point for an incoming message: reads the message header,
/// dispatches on the method name and hands the outcome to
/// `thrift::server::handle_process_result`.
impl <H: AgentSyncHandler> TProcessor for AgentSyncProcessor<H> {
fn process(&self, i_prot: &mut dyn TInputProtocol, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
let message_ident = i_prot.read_message_begin()?;
let res = match &*message_ident.name {
"emitZipkinBatch" => {
self.process_emit_zipkin_batch(message_ident.sequence_number, i_prot, o_prot)
},
"emitBatch" => {
self.process_emit_batch(message_ident.sequence_number, i_prot, o_prot)
},
// Any other method name is reported as an `UnknownMethod` application error
method => {
Err(
thrift::Error::Application(
ApplicationError::new(
ApplicationErrorKind::UnknownMethod,
format!("unknown method {}", method)
)
)
)
},
};
thrift::server::handle_process_result(&message_ident, res, o_prot)
}
}
//
// AgentEmitZipkinBatchArgs
//
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct AgentEmitZipkinBatchArgs {
spans: Vec<zipkincore::Span>,
}
impl AgentEmitZipkinBatchArgs {
fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<AgentEmitZipkinBatchArgs> {
i_prot.read_struct_begin()?;
let mut f_1: Option<Vec<zipkincore::Span>> = None;
loop {
let field_ident = i_prot.read_field_begin()?;
if field_ident.field_type == TType::Stop {
break;
}
let field_id = field_id(&field_ident)?;
match field_id {
1 => {
let list_ident = i_prot.read_list_begin()?;
let mut val: Vec<zipkincore::Span> = Vec::with_capacity(list_ident.size as usize);
for _ in 0..list_ident.size {
let list_elem_0 = zipkincore::Span::read_from_in_protocol(i_prot)?;
val.push(list_elem_0);
}
i_prot.read_list_end()?;
f_1 = Some(val);
},
_ => {
i_prot.skip(field_ident.field_type)?;
},
};
i_prot.read_field_end()?;
}
i_prot.read_struct_end()?;
verify_required_field_exists("AgentEmitZipkinBatchArgs.spans", &f_1)?;
let ret = AgentEmitZipkinBatchArgs {
spans: f_1.expect("auto-generated code should have checked for presence of required fields"),
};
Ok(ret)
}
fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
let struct_ident = TStructIdentifier::new("emitZipkinBatch_args");
o_prot.write_struct_begin(&struct_ident)?;
o_prot.write_field_begin(&TFieldIdentifier::new("spans", TType::List, 1))?;
o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.spans.len() as i32))?;
for e in &self.spans {
e.write_to_out_protocol(o_prot)?;
o_prot.write_list_end()?;
}
o_prot.write_field_end()?;
o_prot.write_field_stop()?;
o_prot.write_struct_end()
}
}
//
// AgentEmitBatchArgs
//
/// Argument struct of the `emitBatch` call: a single required field
/// (id 1) holding the jaeger batch.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct AgentEmitBatchArgs {
batch: jaeger::Batch,
}
impl AgentEmitBatchArgs {
/// Reads the argument struct from `i_prot`.
///
/// Unknown fields are skipped. Returns an error if the required `batch`
/// field is absent or if any protocol read fails.
fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<AgentEmitBatchArgs> {
i_prot.read_struct_begin()?;
let mut f_1: Option<jaeger::Batch> = None;
loop {
let field_ident = i_prot.read_field_begin()?;
// A `Stop` field marks the end of the struct
if field_ident.field_type == TType::Stop {
break;
}
let field_id = field_id(&field_ident)?;
match field_id {
1 => {
let val = jaeger::Batch::read_from_in_protocol(i_prot)?;
f_1 = Some(val);
},
_ => {
i_prot.skip(field_ident.field_type)?;
},
};
i_prot.read_field_end()?;
}
i_prot.read_struct_end()?;
verify_required_field_exists("AgentEmitBatchArgs.batch", &f_1)?;
let ret = AgentEmitBatchArgs {
batch: f_1.expect("auto-generated code should have checked for presence of required fields"),
};
Ok(ret)
}
/// Writes the argument struct to `o_prot` as `emitBatch_args`.
fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
let struct_ident = TStructIdentifier::new("emitBatch_args");
o_prot.write_struct_begin(&struct_ident)?;
o_prot.write_field_begin(&TFieldIdentifier::new("batch", TType::Struct, 1))?;
self.batch.write_to_out_protocol(o_prot)?;
o_prot.write_field_end()?;
o_prot.write_field_stop()?;
o_prot.write_struct_end()
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -15,25 +15,20 @@ const B3_TRACE_ID_HEADER: &str = "X-B3-TraceId";
const B3_PARENT_SPAN_ID_HEADER: &str = "X-B3-ParentSpanId";
const B3_SPAN_ID_HEADER: &str = "X-B3-SpanId";
const JAEGER_TRACE_HEADER: &str = "uber-trace-id";
/// Error decoding SpanContext from transport representation
#[derive(Debug, Snafu)]
pub enum ContextError {
#[snafu(display("header '{}' not found", header))]
Missing { header: &'static str },
Missing { header: String },
#[snafu(display("header '{}' has non-UTF8 content: {}", header, source))]
InvalidUtf8 {
header: &'static str,
header: String,
source: http::header::ToStrError,
},
#[snafu(display("error decoding header '{}': {}", header, source))]
HeaderDecodeError {
header: &'static str,
source: DecodeError,
},
HeaderDecodeError { header: String, source: DecodeError },
}
/// Error decoding a specific header value
@ -68,35 +63,73 @@ fn parse_span(s: &str) -> Result<SpanId, DecodeError> {
))
}
/// Create a SpanContext for the trace described in the request's headers
pub fn parse_span_ctx(
collector: &Arc<dyn TraceCollector>,
headers: &HeaderMap,
) -> Result<Option<SpanContext>, ContextError> {
match ContextCodec::detect(headers) {
None => Ok(None),
Some(ContextCodec::B3) => decode_b3(collector, headers),
Some(ContextCodec::Jaeger) => decode_jaeger(collector, headers),
/// Extracts tracing information, such as a `SpanContext`, if any,
/// from HTTP request headers.
#[derive(Debug, Clone, Default)]
pub struct TraceHeaderParser {
/// header that contains pre-existing trace context, if any
jaeger_trace_context_header_name: Option<Arc<str>>,
/// header that forces sampling
jaeger_debug_name: Option<Arc<str>>,
}
impl TraceHeaderParser {
/// Create a new span context parser with no Jaeger header names
/// configured; until the `with_jaeger_*` builder methods are called,
/// only the B3 headers are recognised
pub fn new() -> Self {
Default::default()
}
}
/// The codec used to encode trace context
enum ContextCodec {
/// <https://github.com/openzipkin/b3-propagation#multiple-headers>
B3,
/// <https://www.jaegertracing.io/docs/1.21/client-libraries/#propagation-format>
Jaeger,
}
/// specify a header for jaeger_trace_context_header_name
///
/// For example, 'uber-trace-id'
pub fn with_jaeger_trace_context_header_name(mut self, name: impl AsRef<str>) -> Self {
self.jaeger_trace_context_header_name = Some(name.as_ref().into());
self
}
impl ContextCodec {
fn detect(headers: &HeaderMap) -> Option<Self> {
if headers.contains_key(JAEGER_TRACE_HEADER) {
Some(Self::Jaeger)
} else if headers.contains_key(B3_TRACE_ID_HEADER) {
Some(Self::B3)
} else {
None
/// specify the name of the header that forces a new trace to be created
///
/// For example, 'jaeger-debug-id'
pub fn with_jaeger_debug_name(mut self, name: impl AsRef<str>) -> Self {
self.jaeger_debug_name = Some(name.as_ref().into());
self
}
/// Create a SpanContext for the trace described in the request's
/// headers, if any
///
/// Currently supports the following formats:
/// * <https://github.com/openzipkin/b3-propagation#multiple-headers>
/// * <https://www.jaegertracing.io/docs/1.21/client-libraries/#propagation-format>
pub fn parse(
&self,
collector: &Arc<dyn TraceCollector>,
headers: &HeaderMap,
) -> Result<Option<SpanContext>, ContextError> {
if let Some(trace_header) = self.jaeger_trace_context_header_name.as_ref() {
if headers.contains_key(trace_header.as_ref()) {
return decode_jaeger(collector, headers, trace_header.as_ref());
}
}
if headers.contains_key(B3_TRACE_ID_HEADER) {
return decode_b3(collector, headers);
}
if let Some(debug_header_name) = self.jaeger_debug_name.as_ref() {
if let Some(debug_header_value) = headers.get(debug_header_name.as_ref()) {
// create a new trace / span
let new_trace_context = SpanContext::new(Arc::clone(collector));
// It would be nice to record the debug-name in the span somehow for easy finding in Jaeger
// for now, also log it.
let trace_id = format!("{:x}", new_trace_context.trace_id.get());
info!(%trace_id, ?debug_header_value, "Created new trace rooted at IOx");
return Ok(Some(new_trace_context));
}
}
Ok(None)
}
}
@ -171,8 +204,9 @@ impl FromStr for JaegerCtx {
fn decode_jaeger(
collector: &Arc<dyn TraceCollector>,
headers: &HeaderMap,
jaeger_header: &str,
) -> Result<Option<SpanContext>, ContextError> {
let decoded: JaegerCtx = required_header(headers, JAEGER_TRACE_HEADER, FromStr::from_str)?;
let decoded: JaegerCtx = required_header(headers, jaeger_header, FromStr::from_str)?;
if decoded.flags & 0x01 == 0 {
return Ok(None);
}
@ -192,14 +226,15 @@ fn decode_jaeger(
/// - Returns Ok(Some(_)) otherwise
fn decoded_header<'a>(
headers: &'a HeaderMap,
header: &'static str,
header: &str,
) -> Result<Option<&'a str>, ContextError> {
headers
.get(header)
.map(|value| {
value
.to_str()
.map_err(|source| ContextError::InvalidUtf8 { header, source })
value.to_str().map_err(|source| ContextError::InvalidUtf8 {
header: header.to_string(),
source,
})
})
.transpose()
}
@ -211,13 +246,16 @@ fn decoded_header<'a>(
/// - Returns Ok(Some(_)) otherwise
fn parsed_header<T, F: FnOnce(&str) -> Result<T, DecodeError>>(
headers: &HeaderMap,
header: &'static str,
header: &str,
parse: F,
) -> Result<Option<T>, ContextError> {
decoded_header(headers, header)?
.map(parse)
.transpose()
.map_err(|source| ContextError::HeaderDecodeError { source, header })
.map_err(|source| ContextError::HeaderDecodeError {
source,
header: header.to_string(),
})
}
/// Decodes and parses a given required header from the provided HeaderMap
@ -226,10 +264,12 @@ fn parsed_header<T, F: FnOnce(&str) -> Result<T, DecodeError>>(
/// - Returns Ok(str) otherwise
fn required_header<T, F: FnOnce(&str) -> Result<T, DecodeError>>(
headers: &HeaderMap,
header: &'static str,
header: &str,
parse: F,
) -> Result<T, ContextError> {
parsed_header(headers, header, parse)?.ok_or(ContextError::Missing { header })
parsed_header(headers, header, parse)?.ok_or(ContextError::Missing {
header: header.to_string(),
})
}
#[cfg(test)]
@ -240,31 +280,31 @@ mod tests {
#[test]
fn test_decode_b3() {
let parser = TraceHeaderParser::new();
let collector: Arc<dyn TraceCollector> = Arc::new(trace::LogTraceCollector::new());
let mut headers = HeaderMap::new();
// No headers should be None
assert!(parse_span_ctx(&collector, &headers).unwrap().is_none());
assert!(parser.parse(&collector, &headers).unwrap().is_none());
headers.insert(B3_TRACE_ID_HEADER, HeaderValue::from_static("ee25f"));
headers.insert(B3_SAMPLED_HEADER, HeaderValue::from_static("0"));
// Not sampled
assert!(parse_span_ctx(&collector, &headers).unwrap().is_none());
assert!(parser.parse(&collector, &headers).unwrap().is_none());
headers.insert(B3_SAMPLED_HEADER, HeaderValue::from_static("1"));
// Missing required headers
assert_eq!(
parse_span_ctx(&collector, &headers)
.unwrap_err()
.to_string(),
parser.parse(&collector, &headers).unwrap_err().to_string(),
"header 'X-B3-SpanId' not found"
);
headers.insert(B3_SPAN_ID_HEADER, HeaderValue::from_static("34e"));
let span = parse_span_ctx(&collector, &headers).unwrap().unwrap();
let span = parser.parse(&collector, &headers).unwrap().unwrap();
assert_eq!(span.span_id.0.get(), 0x34e);
assert_eq!(span.trace_id.0.get(), 0xee25f);
@ -275,7 +315,7 @@ mod tests {
HeaderValue::from_static("4595945"),
);
let span = parse_span_ctx(&collector, &headers).unwrap().unwrap();
let span = parser.parse(&collector, &headers).unwrap().unwrap();
assert_eq!(span.span_id.0.get(), 0x34e);
assert_eq!(span.trace_id.0.get(), 0xee25f);
@ -284,7 +324,7 @@ mod tests {
headers.insert(B3_SPAN_ID_HEADER, HeaderValue::from_static("not a number"));
assert_eq!(
parse_span_ctx(&collector, &headers)
parser.parse(&collector, &headers)
.unwrap_err()
.to_string(),
"error decoding header 'X-B3-SpanId': value decode error: invalid digit found in string"
@ -293,77 +333,126 @@ mod tests {
headers.insert(B3_SPAN_ID_HEADER, HeaderValue::from_static("0"));
assert_eq!(
parse_span_ctx(&collector, &headers)
.unwrap_err()
.to_string(),
parser.parse(&collector, &headers).unwrap_err().to_string(),
"error decoding header 'X-B3-SpanId': value cannot be 0"
);
}
#[test]
fn test_decode_jaeger() {
const TRACE_HEADER: &str = "uber-trace-id";
let parser = TraceHeaderParser::new().with_jaeger_trace_context_header_name(TRACE_HEADER);
let collector: Arc<dyn TraceCollector> = Arc::new(trace::LogTraceCollector::new());
let mut headers = HeaderMap::new();
// Invalid format
headers.insert(JAEGER_TRACE_HEADER, HeaderValue::from_static("invalid"));
headers.insert(TRACE_HEADER, HeaderValue::from_static("invalid"));
assert_eq!(
parse_span_ctx(&collector, &headers)
parser.parse(&collector, &headers)
.unwrap_err()
.to_string(),
"error decoding header 'uber-trace-id': Expected \"trace-id:span-id:parent-span-id:flags\""
);
// Not sampled
headers.insert(
JAEGER_TRACE_HEADER,
HeaderValue::from_static("343:4325345:0:0"),
);
assert!(parse_span_ctx(&collector, &headers).unwrap().is_none());
headers.insert(TRACE_HEADER, HeaderValue::from_static("343:4325345:0:0"));
assert!(parser.parse(&collector, &headers).unwrap().is_none());
// Sampled
headers.insert(
JAEGER_TRACE_HEADER,
HeaderValue::from_static("3a43:432e345:0:1"),
);
let span = parse_span_ctx(&collector, &headers).unwrap().unwrap();
headers.insert(TRACE_HEADER, HeaderValue::from_static("3a43:432e345:0:1"));
let span = parser.parse(&collector, &headers).unwrap().unwrap();
assert_eq!(span.trace_id.0.get(), 0x3a43);
assert_eq!(span.span_id.0.get(), 0x432e345);
assert!(span.parent_span_id.is_none());
// Parent span
headers.insert(
JAEGER_TRACE_HEADER,
HeaderValue::from_static("343:4325345:3434:F"),
);
let span = parse_span_ctx(&collector, &headers).unwrap().unwrap();
headers.insert(TRACE_HEADER, HeaderValue::from_static("343:4325345:3434:F"));
let span = parser.parse(&collector, &headers).unwrap().unwrap();
assert_eq!(span.trace_id.0.get(), 0x343);
assert_eq!(span.span_id.0.get(), 0x4325345);
assert_eq!(span.parent_span_id.unwrap().0.get(), 0x3434);
// Invalid trace id
headers.insert(
JAEGER_TRACE_HEADER,
HeaderValue::from_static("0:4325345:3434:1"),
);
headers.insert(TRACE_HEADER, HeaderValue::from_static("0:4325345:3434:1"));
assert_eq!(
parse_span_ctx(&collector, &headers)
.unwrap_err()
.to_string(),
parser.parse(&collector, &headers).unwrap_err().to_string(),
"error decoding header 'uber-trace-id': value cannot be 0"
);
headers.insert(
JAEGER_TRACE_HEADER,
TRACE_HEADER,
HeaderValue::from_static("008e813572f53b3a:008e813572f53b3a:0000000000000000:1"),
);
let span = parse_span_ctx(&collector, &headers).unwrap().unwrap();
let span = parser.parse(&collector, &headers).unwrap().unwrap();
assert_eq!(span.trace_id.0.get(), 0x008e813572f53b3a);
assert_eq!(span.span_id.0.get(), 0x008e813572f53b3a);
assert!(span.parent_span_id.is_none());
}
/// A parser configured with a custom Jaeger trace-context header name must
/// read the context from that header only, ignoring the default name.
#[test]
fn test_decode_jaeger_custom_header() {
    const DEFAULT_JAEGER_TRACE_HEADER: &str = "uber-trace-id";
    const CUSTOM_JAEGER_TRACE_HEADER: &str = "my-awesome-header";

    let parser = TraceHeaderParser::new()
        .with_jaeger_trace_context_header_name(CUSTOM_JAEGER_TRACE_HEADER);
    let collector: Arc<dyn TraceCollector> = Arc::new(trace::LogTraceCollector::new());
    let trace_context = HeaderValue::from_static("1:2:3:1");

    // A context supplied under the default header name yields no span context.
    let mut default_only = HeaderMap::new();
    default_only.insert(DEFAULT_JAEGER_TRACE_HEADER, trace_context.clone());
    let ignored = parser.parse(&collector, &default_only).unwrap();
    assert!(ignored.is_none());

    // The very same value under the configured name is decoded.
    let mut custom_only = HeaderMap::new();
    custom_only.insert(CUSTOM_JAEGER_TRACE_HEADER, trace_context);
    let ctx = parser.parse(&collector, &custom_only).unwrap().unwrap();

    assert_eq!(ctx.trace_id.0.get(), 1);
    assert_eq!(ctx.span_id.0.get(), 2);
    assert_eq!(ctx.parent_span_id.unwrap().get(), 3);
}
/// With a Jaeger debug header name configured, the mere presence of that
/// header makes the parser return a span context that has no parent span.
#[test]
fn test_jaeger_debug_name() {
    let parser = TraceHeaderParser::new().with_jaeger_debug_name("force-a-trace");
    let collector: Arc<dyn TraceCollector> = Arc::new(trace::LogTraceCollector::new());
    let mut request_headers = HeaderMap::new();

    // No headers at all: nothing is returned.
    let without_debug = parser.parse(&collector, &request_headers).unwrap();
    assert!(without_debug.is_none());

    // Debug header present: a context is returned and it is a root
    // (no parent span id).
    request_headers.insert("force-a-trace", HeaderValue::from_static("please do"));
    let forced = parser.parse(&collector, &request_headers).unwrap().unwrap();
    assert!(forced.parent_span_id.is_none());
}
/// When a request carries both the trace-context header and the debug
/// header, the ids from the trace-context header are the ones returned.
#[test]
fn test_jaeger_debug_name_and_trace_context() {
    let parser = TraceHeaderParser::new()
        .with_jaeger_trace_context_header_name("uber-trace-id")
        .with_jaeger_debug_name("force-a-trace");
    let collector: Arc<dyn TraceCollector> = Arc::new(trace::LogTraceCollector::new());

    // Request carrying both the trace context and the debug header.
    let mut both_headers = HeaderMap::new();
    both_headers.insert("uber-trace-id", HeaderValue::from_static("1:2:3:1"));
    both_headers.insert("force-a-trace", HeaderValue::from_static("please do"));

    let ctx = parser.parse(&collector, &both_headers).unwrap().unwrap();

    // The ids are those from the trace-context header value.
    assert_eq!(ctx.trace_id.0.get(), 1);
    assert_eq!(ctx.span_id.0.get(), 2);
    assert_eq!(ctx.parent_span_id.unwrap().get(), 3);
}
}

View File

@ -7,7 +7,7 @@
//! For those not familiar with tower:
//!
//! - A Layer produces a Service
//! - A Service can then be called with a request which returns a Future
//! - A Service can then be called with a request which returns a Future
//! - This Future returns a response which contains a Body
//! - This Body contains the data payload (potentially streamed)
//!
@ -18,7 +18,7 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use futures::ready;
use http::{Request, Response};
use http::{HeaderValue, Request, Response};
use http_body::SizeHint;
use pin_project::pin_project;
use tower::{Layer, Service};
@ -27,7 +27,7 @@ use observability_deps::tracing::error;
use trace::{span::SpanRecorder, TraceCollector};
use crate::classify::{classify_headers, classify_response, Classification};
use crate::ctx::parse_span_ctx;
use crate::ctx::TraceHeaderParser;
use crate::metrics::{MetricsCollection, MetricsRecorder};
/// `TraceLayer` implements `tower::Layer` and can be used to decorate a
@ -41,17 +41,20 @@ use crate::metrics::{MetricsCollection, MetricsRecorder};
/// [1]: https://www.weave.works/blog/the-red-method-key-metrics-for-microservices-architecture/
#[derive(Debug, Clone)]
pub struct TraceLayer {
/// Header parser cloned into every `TraceService` this layer produces.
trace_header_parser: TraceHeaderParser,
/// Metrics collection shared (via `Arc`) with every produced service;
/// built in `new` from the metric registry and the `is_grpc` flag.
metrics: Arc<MetricsCollection>,
/// Optional trace collector, cloned into every produced service.
/// NOTE(review): presumably `None` means no spans are recorded — confirm.
collector: Option<Arc<dyn TraceCollector>>,
}
impl TraceLayer {
pub fn new(
trace_header_parser: TraceHeaderParser,
metric_registry: Arc<metric::Registry>,
collector: Option<Arc<dyn TraceCollector>>,
is_grpc: bool,
) -> Self {
Self {
trace_header_parser,
metrics: Arc::new(MetricsCollection::new(metric_registry, is_grpc)),
collector,
}
@ -66,6 +69,7 @@ impl<S> Layer<S> for TraceLayer {
service,
collector: self.collector.clone(),
metrics: Arc::clone(&self.metrics),
trace_header_parser: self.trace_header_parser.clone(),
}
}
}
@ -74,6 +78,7 @@ impl<S> Layer<S> for TraceLayer {
#[derive(Debug, Clone)]
pub struct TraceService<S> {
/// The wrapped inner service.
service: S,
/// Parses the headers of each incoming request into an optional span
/// context.
trace_header_parser: TraceHeaderParser,
/// Optional trace collector passed to the header parser.
/// NOTE(review): presumably `None` means no spans are recorded — confirm.
collector: Option<Arc<dyn TraceCollector>>,
/// Metrics collection shared with the `TraceLayer` that created this
/// service.
metrics: Arc<MetricsCollection>,
}
@ -105,7 +110,7 @@ where
}
};
let span = match parse_span_ctx(collector, request.headers()) {
let span = match self.trace_header_parser.parse(collector, request.headers()) {
Ok(Some(ctx)) => {
let span = ctx.child("IOx");
@ -176,11 +181,21 @@ where
}
match result {
Ok(response) => Poll::Ready(Ok(response.map(|body| TracedBody {
span_recorder: self.as_mut().project().span_recorder.take(),
inner: body,
metrics_recorder,
}))),
Ok(mut response) => {
// add trace-id header to the response, if we have one
let span_recorder = self.as_mut().project().span_recorder.take();
if let Some(trace_id) = span_recorder.span().map(|span| span.ctx.trace_id) {
// format as hex
let trace_id = HeaderValue::from_str(&format!("{:x}", trace_id.get())).unwrap();
response.headers_mut().insert("trace-id", trace_id);
}
Poll::Ready(Ok(response.map(|body| TracedBody {
span_recorder,
inner: body,
metrics_recorder,
})))
}
Err(e) => Poll::Ready(Err(e)),
}
}

View File

@ -15,3 +15,7 @@ parking_lot = "0.11.2"
rdkafka = "0.26.0"
tokio = { version = "1.11", features = ["macros", "fs"] }
uuid = { version = "0.8", features = ["serde", "v4"] }
[package.metadata.cargo-udeps.ignore]
# used within the `maybe_skip_kafka_integration` macro and cannot be detected by a normal analysis pass
normal = ["dotenv"]