diff --git a/.circleci/config.yml b/.circleci/config.yml index 7385f48597..d723c6e0f6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -354,15 +354,15 @@ jobs: - checkout - rust_components - cache_restore - - run: - name: Print rustc target CPU options - command: cargo run --release --no-default-features --features="aws,gcp,azure,jemalloc_replacing_malloc" --bin print_cpu - run: name: Cargo release build with target arch set for CRoaring command: cargo build --release --no-default-features --features="aws,gcp,azure,jemalloc_replacing_malloc" - run: | echo sha256sum after build is sha256sum target/release/influxdb_iox + - run: + name: Print rustc target CPU options + command: target/release/influxdb_iox debug print-cpu - setup_remote_docker: # There seems to be a cache invalidation bug in docker # or in the way that circleci implements layer caching. diff --git a/Cargo.lock b/Cargo.lock index 12a2d2173b..7b02ddd8ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -283,9 +283,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.61" +version = "0.3.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7a905d892734eea339e896738c14b9afce22b5318f64b951e70bf3844419b01" +checksum = "091bcdf2da9950f96aa522681ce805e6857f6ca8df73833d35736ab2dc78e152" dependencies = [ "addr2line", "cc", @@ -1429,9 +1429,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "399c583b2979440c60be0821a6199eca73bc3c8dcd9d070d75ac726e2c6186e5" +checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" dependencies = [ "bytes", "http", @@ -1458,9 +1458,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.13" +version = "0.14.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d1cfb9e4f68655fa04c01f59edb405b6074a0f7118ea881e5026e4a1cd8593" +checksum = "2b91bb1f221b6ea1f1e4371216b70f40748774c2fb5971b450c07773fb92d26b" dependencies = [ "bytes", "futures-channel", @@ -1635,7 +1635,6 @@ dependencies = [ "prost", "query", "rand", - "rdkafka", "read_buffer", "reqwest", "rustyline", @@ -1931,9 +1930,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2f96d100e1cf1929e7719b7edb3b90ab5298072638fccd77be9ce942ecdfce" +checksum = "869d572136620d55835903746bcb5cdc54cb2851fd0aeec53220b4bb65ef3013" [[package]] name = "libloading" @@ -2035,9 +2034,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "matchers" -version = "0.0.1" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ "regex-automata", ] @@ -2219,6 +2218,7 @@ dependencies = [ "bytes", "criterion", "flate2", + "hashbrown", "influxdb_line_protocol", "mutable_batch", "schema", @@ -2504,9 +2504,9 @@ dependencies = [ [[package]] name = "object" -version = "0.26.2" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39f37e50073ccad23b6d09bcb5b263f4e76d3bb6038e4a3c08e52162ffa8abc2" +checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" dependencies = [ 
"memchr", ] @@ -4518,9 +4518,9 @@ dependencies = [ [[package]] name = "tower" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d15a6b60cdff0cb039d81d3b37f8bc3d7e53dca09069aae3ef2502ca4834fe30" +checksum = "c00e500fff5fa1131c866b246041a6bf96da9c965f8fe4128cb1421f23e93c00" dependencies = [ "futures-core", "futures-util", @@ -4658,12 +4658,11 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.2.25" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71" +checksum = "5cf865b5ddc38e503a29c41c4843e616a73028ae18c637bc3eb2afaef4909c84" dependencies = [ "ansi_term 0.12.1", - "chrono", "lazy_static", "matchers", "regex", diff --git a/Cargo.toml b/Cargo.toml index 2351e44efc..4db454858d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,45 +1,17 @@ -[package] -name = "influxdb_iox" -version = "0.1.0" -authors = ["Paul Dix "] -edition = "2018" -default-run = "influxdb_iox" -readme = "README.md" - -exclude = [ - "*.md", - "*.txt", - ".circleci/", - ".editorconfig", - ".git*", - ".github/", - ".kodiak.toml", - "Dockerfile*", - "LICENSE*", - "buf.yaml", - "docker/", - "docs/", - "massif.out.*", - "perf/", - "scripts/", - "tools/", -] - - -[[bin]] -name = "print_cpu" -path = "src/print_cpu.rs" - -[workspace] # In alphabetical order +[workspace] +# In alphabetical order members = [ "arrow_util", - "data_types", "client_util", + "data_types", "datafusion", "datafusion_util", "entry", "generated_types", + "grpc-router", + "grpc-router-test-gen", "influxdb2_client", + "influxdb_iox", "influxdb_iox_client", "influxdb_line_protocol", "influxdb_storage_client", @@ -66,6 +38,7 @@ members = [ "query", "query_tests", "read_buffer", + "schema", "server", "server_benchmarks", "test_helpers", @@ -75,131 +48,32 @@ members = [ "trace_http", "tracker", "trogging", - "schema", - "grpc-router", - "grpc-router/grpc-router-test-gen", "write_buffer", ] +default-members = ["influxdb_iox"] + +exclude = [ + "*.md", + "*.txt", + ".circleci/", + ".editorconfig", + ".git*", + ".github/", + ".kodiak.toml", + "Dockerfile*", + "LICENSE*", + "buf.yaml", + "docker/", + "docs/", + "massif.out.*", + "perf/", + "scripts/", + "test_fixtures/", + "tools/", +] [profile.release] debug = true [profile.bench] debug = true - -[dependencies] -# Workspace dependencies, in alphabetical order -datafusion = { path = "datafusion" } -data_types = { path = "data_types" } -entry = { path = "entry" } -generated_types = { path = "generated_types" } - -influxdb_iox_client = { path = "influxdb_iox_client", features = ["format"] } -influxdb_line_protocol = { path = "influxdb_line_protocol" } -internal_types = { path = "internal_types" } -iox_object_store = { path = "iox_object_store" } -logfmt = { path = "logfmt" } -metric = { path = "metric" } -metric_exporters = { path = "metric_exporters" } -mutable_buffer = { path = "mutable_buffer" } -num_cpus = "1.13.0" -object_store = { path = "object_store" } -observability_deps = { path = "observability_deps" } -panic_logging = { path = "panic_logging" } -parquet_catalog = { path = "parquet_catalog" } -parquet_file = { path = "parquet_file" } -predicate = { path = "predicate" } -query = { path = "query" } -read_buffer = { path = "read_buffer" } -server = { path = "server" } -trace = { path = "trace" } -trace_exporters = { path = "trace_exporters" } -trace_http = { path = "trace_http" } -tracker = { path = "tracker" } -trogging 
= { path = "trogging", default-features = false, features = ["structopt"] } -time = { path = "time" } - -# Crates.io dependencies, in alphabetical order -arrow = { version = "6.0", features = ["prettyprint"] } -arrow-flight = "6.0" -backtrace = "0.3" -byteorder = "1.3.4" -bytes = "1.0" -chrono = "0.4" -clap = "2.33.1" -csv = "1.1" -dirs = "4.0.0" -dotenv = "0.15.0" -flate2 = "1.0" -futures = "0.3" -hashbrown = "0.11" -http = "0.2.0" -humantime = "2.1.0" -hyper = "0.14" -libc = { version = "0.2" } -log = "0.4" -once_cell = { version = "1.4.0", features = ["parking_lot"] } -parking_lot = "0.11.2" -itertools = "0.10.1" -parquet = "6.0" -pin-project = "1.0" -# used by arrow/datafusion anyway -comfy-table = { version = "4.0", default-features = false } -pprof = { version = "^0.5", default-features = false, features = ["flamegraph", "protobuf"], optional = true } -prost = "0.8" -rustyline = { version = "9.0", default-features = false } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0.67" -serde_urlencoded = "0.7.0" -snafu = "0.6.9" -structopt = "0.3.25" -thiserror = "1.0.30" -tikv-jemalloc-ctl = { version = "0.4.0" } -tokio = { version = "1.11", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] } -tokio-stream = { version = "0.1.2", features = ["net"] } -tokio-util = { version = "0.6.3" } -tonic = "0.5.0" -tonic-health = "0.4.0" -tonic-reflection = "0.2.0" -tower = "0.4" -uuid = { version = "0.8", features = ["v4"] } - -# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive -tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] } -heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true } - - -[dev-dependencies] -# Workspace dependencies, in alphabetical order -arrow_util = { path = "arrow_util" } -entry = { path = "entry" } -influxdb2_client = { path = "influxdb2_client" } -influxdb_storage_client = { path = "influxdb_storage_client" } -influxdb_iox_client = { path = "influxdb_iox_client", features = ["flight"] } -test_helpers = { path = "test_helpers" } -parking_lot = "0.11.2" -write_buffer = { path = "write_buffer" } - -# Crates.io dependencies, in alphabetical order -assert_cmd = "2.0.2" -flate2 = "1.0" -hex = "0.4.2" -predicates = "2.0.3" -rand = "0.8.3" -rdkafka = "0.27.0" -reqwest = "0.11" -tempfile = "3.1.0" - -[features] -default = ["jemalloc_replacing_malloc"] - -azure = ["object_store/azure"] # Optional Azure Object store support -gcp = ["object_store/gcp"] # Optional GCP object store support -aws = ["object_store/aws"] # Optional AWS / S3 object store support -# pprof is an optional feature for pprof support - -# heappy is an optional feature; Not on by default as it -# runtime overhead on all allocations (calls to malloc). -# Cargo cannot currently implement mutually exclusive features so let's force every build -# to pick either heappy or jemalloc_replacing_malloc feature at least until we figure out something better. 
-jemalloc_replacing_malloc = ["tikv-jemalloc-sys"] diff --git a/arrow_util/Cargo.toml b/arrow_util/Cargo.toml index 0aa118f64b..e6682746a2 100644 --- a/arrow_util/Cargo.toml +++ b/arrow_util/Cargo.toml @@ -2,7 +2,7 @@ name = "arrow_util" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "Apache Arrow utilities" [dependencies] diff --git a/arrow_util/src/bitset.rs b/arrow_util/src/bitset.rs index 70c212b529..08b4559c4a 100644 --- a/arrow_util/src/bitset.rs +++ b/arrow_util/src/bitset.rs @@ -29,6 +29,12 @@ impl BitSet { bitset } + /// Reserve space for `count` further bits + pub fn reserve(&mut self, count: usize) { + let new_buf_len = (self.len + count + 7) >> 3; + self.buffer.reserve(new_buf_len); + } + /// Appends `count` unset bits pub fn append_unset(&mut self, count: usize) { self.len += count; diff --git a/client_util/Cargo.toml b/client_util/Cargo.toml index b61f69e75e..af6f4a1b16 100644 --- a/client_util/Cargo.toml +++ b/client_util/Cargo.toml @@ -3,7 +3,7 @@ name = "client_util" version = "0.1.0" authors = ["Raphael Taylor-Davies "] description = "Shared code for IOx clients" -edition = "2018" +edition = "2021" [dependencies] http = "0.2.3" @@ -13,4 +13,4 @@ tonic = { version = "0.5.0" } tower = "0.4" [dev-dependencies] -tokio = { version = "1.11", features = ["macros"] } \ No newline at end of file +tokio = { version = "1.11", features = ["macros"] } diff --git a/data_types/Cargo.toml b/data_types/Cargo.toml index 0a729fbc25..eee2b2c9ff 100644 --- a/data_types/Cargo.toml +++ b/data_types/Cargo.toml @@ -3,7 +3,7 @@ name = "data_types" version = "0.1.0" authors = ["pauldix "] description = "InfluxDB IOx data_types, shared between IOx instances and IOx clients" -edition = "2018" +edition = "2021" readme = "README.md" [dependencies] # In alphabetical order diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index 66af425cea..7326fa3728 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -2,7 +2,7 @@ name = "datafusion" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "Re-exports datafusion at a specific version" [dependencies] diff --git a/datafusion_util/Cargo.toml b/datafusion_util/Cargo.toml index 2a924f472a..4d3288bb71 100644 --- a/datafusion_util/Cargo.toml +++ b/datafusion_util/Cargo.toml @@ -2,7 +2,7 @@ name = "datafusion_util" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "Datafusion utilities" [dependencies] diff --git a/docker/Dockerfile.ci.integration b/docker/Dockerfile.ci.integration index 53cef783c8..9294cded81 100644 --- a/docker/Dockerfile.ci.integration +++ b/docker/Dockerfile.ci.integration @@ -21,4 +21,4 @@ ENV TEST_INTEGRATION=1 ENV KAFKA_CONNECT=kafka:9092 # Run the integration tests that connect to Kafka that will be running in another container -CMD ["sh", "-c", "./docker/integration_test.sh"] +CMD ["sh", "-c", "cargo test -p write_buffer kafka -- --nocapture"] diff --git a/docker/integration_test.sh b/docker/integration_test.sh deleted file mode 100755 index 70ad18cc2e..0000000000 --- a/docker/integration_test.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -set -euxo pipefail - -cargo test -p write_buffer kafka -- --nocapture -cargo test -p influxdb_iox --test end_to_end skip_replay -- --nocapture -cargo test -p influxdb_iox --test end_to_end write_buffer -- --nocapture diff --git a/docs/testing.md b/docs/testing.md index c3b43647ac..fdd68c2adb 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ 
-136,7 +136,7 @@ You can then run the tests with `KAFKA_CONNECT=localhost:9093`. To run just the tests, the full command would then be: ``` -TEST_INTEGRATION=1 KAFKA_CONNECT=localhost:9093 cargo test -p influxdb_iox --test end_to_end write_buffer +TEST_INTEGRATION=1 KAFKA_CONNECT=localhost:9093 cargo test -p write_buffer kafka --nocapture ``` ### Running `cargo test` in a Docker container diff --git a/entry/Cargo.toml b/entry/Cargo.toml index 459f6a59ae..6450bbda2f 100644 --- a/entry/Cargo.toml +++ b/entry/Cargo.toml @@ -2,7 +2,7 @@ name = "entry" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" description = "The entry format used by the write buffer" [dependencies] diff --git a/generated_types/Cargo.toml b/generated_types/Cargo.toml index 48400b3859..2bc44c8970 100644 --- a/generated_types/Cargo.toml +++ b/generated_types/Cargo.toml @@ -2,7 +2,7 @@ name = "generated_types" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order bytes = "1.0" diff --git a/generated_types/build.rs b/generated_types/build.rs index a759a30bba..6ba069d0c6 100644 --- a/generated_types/build.rs +++ b/generated_types/build.rs @@ -21,38 +21,44 @@ fn main() -> Result<()> { /// - `com.github.influxdata.idpe.storage.read.rs` /// - `influxdata.iox.catalog.v1.rs` /// - `influxdata.iox.management.v1.rs` +/// - `influxdata.iox.router.v1.rs` /// - `influxdata.iox.write.v1.rs` /// - `influxdata.platform.storage.rs` fn generate_grpc_types(root: &Path) -> Result<()> { - let storage_path = root.join("influxdata/platform/storage"); - let idpe_path = root.join("com/github/influxdata/idpe/storage/read"); let catalog_path = root.join("influxdata/iox/catalog/v1"); + let idpe_path = root.join("com/github/influxdata/idpe/storage/read"); let management_path = root.join("influxdata/iox/management/v1"); + let router_path = root.join("influxdata/iox/router/v1"); + let storage_path = root.join("influxdata/platform/storage"); let write_path = root.join("influxdata/iox/write/v1"); let proto_files = vec![ - storage_path.join("test.proto"), - storage_path.join("predicate.proto"), - storage_path.join("storage_common.proto"), - storage_path.join("service.proto"), - storage_path.join("storage_common_idpe.proto"), - idpe_path.join("source.proto"), catalog_path.join("catalog.proto"), catalog_path.join("parquet_metadata.proto"), catalog_path.join("predicate.proto"), - management_path.join("database_rules.proto"), + idpe_path.join("source.proto"), management_path.join("chunk.proto"), + management_path.join("database_rules.proto"), + management_path.join("jobs.proto"), management_path.join("partition.proto"), + management_path.join("partition_template.proto"), management_path.join("server_config.proto"), management_path.join("service.proto"), management_path.join("shard.proto"), - management_path.join("jobs.proto"), - write_path.join("service.proto"), - root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"), - root.join("grpc/health/v1/service.proto"), + management_path.join("write_buffer.proto"), root.join("google/longrunning/operations.proto"), root.join("google/rpc/error_details.proto"), root.join("google/rpc/status.proto"), + root.join("grpc/health/v1/service.proto"), + root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"), + router_path.join("router.proto"), + router_path.join("service.proto"), + storage_path.join("predicate.proto"), + storage_path.join("service.proto"), + storage_path.join("storage_common.proto"), + 
storage_path.join("storage_common_idpe.proto"), + storage_path.join("test.proto"), + write_path.join("service.proto"), ]; // Tell cargo to recompile if any of these proto files are changed diff --git a/generated_types/protos/influxdata/iox/management/v1/database_rules.proto b/generated_types/protos/influxdata/iox/management/v1/database_rules.proto index 2c07351ead..baf7feb39a 100644 --- a/generated_types/protos/influxdata/iox/management/v1/database_rules.proto +++ b/generated_types/protos/influxdata/iox/management/v1/database_rules.proto @@ -3,34 +3,9 @@ package influxdata.iox.management.v1; option go_package = "github.com/influxdata/iox/management/v1"; import "google/protobuf/duration.proto"; -import "google/protobuf/empty.proto"; +import "influxdata/iox/management/v1/partition_template.proto"; import "influxdata/iox/management/v1/shard.proto"; - -// `PartitionTemplate` is used to compute the partition key of each row that -// gets written. It can consist of the table name, a column name and its value, -// a formatted time, or a string column and regex captures of its value. For -// columns that do not appear in the input row, a blank value is output. -// -// The key is constructed in order of the template parts; thus ordering changes -// what partition key is generated. -message PartitionTemplate { - message Part { - message ColumnFormat { - string column = 1; - string format = 2; - } - - oneof part { - google.protobuf.Empty table = 1; - string column = 2; - string time = 3; - ColumnFormat regex = 4; - ColumnFormat strf_time = 5; - } - } - - repeated Part parts = 1; -} +import "influxdata/iox/management/v1/write_buffer.proto"; message LifecycleRules { // Once the total amount of buffered data in memory reaches this size start @@ -111,6 +86,9 @@ message LifecycleRules { uint64 parquet_cache_limit = 17; } +// Database rules. +// +// TODO(marco): add `WriteSources` to this message. message DatabaseRules { // The unencoded name of the database // @@ -128,6 +106,8 @@ message DatabaseRules { LifecycleRules lifecycle_rules = 3; // If not specified, does not configure any routing + // + // TODO(marco): remove this oneof routing_rules { // Shard config ShardConfig shard_config = 8; @@ -146,6 +126,8 @@ message DatabaseRules { // Optionally, the connection for the write buffer for writing or reading/restoring data. // // If not specified, does not configure a write buffer + // + // TODO(marco): remove this WriteBufferConnection write_buffer_connection = 13; } @@ -158,61 +140,6 @@ message PersistedDatabaseRules { DatabaseRules rules = 2; } -// Configures the use of a write buffer. -message WriteBufferConnection { - enum Direction { - // Unspecified direction, will be treated as an error. - DIRECTION_UNSPECIFIED = 0; - - // Writes into the buffer aka "producer". - DIRECTION_WRITE = 1; - - // Reads from the buffer aka "consumer". - DIRECTION_READ = 2; - } - - // If the buffer is used for reading or writing. - Direction direction = 1; - - // Which type should be used (e.g. "kafka", "mock") - string type = 2; - - // Connection string, depends on `type`. - string connection = 3; - - // Old non-nested auto-creation config. - reserved 4, 5, 7; - - // Special configs to be applied when establishing the connection. - // - // This depends on `type` and can configure aspects like timeouts. - map connection_config = 6; - - // Specifies if the sequencers (e.g. for Kafka in form of a topic w/ `n_sequencers` partitions) should be - // automatically created if they do not existing prior to reading or writing. 
- WriteBufferCreationConfig creation_config = 8; -} - -// Configs sequencer auto-creation for write buffers. -// -// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/ `n_sequencers` -// partitions. -message WriteBufferCreationConfig { - // Number of sequencers. - // - // How they are implemented depends on `type`, e.g. for Kafka this is mapped to the number of partitions. - // - // If 0, a server-side default is used - uint32 n_sequencers = 1; - - // Special configs to by applied when sequencers are created. - // - // This depends on `type` and can setup parameters like retention policy. - // - // Contains 0 or more key value pairs - map options = 2; -} - message RoutingConfig { Sink sink = 2; } diff --git a/generated_types/protos/influxdata/iox/management/v1/partition_template.proto b/generated_types/protos/influxdata/iox/management/v1/partition_template.proto new file mode 100644 index 0000000000..6c82c800cc --- /dev/null +++ b/generated_types/protos/influxdata/iox/management/v1/partition_template.proto @@ -0,0 +1,31 @@ +syntax = "proto3"; +package influxdata.iox.management.v1; +option go_package = "github.com/influxdata/iox/management/v1"; + +import "google/protobuf/empty.proto"; + +// `PartitionTemplate` is used to compute the partition key of each row that +// gets written. It can consist of the table name, a column name and its value, +// a formatted time, or a string column and regex captures of its value. For +// columns that do not appear in the input row, a blank value is output. +// +// The key is constructed in order of the template parts; thus ordering changes +// what partition key is generated. +message PartitionTemplate { + message Part { + message ColumnFormat { + string column = 1; + string format = 2; + } + + oneof part { + google.protobuf.Empty table = 1; + string column = 2; + string time = 3; + ColumnFormat regex = 4; + ColumnFormat strf_time = 5; + } + } + + repeated Part parts = 1; +} diff --git a/generated_types/protos/influxdata/iox/management/v1/shard.proto b/generated_types/protos/influxdata/iox/management/v1/shard.proto index 87a976795c..c5983e0eb3 100644 --- a/generated_types/protos/influxdata/iox/management/v1/shard.proto +++ b/generated_types/protos/influxdata/iox/management/v1/shard.proto @@ -30,10 +30,14 @@ message ShardConfig { /// If set to true the router will ignore any errors sent by the remote /// targets in this route. That is, the write request will succeed /// regardless of this route's success. + /// + /// TODO(marco): remove this bool ignore_errors = 3; /// Mapping between shard IDs and node groups. Other sharding rules use /// ShardId as targets. + /// + /// TODO(marco): remove this map shards = 4; } diff --git a/generated_types/protos/influxdata/iox/management/v1/write_buffer.proto b/generated_types/protos/influxdata/iox/management/v1/write_buffer.proto new file mode 100644 index 0000000000..86a59c08bb --- /dev/null +++ b/generated_types/protos/influxdata/iox/management/v1/write_buffer.proto @@ -0,0 +1,58 @@ +syntax = "proto3"; +package influxdata.iox.management.v1; +option go_package = "github.com/influxdata/iox/management/v1"; + +// Configures the use of a write buffer. +message WriteBufferConnection { + enum Direction { + // Unspecified direction, will be treated as an error. + DIRECTION_UNSPECIFIED = 0; + + // Writes into the buffer aka "producer". + DIRECTION_WRITE = 1; + + // Reads from the buffer aka "consumer". + DIRECTION_READ = 2; + } + + // If the buffer is used for reading or writing. 
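Relocating `WriteBufferConnection` and `WriteBufferCreationConfig` into `write_buffer.proto` does not change how these messages are built from Rust. A rough sketch of constructing one via the prost-generated types follows; the module path under `generated_types` and the use of `Default` for the map fields are assumptions, not something shown in this diff:

```rust
use generated_types::influxdata::iox::management::v1 as management;

/// Illustrative only: a Kafka-backed write-buffer "producer" connection with
/// auto-creation of two sequencers (i.e. Kafka partitions).
fn kafka_producer_connection(bootstrap: &str) -> management::WriteBufferConnection {
    management::WriteBufferConnection {
        direction: management::write_buffer_connection::Direction::Write as i32,
        r#type: "kafka".to_string(),
        connection: bootstrap.to_string(),
        creation_config: Some(management::WriteBufferCreationConfig {
            n_sequencers: 2,
            ..Default::default()
        }),
        ..Default::default()
    }
}
```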
+ Direction direction = 1; + + // Which type should be used (e.g. "kafka", "mock") + string type = 2; + + // Connection string, depends on `type`. + string connection = 3; + + // Old non-nested auto-creation config. + reserved 4, 5, 7; + + // Special configs to be applied when establishing the connection. + // + // This depends on `type` and can configure aspects like timeouts. + map connection_config = 6; + + // Specifies if the sequencers (e.g. for Kafka in form of a topic w/ `n_sequencers` partitions) should be + // automatically created if they do not existing prior to reading or writing. + WriteBufferCreationConfig creation_config = 8; +} + +// Configs sequencer auto-creation for write buffers. +// +// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/ `n_sequencers` +// partitions. +message WriteBufferCreationConfig { + // Number of sequencers. + // + // How they are implemented depends on `type`, e.g. for Kafka this is mapped to the number of partitions. + // + // If 0, a server-side default is used + uint32 n_sequencers = 1; + + // Special configs to by applied when sequencers are created. + // + // This depends on `type` and can setup parameters like retention policy. + // + // Contains 0 or more key value pairs + map options = 2; +} diff --git a/generated_types/protos/influxdata/iox/router/v1/router.proto b/generated_types/protos/influxdata/iox/router/v1/router.proto new file mode 100644 index 0000000000..37a3935cd5 --- /dev/null +++ b/generated_types/protos/influxdata/iox/router/v1/router.proto @@ -0,0 +1,148 @@ +syntax = "proto3"; +package influxdata.iox.router.v1; +option go_package = "github.com/influxdata/iox/router/v1"; + +import "influxdata/iox/management/v1/partition_template.proto"; +import "influxdata/iox/management/v1/shard.proto"; +import "influxdata/iox/management/v1/write_buffer.proto"; + +// Router for writes and queries. +// +// A router acts similar to a "real" database except that it does NOT store or manage any data by itself but forwards +// this responsiblity to other nodes (which then in turn provide an actual database or another routing layer). +// +// # Write Routing +// +// ## Overall Picture +// Data is accepted from all sources, is sharded, and is (according to the sharding) written into the sink sets. There +// may be a prioritization for sources that is "HTTP and gRPC first, and write buffers in declared order". +// +// ```text +// ( HTTP )--+ +------->( sink set 1 ) +// | | +// ( gRPC )--+-->( sharder )--> ... +// | | +// ( Write Buffer 1 )--+ +------->( sink set n ) +// ... | +// ( Write Buffer n )--+ +// ``` +// +// ## Sharder +// A sharder takes data and for every row/line: +// +// 1. Checks if a matcher matches the row, first matcher wins. If that's the case, the row/line is directly sent to the +// sink set. +// 2. If no matcher matches the row/line is handled by the hash ring. +// +// ```text +// --->[ matcher 1? ]-{no}---...--->[ matcher n? ]-{no}---+ +// | | | +// {yes} {yes} | +// | | | +// V V | +// ( sink set 1 ) ( sink set n ) | +// ^ ^ | +// | | | +// +--------( hash ring )-------+ | +// ^ | +// | | +// +-----------------------------+ +// ``` +// +// ## Sink Set +// Data is written to all sinks in the set in implementation-defined order. Errors do NOT short-circuit. If an error +// occurs for at least one sink that has `ignore_errors = false`, an error is returned. An empty sink set acts as NULL +// sink and always succeeds. +// +// **IMPORTANT: Queries are NOT distributed! 
They are always answered by a single node.**
+//
+// # Query Routing
+// Queries always arrive via gRPC and are forwarded to one sink. The specific sink is selected via an engine that might
+// take the following features into account:
+//
+// - **freshness:** For each sink, what are the latest sequence numbers pulled from the write buffer.
+// - **stickiness:** The same client should ideally reach the same sink in subsequent requests to improve caching.
+// - **circuit breaking:** If a sink is unhealthy it should be excluded from the candidate list for a while.
+//
+// ```text
+// ( gRPC )-->[ selection engine ]-->( sink 1 )
+//                    |                 ...
+//                    +---->( sink n )
+// ```
+message Router {
+  // Router name.
+  //
+  // The name is unique for this node.
+  string name = 1;
+
+  // Sources of write requests.
+  WriteSources write_sources = 2;
+
+  // Write sharder.
+  //
+  // NOTE: This only uses the `specific_targets` and `hash_ring` config of the sharder. The other fields are ignored.
+  //
+  // TODO(marco): remove the note above once `ShardConfig` has been cleaned up.
+  influxdata.iox.management.v1.ShardConfig write_sharder = 3;
+
+  // Sinks for write requests.
+  map write_sinks = 4;
+
+  // Sinks for query requests.
+  QuerySinks query_sinks = 5;
+
+  // Template that generates a partition key for each row inserted into the database.
+  //
+  // This is a temporary config until the partition is moved entirely into the database.
+  //
+  // If not specified, a server-side default is used
+  //
+  // TODO(marco): remove this
+  influxdata.iox.management.v1.PartitionTemplate partition_template = 6;
+}
+
+// Sources of write requests aka new data.
+//
+// Data is accepted from these sources and a status is provided back to them.
+message WriteSources {
+  // If set, writes via gRPC and HTTP are accepted.
+  //
+  // You may want to disable this when incoming data should solely be received via write buffer(s).
+  bool allow_unsequenced_inputs = 2;
+
+  // Write buffer connections.
+  repeated influxdata.iox.management.v1.WriteBufferConnection write_buffers = 3;
+}
+
+// Sink of write requests aka new data.
+//
+// Data is sent to this sink and a status is received from it.
+message WriteSink {
+  // Where the data goes.
+  oneof sink {
+    // gRPC-based remote, addressed by its server ID.
+    uint32 grpc_remote = 1;
+
+    // Write buffer connection.
+    influxdata.iox.management.v1.WriteBufferConnection write_buffer = 2;
+  }
+
+  // If set, errors during writing to this sink are ignored and do NOT lead to an overall failure.
+  bool ignore_errors = 3;
+}
+
+// Set of write sinks.
+message WriteSinkSet {
+  // Sinks within the set.
+  repeated WriteSink sinks = 1;
+}
+
+// Sinks for query requests.
+//
+// Queries are sent to one of these sinks and the resulting data is received from it.
+//
+// Note that the query results flow in the opposite direction (aka a query sink is a result source).
+message QuerySinks {
+  // gRPC-based remotes, addressed by their server IDs.
+  repeated uint32 grpc_remotes = 1;
+}
diff --git a/generated_types/protos/influxdata/iox/router/v1/service.proto b/generated_types/protos/influxdata/iox/router/v1/service.proto
new file mode 100644
index 0000000000..38b478d67c
--- /dev/null
+++ b/generated_types/protos/influxdata/iox/router/v1/service.proto
@@ -0,0 +1,76 @@
+syntax = "proto3";
+package influxdata.iox.router.v1;
+option go_package = "github.com/influxdata/iox/router/v1";
+
+import "influxdata/iox/router/v1/router.proto";
+
+service RouterService {
+  // List remote IOx servers we know about.
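The sharder behaviour documented for `Router` above (specific matchers first, hash ring as the fallback) can be illustrated with a short, self-contained sketch; the types and the hashing below are illustrative only and are not the router's actual implementation:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Illustrative only: a matcher pairs a table-name predicate with a sink-set id.
struct Matcher {
    table: &'static str, // the real config matches via regex; a literal suffices here
    sink_set: u32,
}

/// First matching matcher wins; otherwise the row falls through to the hash ring.
fn route(table: &str, matchers: &[Matcher], hash_ring: &[u32]) -> Option<u32> {
    if let Some(m) = matchers.iter().find(|m| m.table == table) {
        return Some(m.sink_set);
    }
    if hash_ring.is_empty() {
        return None;
    }
    let mut hasher = DefaultHasher::new();
    table.hash(&mut hasher);
    let idx = (hasher.finish() % hash_ring.len() as u64) as usize;
    Some(hash_ring[idx])
}

fn main() {
    let matchers = [Matcher { table: "cpu", sink_set: 1 }];
    let ring = [2, 3, 4];
    assert_eq!(route("cpu", &matchers, &ring), Some(1)); // matcher wins
    assert!(route("mem", &matchers, &ring).is_some()); // handled by the hash ring
}
```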
+ rpc ListRemotes(ListRemotesRequest) returns (ListRemotesResponse); + + // Update information about a remote IOx server (upsert). + rpc UpdateRemote(UpdateRemoteRequest) returns (UpdateRemoteResponse); + + // Delete a reference to remote IOx server. + rpc DeleteRemote(DeleteRemoteRequest) returns (DeleteRemoteResponse); + + // List configured routers. + rpc ListRouter(ListRouterRequest) returns (ListRouterResponse); + + // Update router config (upsert). + rpc UpdateRouter(UpdateRouterRequest) returns (UpdateRouterResponse); + + // Delete router. + rpc DeleteRouter(DeleteRouterRequest) returns (DeleteRouterResponse); +} + +message ListRemotesRequest {} + +message ListRemotesResponse { + repeated Remote remotes = 1; +} + +// This resource represents a remote IOx server. +message Remote { + // The server ID associated with a remote IOx server. + uint32 id = 1; + + // The address of the remote IOx server gRPC endpoint. + string connection_string = 2; +} + +// Updates information about a remote IOx server. +// +// If a remote for a given `id` already exists, it is updated in place. +message UpdateRemoteRequest { + // If omitted, the remote associated with `id` will be removed. + Remote remote = 1; + + // TODO(#917): add an optional flag to test the connection or not before adding it. +} + +message UpdateRemoteResponse {} + +message ListRouterRequest {} + +message ListRouterResponse { + repeated Router routers = 1; +} + +message DeleteRemoteRequest{ + uint32 id = 1; +} + +message DeleteRemoteResponse {} + +message UpdateRouterRequest { + Router router = 1; +} + +message UpdateRouterResponse {} + +message DeleteRouterRequest { + string router_name = 1; +} + +message DeleteRouterResponse {} diff --git a/grpc-router/grpc-router-test-gen/Cargo.toml b/grpc-router-test-gen/Cargo.toml similarity index 95% rename from grpc-router/grpc-router-test-gen/Cargo.toml rename to grpc-router-test-gen/Cargo.toml index d3546ba504..f23a1591c1 100644 --- a/grpc-router/grpc-router-test-gen/Cargo.toml +++ b/grpc-router-test-gen/Cargo.toml @@ -2,7 +2,7 @@ name = "grpc-router-test-gen" version = "0.1.0" authors = ["Marko Mikulicic "] -edition = "2018" +edition = "2021" description = "Protobuf used in test for the grpc-router crate; need to be in a separate create because of linter limitations" [dependencies] diff --git a/grpc-router/grpc-router-test-gen/build.rs b/grpc-router-test-gen/build.rs similarity index 100% rename from grpc-router/grpc-router-test-gen/build.rs rename to grpc-router-test-gen/build.rs diff --git a/grpc-router/grpc-router-test-gen/protos/test.proto b/grpc-router-test-gen/protos/test.proto similarity index 100% rename from grpc-router/grpc-router-test-gen/protos/test.proto rename to grpc-router-test-gen/protos/test.proto diff --git a/grpc-router/grpc-router-test-gen/src/lib.rs b/grpc-router-test-gen/src/lib.rs similarity index 100% rename from grpc-router/grpc-router-test-gen/src/lib.rs rename to grpc-router-test-gen/src/lib.rs diff --git a/grpc-router/Cargo.toml b/grpc-router/Cargo.toml index 9cfc2102bd..48b8c02c3f 100644 --- a/grpc-router/Cargo.toml +++ b/grpc-router/Cargo.toml @@ -2,7 +2,7 @@ name = "grpc-router" version = "0.1.0" authors = ["Marko Mikulicic "] -edition = "2018" +edition = "2021" [dependencies] bytes = "1.0" @@ -25,4 +25,4 @@ prost-build = "0.8" tonic-build = "0.5" [dev-dependencies] -grpc-router-test-gen = { path = "./grpc-router-test-gen" } +grpc-router-test-gen = { path = "../grpc-router-test-gen" } diff --git a/influxdb2_client/Cargo.toml b/influxdb2_client/Cargo.toml index 
a475cf5989..0824082817 100644 --- a/influxdb2_client/Cargo.toml +++ b/influxdb2_client/Cargo.toml @@ -2,7 +2,7 @@ name = "influxdb2_client" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order bytes = "1.0" diff --git a/influxdb_iox/Cargo.toml b/influxdb_iox/Cargo.toml new file mode 100644 index 0000000000..76b8ee2dd0 --- /dev/null +++ b/influxdb_iox/Cargo.toml @@ -0,0 +1,122 @@ +[package] +name = "influxdb_iox" +version = "0.1.0" +authors = ["Paul Dix "] +edition = "2021" +default-run = "influxdb_iox" + +[dependencies] +# Workspace dependencies, in alphabetical order +datafusion = { path = "../datafusion" } +data_types = { path = "../data_types" } +entry = { path = "../entry" } +generated_types = { path = "../generated_types" } + +influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format"] } +influxdb_line_protocol = { path = "../influxdb_line_protocol" } +internal_types = { path = "../internal_types" } +iox_object_store = { path = "../iox_object_store" } +logfmt = { path = "../logfmt" } +metric = { path = "../metric" } +metric_exporters = { path = "../metric_exporters" } +mutable_buffer = { path = "../mutable_buffer" } +num_cpus = "1.13.0" +object_store = { path = "../object_store" } +observability_deps = { path = "../observability_deps" } +panic_logging = { path = "../panic_logging" } +parquet_catalog = { path = "../parquet_catalog" } +parquet_file = { path = "../parquet_file" } +predicate = { path = "../predicate" } +query = { path = "../query" } +read_buffer = { path = "../read_buffer" } +server = { path = "../server" } +trace = { path = "../trace" } +trace_exporters = { path = "../trace_exporters" } +trace_http = { path = "../trace_http" } +tracker = { path = "../tracker" } +trogging = { path = "../trogging", default-features = false, features = ["structopt"] } +time = { path = "../time" } + +# Crates.io dependencies, in alphabetical order +arrow = { version = "6.0", features = ["prettyprint"] } +arrow-flight = "6.0" +backtrace = "0.3" +byteorder = "1.3.4" +bytes = "1.0" +chrono = "0.4" +clap = "2.33.1" +csv = "1.1" +dirs = "4.0.0" +dotenv = "0.15.0" +flate2 = "1.0" +futures = "0.3" +hashbrown = "0.11" +http = "0.2.0" +humantime = "2.1.0" +hyper = "0.14" +libc = { version = "0.2" } +log = "0.4" +once_cell = { version = "1.4.0", features = ["parking_lot"] } +parking_lot = "0.11.2" +itertools = "0.10.1" +parquet = "6.0" +pin-project = "1.0" +# used by arrow/datafusion anyway +comfy-table = { version = "4.0", default-features = false } +pprof = { version = "^0.5", default-features = false, features = ["flamegraph", "protobuf"], optional = true } +prost = "0.8" +rustyline = { version = "9.0", default-features = false } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0.67" +serde_urlencoded = "0.7.0" +snafu = "0.6.9" +structopt = "0.3.25" +thiserror = "1.0.30" +tikv-jemalloc-ctl = { version = "0.4.0" } +tokio = { version = "1.11", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] } +tokio-stream = { version = "0.1.2", features = ["net"] } +tokio-util = { version = "0.6.3" } +tonic = "0.5.0" +tonic-health = "0.4.0" +tonic-reflection = "0.2.0" +tower = "0.4" +uuid = { version = "0.8", features = ["v4"] } + +# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive +tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] } +heappy = { git = 
"https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true } + + +[dev-dependencies] +# Workspace dependencies, in alphabetical order +arrow_util = { path = "../arrow_util" } +entry = { path = "../entry" } +influxdb2_client = { path = "../influxdb2_client" } +influxdb_storage_client = { path = "../influxdb_storage_client" } +influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight"] } +test_helpers = { path = "../test_helpers" } +parking_lot = "0.11.2" +write_buffer = { path = "../write_buffer" } + +# Crates.io dependencies, in alphabetical order +assert_cmd = "2.0.2" +flate2 = "1.0" +hex = "0.4.2" +predicates = "2.0.3" +rand = "0.8.3" +reqwest = "0.11" +tempfile = "3.1.0" + +[features] +default = ["jemalloc_replacing_malloc"] + +azure = ["object_store/azure"] # Optional Azure Object store support +gcp = ["object_store/gcp"] # Optional GCP object store support +aws = ["object_store/aws"] # Optional AWS / S3 object store support +# pprof is an optional feature for pprof support + +# heappy is an optional feature; Not on by default as it +# runtime overhead on all allocations (calls to malloc). +# Cargo cannot currently implement mutually exclusive features so let's force every build +# to pick either heappy or jemalloc_replacing_malloc feature at least until we figure out something better. +jemalloc_replacing_malloc = ["tikv-jemalloc-sys"] diff --git a/src/commands/database.rs b/influxdb_iox/src/commands/database.rs similarity index 100% rename from src/commands/database.rs rename to influxdb_iox/src/commands/database.rs diff --git a/src/commands/database/chunk.rs b/influxdb_iox/src/commands/database/chunk.rs similarity index 100% rename from src/commands/database/chunk.rs rename to influxdb_iox/src/commands/database/chunk.rs diff --git a/src/commands/database/partition.rs b/influxdb_iox/src/commands/database/partition.rs similarity index 100% rename from src/commands/database/partition.rs rename to influxdb_iox/src/commands/database/partition.rs diff --git a/src/commands/database/recover.rs b/influxdb_iox/src/commands/database/recover.rs similarity index 100% rename from src/commands/database/recover.rs rename to influxdb_iox/src/commands/database/recover.rs diff --git a/src/commands/debug.rs b/influxdb_iox/src/commands/debug/dump_catalog.rs similarity index 70% rename from src/commands/debug.rs rename to influxdb_iox/src/commands/debug/dump_catalog.rs index f2d84ffc35..542373da43 100644 --- a/src/commands/debug.rs +++ b/influxdb_iox/src/commands/debug/dump_catalog.rs @@ -5,13 +5,13 @@ use snafu::{OptionExt, ResultExt, Snafu}; use std::{convert::TryFrom, sync::Arc}; use structopt::StructOpt; -use crate::{object_store::ObjectStoreConfig, server_id::ServerIdConfig}; +use crate::structopt_blocks::{object_store::ObjectStoreConfig, server_id::ServerIdConfig}; #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Cannot parse object store config: {}", source))] ObjectStoreParsing { - source: crate::object_store::ParseError, + source: crate::structopt_blocks::object_store::ParseError, }, #[snafu(display("No server ID provided"))] @@ -38,22 +38,9 @@ pub enum Error { pub type Result = std::result::Result; -/// Interrogate internal database data -#[derive(Debug, StructOpt)] -pub struct Config { - #[structopt(subcommand)] - command: Command, -} - -#[derive(Debug, StructOpt)] -enum Command { - /// Dump preserved catalog. 
- DumpCatalog(DumpCatalog), -} - /// Dump preserved catalog. #[derive(Debug, StructOpt)] -struct DumpCatalog { +pub struct Config { // object store config #[structopt(flatten)] object_store_config: ObjectStoreConfig, @@ -71,7 +58,7 @@ struct DumpCatalog { } #[derive(Debug, StructOpt)] -struct DumpOptions { +pub struct DumpOptions { /// Show debug output of `DecodedIoxParquetMetaData` if decoding succeeds, show the decoding error otherwise. /// /// Since this contains the entire Apache Parquet metadata object this is quite verbose and is usually not @@ -116,29 +103,21 @@ impl From for parquet_catalog::dump::DumpOptions { } pub async fn command(config: Config) -> Result<()> { - match config.command { - Command::DumpCatalog(dump_catalog) => { - let object_store = ObjectStore::try_from(&dump_catalog.object_store_config) - .context(ObjectStoreParsing)?; - let database_name = - DatabaseName::try_from(dump_catalog.db_name).context(InvalidDbName)?; - let server_id = dump_catalog - .server_id_config - .server_id - .context(NoServerId)?; - let iox_object_store = - IoxObjectStore::find_existing(Arc::new(object_store), server_id, &database_name) - .await - .context(IoxObjectStoreFailure)? - .context(NoIoxObjectStore)?; + let object_store = + ObjectStore::try_from(&config.object_store_config).context(ObjectStoreParsing)?; + let database_name = DatabaseName::try_from(config.db_name).context(InvalidDbName)?; + let server_id = config.server_id_config.server_id.context(NoServerId)?; + let iox_object_store = + IoxObjectStore::find_existing(Arc::new(object_store), server_id, &database_name) + .await + .context(IoxObjectStoreFailure)? + .context(NoIoxObjectStore)?; - let mut writer = std::io::stdout(); - let options = dump_catalog.dump_options.into(); - parquet_catalog::dump::dump(&iox_object_store, &mut writer, options) - .await - .context(DumpCatalogFailure)?; - } - } + let mut writer = std::io::stdout(); + let options = config.dump_options.into(); + parquet_catalog::dump::dump(&iox_object_store, &mut writer, options) + .await + .context(DumpCatalogFailure)?; Ok(()) } diff --git a/influxdb_iox/src/commands/debug/mod.rs b/influxdb_iox/src/commands/debug/mod.rs new file mode 100644 index 0000000000..ee786f1037 --- /dev/null +++ b/influxdb_iox/src/commands/debug/mod.rs @@ -0,0 +1,41 @@ +use snafu::{ResultExt, Snafu}; +use structopt::StructOpt; + +mod dump_catalog; +mod print_cpu; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Error in dump-catalog subcommand: {}", source))] + DumpCatalogError { source: dump_catalog::Error }, +} + +pub type Result = std::result::Result; + +/// Interrogate internal database data +#[derive(Debug, StructOpt)] +pub struct Config { + #[structopt(subcommand)] + command: Command, +} + +#[derive(Debug, StructOpt)] +enum Command { + /// Dump preserved catalog. + DumpCatalog(dump_catalog::Config), + + /// Prints what CPU features are used by the compiler by default. 
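With `print_cpu` folded into the new `debug` subcommand enum, the CircleCI step above can invoke `influxdb_iox debug print-cpu` because structopt derives kebab-case subcommand names from the enum variants. A minimal sketch of that pattern (not the actual IOx CLI definition):

```rust
// Minimal sketch: a structopt subcommand enum. `PrintCpu` is invoked on the
// command line as `print-cpu`, which is what the updated CI step relies on.
use structopt::StructOpt;

#[derive(Debug, StructOpt)]
enum Command {
    /// Dump preserved catalog.
    DumpCatalog,
    /// Prints what CPU features are used by the compiler by default.
    PrintCpu,
}

fn main() {
    match Command::from_args() {
        Command::DumpCatalog => println!("dump-catalog"),
        Command::PrintCpu => println!("print-cpu"),
    }
}
```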
+ PrintCpu, +} + +pub async fn command(config: Config) -> Result<()> { + match config.command { + Command::DumpCatalog(dump_catalog) => dump_catalog::command(dump_catalog) + .await + .context(DumpCatalogError), + Command::PrintCpu => { + print_cpu::main(); + Ok(()) + } + } +} diff --git a/src/print_cpu.rs b/influxdb_iox/src/commands/debug/print_cpu.rs similarity index 98% rename from src/print_cpu.rs rename to influxdb_iox/src/commands/debug/print_cpu.rs index 3258494d29..0fe531440b 100644 --- a/src/print_cpu.rs +++ b/influxdb_iox/src/commands/debug/print_cpu.rs @@ -1,4 +1,3 @@ -#![recursion_limit = "512"] /// Prints what CPU features are used by the compiler by default. /// /// Script from: @@ -29,7 +28,7 @@ macro_rules! print_if_feature_enabled { } } -fn main() { +pub fn main() { println!("rustc is using the following target options"); print_if_feature_enabled!( diff --git a/src/commands/operations.rs b/influxdb_iox/src/commands/operations.rs similarity index 100% rename from src/commands/operations.rs rename to influxdb_iox/src/commands/operations.rs diff --git a/src/commands/run.rs b/influxdb_iox/src/commands/run.rs similarity index 98% rename from src/commands/run.rs rename to influxdb_iox/src/commands/run.rs index 70e96b55cc..8d00f1b0bc 100644 --- a/src/commands/run.rs +++ b/influxdb_iox/src/commands/run.rs @@ -2,8 +2,7 @@ use crate::{ influxdb_ioxd::{self, serving_readiness::ServingReadinessState}, - object_store::ObjectStoreConfig, - server_id::ServerIdConfig, + structopt_blocks::{object_store::ObjectStoreConfig, server_id::ServerIdConfig}, }; use std::{net::SocketAddr, net::ToSocketAddrs}; use structopt::StructOpt; diff --git a/src/commands/server.rs b/influxdb_iox/src/commands/server.rs similarity index 100% rename from src/commands/server.rs rename to influxdb_iox/src/commands/server.rs diff --git a/src/commands/server_remote.rs b/influxdb_iox/src/commands/server_remote.rs similarity index 100% rename from src/commands/server_remote.rs rename to influxdb_iox/src/commands/server_remote.rs diff --git a/src/commands/sql.rs b/influxdb_iox/src/commands/sql.rs similarity index 100% rename from src/commands/sql.rs rename to influxdb_iox/src/commands/sql.rs diff --git a/src/commands/sql/observer.rs b/influxdb_iox/src/commands/sql/observer.rs similarity index 100% rename from src/commands/sql/observer.rs rename to influxdb_iox/src/commands/sql/observer.rs diff --git a/src/commands/sql/repl.rs b/influxdb_iox/src/commands/sql/repl.rs similarity index 100% rename from src/commands/sql/repl.rs rename to influxdb_iox/src/commands/sql/repl.rs diff --git a/src/commands/sql/repl_command.rs b/influxdb_iox/src/commands/sql/repl_command.rs similarity index 100% rename from src/commands/sql/repl_command.rs rename to influxdb_iox/src/commands/sql/repl_command.rs diff --git a/src/commands/tracing.rs b/influxdb_iox/src/commands/tracing.rs similarity index 100% rename from src/commands/tracing.rs rename to influxdb_iox/src/commands/tracing.rs diff --git a/src/influxdb_ioxd.rs b/influxdb_iox/src/influxdb_ioxd.rs similarity index 95% rename from src/influxdb_ioxd.rs rename to influxdb_iox/src/influxdb_ioxd.rs index 5063edf6ae..d684df176d 100644 --- a/src/influxdb_ioxd.rs +++ b/influxdb_iox/src/influxdb_ioxd.rs @@ -1,6 +1,6 @@ use crate::{ commands::run::Config, - object_store::{check_object_store, warn_about_inmem_store}, + structopt_blocks::object_store::{check_object_store, warn_about_inmem_store}, }; use futures::{future::FusedFuture, pin_mut, FutureExt}; use hyper::server::conn::AddrIncoming; @@ 
-44,12 +44,12 @@ pub enum Error { #[snafu(display("Cannot parse object store config: {}", source))] ObjectStoreParsing { - source: crate::object_store::ParseError, + source: crate::structopt_blocks::object_store::ParseError, }, #[snafu(display("Cannot check object store config: {}", source))] ObjectStoreCheck { - source: crate::object_store::CheckError, + source: crate::structopt_blocks::object_store::CheckError, }, #[snafu(display("Cannot create tracing pipeline: {}", source))] @@ -79,7 +79,10 @@ async fn wait_for_signal() { let _ = tokio::signal::ctrl_c().await; } -async fn make_application(config: &Config) -> Result> { +async fn make_application( + config: &Config, + trace_collector: Option>, +) -> Result> { warn_about_inmem_store(&config.object_store_config); let object_store = ObjectStore::try_from(&config.object_store_config).context(ObjectStoreParsing)?; @@ -91,6 +94,7 @@ async fn make_application(config: &Config) -> Result> { Ok(Arc::new(ApplicationState::new( object_storage, config.num_worker_threads, + trace_collector, ))) } @@ -178,7 +182,11 @@ pub async fn main(config: Config) -> Result<()> { let f = SendPanicsToTracing::new(); std::mem::forget(f); - let application = make_application(&config).await?; + let async_exporter = config.tracing_config.build().context(Tracing)?; + let trace_collector = async_exporter + .clone() + .map(|x| -> Arc { x }); + let application = make_application(&config, trace_collector).await?; // Register jemalloc metrics application @@ -189,17 +197,12 @@ pub async fn main(config: Config) -> Result<()> { let grpc_listener = grpc_listener(config.grpc_bind_address).await?; let http_listener = http_listener(config.http_bind_address).await?; - let async_exporter = config.tracing_config.build().context(Tracing)?; - let trace_collector = async_exporter - .clone() - .map(|x| -> Arc { x }); let r = serve( config, application, grpc_listener, http_listener, - trace_collector, app_server, ) .await; @@ -241,7 +244,6 @@ async fn serve( application: Arc, grpc_listener: tokio::net::TcpListener, http_listener: AddrIncoming, - trace_collector: Option>, app_server: Arc>, ) -> Result<()> { // Construct a token to trigger shutdown of API services @@ -262,7 +264,6 @@ async fn serve( Arc::clone(&application), Arc::clone(&app_server), trace_header_parser.clone(), - trace_collector.clone(), frontend_shutdown.clone(), config.initial_serving_state.into(), ) @@ -279,7 +280,6 @@ async fn serve( frontend_shutdown.clone(), max_http_request_size, trace_header_parser, - trace_collector, ) .fuse(); info!("HTTP server listening"); @@ -381,7 +381,7 @@ mod tests { use super::*; use ::http::{header::HeaderName, HeaderValue}; use data_types::{database_rules::DatabaseRules, DatabaseName}; - use influxdb_iox_client::connection::Connection; + use influxdb_iox_client::{connection::Connection, flight::PerformQuery}; use server::rules::ProvidedDatabaseRules; use std::{convert::TryInto, num::NonZeroU64}; use structopt::StructOpt; @@ -412,16 +412,9 @@ mod tests { let grpc_listener = grpc_listener(config.grpc_bind_address).await.unwrap(); let http_listener = http_listener(config.grpc_bind_address).await.unwrap(); - serve( - config, - application, - grpc_listener, - http_listener, - None, - server, - ) - .await - .unwrap() + serve(config, application, grpc_listener, http_listener, server) + .await + .unwrap() } #[tokio::test] @@ -430,7 +423,7 @@ mod tests { // Create a server and wait for it to initialize let config = test_config(Some(23)); - let application = make_application(&config).await.unwrap(); 
+ let application = make_application(&config, None).await.unwrap(); let server = make_server(Arc::clone(&application), &config); server.wait_for_init().await.unwrap(); @@ -458,7 +451,7 @@ mod tests { async fn test_server_shutdown_uninit() { // Create a server but don't set a server id let config = test_config(None); - let application = make_application(&config).await.unwrap(); + let application = make_application(&config, None).await.unwrap(); let server = make_server(Arc::clone(&application), &config); let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse(); @@ -489,7 +482,7 @@ mod tests { async fn test_server_panic() { // Create a server and wait for it to initialize let config = test_config(Some(999999999)); - let application = make_application(&config).await.unwrap(); + let application = make_application(&config, None).await.unwrap(); let server = make_server(Arc::clone(&application), &config); server.wait_for_init().await.unwrap(); @@ -516,7 +509,7 @@ mod tests { async fn test_database_panic() { // Create a server and wait for it to initialize let config = test_config(Some(23)); - let application = make_application(&config).await.unwrap(); + let application = make_application(&config, None).await.unwrap(); let server = make_server(Arc::clone(&application), &config); server.wait_for_init().await.unwrap(); @@ -597,7 +590,9 @@ mod tests { JoinHandle>, ) { let config = test_config(Some(23)); - let application = make_application(&config).await.unwrap(); + let application = make_application(&config, Some(Arc::::clone(collector))) + .await + .unwrap(); let server = make_server(Arc::clone(&application), &config); server.wait_for_init().await.unwrap(); @@ -611,7 +606,6 @@ mod tests { application, grpc_listener, http_listener, - Some(Arc::::clone(collector)), Arc::clone(&server), ); @@ -690,6 +684,11 @@ mod tests { join.await.unwrap().unwrap(); } + /// Ensure that query is fully executed. + async fn consume_query(mut query: PerformQuery) { + while query.next().await.unwrap().is_some() {} + } + #[tokio::test] async fn test_query_tracing() { let collector = Arc::new(RingBufferTraceCollector::new(100)); @@ -721,10 +720,13 @@ mod tests { .unwrap(); let mut flight = influxdb_iox_client::flight::Client::new(conn.clone()); - flight - .perform_query(db_info.db_name(), "select * from cpu;") - .await - .unwrap(); + consume_query( + flight + .perform_query(db_info.db_name(), "select * from cpu;") + .await + .unwrap(), + ) + .await; flight .perform_query("nonexistent", "select * from cpu;") diff --git a/src/influxdb_ioxd/http.rs b/influxdb_iox/src/influxdb_ioxd/http.rs similarity index 99% rename from src/influxdb_ioxd/http.rs rename to influxdb_iox/src/influxdb_ioxd/http.rs index d35a84a7c0..e8ece0a4e1 100644 --- a/src/influxdb_ioxd/http.rs +++ b/influxdb_iox/src/influxdb_ioxd/http.rs @@ -52,7 +52,6 @@ use std::{ }; use tokio_util::sync::CancellationToken; use tower::Layer; -use trace::TraceCollector; use trace_http::tower::TraceLayer; /// Constants used in API error codes. 
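The tracing refactor in these hunks moves the trace collector out of the `serve`/HTTP/RPC signatures and into `ApplicationState`, so anything holding the application can reach it via `application.trace_collector()`. A sketch of the new wiring, with the constructor signature and import paths assumed from the call sites shown in this diff:

```rust
use std::sync::Arc;

use object_store::ObjectStore;
use server::ApplicationState;
use trace::{RingBufferTraceCollector, TraceCollector};

/// Illustrative only: build an application whose trace collector is owned by
/// `ApplicationState` itself, mirroring what the updated tests do.
fn make_test_application() -> Arc<ApplicationState> {
    let collector: Arc<dyn TraceCollector> = Arc::new(RingBufferTraceCollector::new(5));
    Arc::new(ApplicationState::new(
        Arc::new(ObjectStore::new_in_memory()),
        None, // number of worker threads: use the server default
        Some(Arc::clone(&collector)),
    ))
}
```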
@@ -865,12 +864,12 @@ pub async fn serve( shutdown: CancellationToken, max_request_size: usize, trace_header_parser: TraceHeaderParser, - trace_collector: Option>, ) -> Result<(), hyper::Error> where M: ConnectionManager + Send + Sync + Debug + 'static, { let metric_registry = Arc::clone(application.metric_registry()); + let trace_collector = application.trace_collector().clone(); let trace_layer = TraceLayer::new(trace_header_parser, metric_registry, trace_collector, false); let lp_metrics = Arc::new(LineProtocolMetrics::new( @@ -924,6 +923,7 @@ mod tests { Arc::new(ApplicationState::new( Arc::new(ObjectStore::new_in_memory()), None, + None, )) } @@ -939,7 +939,7 @@ mod tests { async fn test_health() { let application = make_application(); let app_server = make_server(Arc::clone(&application)); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); let client = Client::new(); let response = client.get(&format!("{}/health", server_url)).send().await; @@ -958,7 +958,7 @@ mod tests { .register_metric("my_metric", "description"); let app_server = make_server(Arc::clone(&application)); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); metric.recorder(&[("tag", "value")]).inc(20); @@ -998,15 +998,15 @@ mod tests { #[tokio::test] async fn test_tracing() { - let application = make_application(); - let app_server = make_server(Arc::clone(&application)); let trace_collector = Arc::new(RingBufferTraceCollector::new(5)); - - let server_url = test_server( - application, - Arc::clone(&app_server), + let application = Arc::new(ApplicationState::new( + Arc::new(ObjectStore::new_in_memory()), + None, Some(Arc::::clone(&trace_collector)), - ); + )); + let app_server = make_server(Arc::clone(&application)); + + let server_url = test_server(application, Arc::clone(&app_server)); let client = Client::new(); let response = client @@ -1036,7 +1036,7 @@ mod tests { .create_database(make_rules("MyOrg_MyBucket")) .await .unwrap(); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); let client = Client::new(); @@ -1083,7 +1083,7 @@ mod tests { .create_database(make_rules("MyOrg_MyBucket")) .await .unwrap(); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); // Set up client let client = Client::new(); @@ -1209,7 +1209,7 @@ mod tests { .await .unwrap(); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); let client = Client::new(); @@ -1399,7 +1399,7 @@ mod tests { .create_database(make_rules("MyOrg_MyBucket")) .await .unwrap(); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); let client = Client::new(); @@ -1693,7 +1693,6 @@ mod tests { fn test_server( application: Arc, server: Arc>, - trace_collector: Option>, ) -> String { // NB: specify port 0 to let the OS pick the port. 
let bind_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 0); @@ -1710,7 +1709,6 @@ mod tests { CancellationToken::new(), TEST_MAX_REQUEST_SIZE, trace_header_parser, - trace_collector, )); println!("Started server at {}", server_url); server_url @@ -1734,7 +1732,7 @@ mod tests { .create_database(make_rules("MyOrg_MyBucket")) .await .unwrap(); - let server_url = test_server(application, Arc::clone(&app_server), None); + let server_url = test_server(application, Arc::clone(&app_server)); (app_server, server_url) } diff --git a/src/influxdb_ioxd/http/heappy.rs b/influxdb_iox/src/influxdb_ioxd/http/heappy.rs similarity index 100% rename from src/influxdb_ioxd/http/heappy.rs rename to influxdb_iox/src/influxdb_ioxd/http/heappy.rs diff --git a/src/influxdb_ioxd/http/metrics.rs b/influxdb_iox/src/influxdb_ioxd/http/metrics.rs similarity index 100% rename from src/influxdb_ioxd/http/metrics.rs rename to influxdb_iox/src/influxdb_ioxd/http/metrics.rs diff --git a/src/influxdb_ioxd/http/pprof.rs b/influxdb_iox/src/influxdb_ioxd/http/pprof.rs similarity index 100% rename from src/influxdb_ioxd/http/pprof.rs rename to influxdb_iox/src/influxdb_ioxd/http/pprof.rs diff --git a/src/influxdb_ioxd/jemalloc.rs b/influxdb_iox/src/influxdb_ioxd/jemalloc.rs similarity index 100% rename from src/influxdb_ioxd/jemalloc.rs rename to influxdb_iox/src/influxdb_ioxd/jemalloc.rs diff --git a/src/influxdb_ioxd/planner.rs b/influxdb_iox/src/influxdb_ioxd/planner.rs similarity index 96% rename from src/influxdb_ioxd/planner.rs rename to influxdb_iox/src/influxdb_ioxd/planner.rs index dc04bc77c7..c238baa299 100644 --- a/src/influxdb_ioxd/planner.rs +++ b/influxdb_iox/src/influxdb_ioxd/planner.rs @@ -7,11 +7,7 @@ use query::{ exec::IOxExecutionContext, frontend::{influxrpc::InfluxRpcPlanner, sql::SqlQueryPlanner}, group_by::{Aggregate, WindowDuration}, - plan::{ - fieldlist::FieldListPlan, - seriesset::SeriesSetPlans, - stringset::{StringSetPlan, TableNamePlanBuilder}, - }, + plan::{fieldlist::FieldListPlan, seriesset::SeriesSetPlans, stringset::StringSetPlan}, QueryDatabase, }; @@ -54,7 +50,7 @@ impl Planner { &self, database: Arc, predicate: Predicate, - ) -> Result + ) -> Result where D: QueryDatabase + 'static, { diff --git a/src/influxdb_ioxd/rpc.rs b/influxdb_iox/src/influxdb_ioxd/rpc.rs similarity index 97% rename from src/influxdb_ioxd/rpc.rs rename to influxdb_iox/src/influxdb_ioxd/rpc.rs index 226341fd85..857f1fface 100644 --- a/src/influxdb_ioxd/rpc.rs +++ b/influxdb_iox/src/influxdb_ioxd/rpc.rs @@ -11,7 +11,6 @@ use trace_http::ctx::TraceHeaderParser; use crate::influxdb_ioxd::serving_readiness::ServingReadiness; use server::{connection::ConnectionManager, ApplicationState, Server}; -use trace::TraceCollector; pub mod error; mod flight; @@ -90,7 +89,6 @@ pub async fn serve( application: Arc, server: Arc>, trace_header_parser: TraceHeaderParser, - trace_collector: Option>, shutdown: CancellationToken, serving_readiness: ServingReadiness, ) -> Result<()> @@ -109,7 +107,7 @@ where let mut builder = builder.layer(trace_http::tower::TraceLayer::new( trace_header_parser, Arc::clone(application.metric_registry()), - trace_collector, + application.trace_collector().clone(), true, )); diff --git a/src/influxdb_ioxd/rpc/error.rs b/influxdb_iox/src/influxdb_ioxd/rpc/error.rs similarity index 100% rename from src/influxdb_ioxd/rpc/error.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/error.rs diff --git a/src/influxdb_ioxd/rpc/flight.rs b/influxdb_iox/src/influxdb_ioxd/rpc/flight.rs similarity index 100% 
rename from src/influxdb_ioxd/rpc/flight.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/flight.rs diff --git a/src/influxdb_ioxd/rpc/management.rs b/influxdb_iox/src/influxdb_ioxd/rpc/management.rs similarity index 100% rename from src/influxdb_ioxd/rpc/management.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/management.rs diff --git a/src/influxdb_ioxd/rpc/operations.rs b/influxdb_iox/src/influxdb_ioxd/rpc/operations.rs similarity index 100% rename from src/influxdb_ioxd/rpc/operations.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/operations.rs diff --git a/src/influxdb_ioxd/rpc/storage.rs b/influxdb_iox/src/influxdb_ioxd/rpc/storage.rs similarity index 100% rename from src/influxdb_ioxd/rpc/storage.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/storage.rs diff --git a/src/influxdb_ioxd/rpc/storage/data.rs b/influxdb_iox/src/influxdb_ioxd/rpc/storage/data.rs similarity index 100% rename from src/influxdb_ioxd/rpc/storage/data.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/storage/data.rs diff --git a/src/influxdb_ioxd/rpc/storage/expr.rs b/influxdb_iox/src/influxdb_ioxd/rpc/storage/expr.rs similarity index 100% rename from src/influxdb_ioxd/rpc/storage/expr.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/storage/expr.rs diff --git a/src/influxdb_ioxd/rpc/storage/id.rs b/influxdb_iox/src/influxdb_ioxd/rpc/storage/id.rs similarity index 100% rename from src/influxdb_ioxd/rpc/storage/id.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/storage/id.rs diff --git a/src/influxdb_ioxd/rpc/storage/input.rs b/influxdb_iox/src/influxdb_ioxd/rpc/storage/input.rs similarity index 100% rename from src/influxdb_ioxd/rpc/storage/input.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/storage/input.rs diff --git a/src/influxdb_ioxd/rpc/storage/service.rs b/influxdb_iox/src/influxdb_ioxd/rpc/storage/service.rs similarity index 99% rename from src/influxdb_ioxd/rpc/storage/service.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/storage/service.rs index 0a445c7efa..68d606f02a 100644 --- a/src/influxdb_ioxd/rpc/storage/service.rs +++ b/influxdb_iox/src/influxdb_ioxd/rpc/storage/service.rs @@ -724,14 +724,14 @@ where let db = db_store.db(db_name).context(DatabaseNotFound { db_name })?; let ctx = db.new_query_context(span_ctx); - let builder = Planner::new(&ctx) + let plan = Planner::new(&ctx) .table_names(db, predicate) .await .map_err(|e| Box::new(e) as _) .context(ListingTables { db_name })?; let table_names = ctx - .to_table_names(builder) + .to_string_set(plan) .await .map_err(|e| Box::new(e) as _) .context(ListingTables { db_name })?; @@ -1116,11 +1116,11 @@ mod tests { let chunk0 = TestChunk::new("h2o") .with_id(0) - .with_predicate_match(PredicateMatch::AtLeastOne); + .with_predicate_match(PredicateMatch::AtLeastOneNonNullField); let chunk1 = TestChunk::new("o2") .with_id(1) - .with_predicate_match(PredicateMatch::AtLeastOne); + .with_predicate_match(PredicateMatch::AtLeastOneNonNullField); fixture .test_storage @@ -1474,7 +1474,8 @@ mod tests { tag_key: [0].into(), }; - let chunk = TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOne); + let chunk = + TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOneNonNullField); fixture .test_storage @@ -1724,7 +1725,8 @@ mod tests { // Note we don't include the actual line / column in the // expected panic message to avoid needing to update the test // whenever the source code file changed. 
- let expected_error = "panicked at 'This is a test panic', src/influxdb_ioxd/rpc/testing.rs"; + let expected_error = + "panicked at 'This is a test panic', influxdb_iox/src/influxdb_ioxd/rpc/testing.rs"; assert_contains!(captured_logs, expected_error); // Ensure that panics don't exhaust the tokio executor by diff --git a/src/influxdb_ioxd/rpc/testing.rs b/influxdb_iox/src/influxdb_ioxd/rpc/testing.rs similarity index 100% rename from src/influxdb_ioxd/rpc/testing.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/testing.rs diff --git a/src/influxdb_ioxd/rpc/write.rs b/influxdb_iox/src/influxdb_ioxd/rpc/write.rs similarity index 100% rename from src/influxdb_ioxd/rpc/write.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/write.rs diff --git a/src/influxdb_ioxd/rpc/write_pb.rs b/influxdb_iox/src/influxdb_ioxd/rpc/write_pb.rs similarity index 100% rename from src/influxdb_ioxd/rpc/write_pb.rs rename to influxdb_iox/src/influxdb_ioxd/rpc/write_pb.rs diff --git a/src/influxdb_ioxd/serving_readiness.rs b/influxdb_iox/src/influxdb_ioxd/serving_readiness.rs similarity index 100% rename from src/influxdb_ioxd/serving_readiness.rs rename to influxdb_iox/src/influxdb_ioxd/serving_readiness.rs diff --git a/src/main.rs b/influxdb_iox/src/main.rs similarity index 99% rename from src/main.rs rename to influxdb_iox/src/main.rs index 5fb99c8ff6..683f6e59a3 100644 --- a/src/main.rs +++ b/influxdb_iox/src/main.rs @@ -1,4 +1,5 @@ //! Entrypoint of InfluxDB IOx binary +#![recursion_limit = "512"] // required for print_cpu #![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] #![warn( missing_debug_implementations, @@ -31,8 +32,7 @@ mod commands { pub mod tracing; } -mod object_store; -mod server_id; +mod structopt_blocks; pub mod influxdb_ioxd; diff --git a/influxdb_iox/src/structopt_blocks/mod.rs b/influxdb_iox/src/structopt_blocks/mod.rs new file mode 100644 index 0000000000..151ab95c80 --- /dev/null +++ b/influxdb_iox/src/structopt_blocks/mod.rs @@ -0,0 +1,5 @@ +//! Building blocks for [`structopt`]-driven configs. +//! +//! They can easily be re-used using `#[structopt(flatten)]`. 
+pub mod object_store; +pub mod server_id; diff --git a/src/object_store.rs b/influxdb_iox/src/structopt_blocks/object_store.rs similarity index 100% rename from src/object_store.rs rename to influxdb_iox/src/structopt_blocks/object_store.rs diff --git a/src/server_id.rs b/influxdb_iox/src/structopt_blocks/server_id.rs similarity index 100% rename from src/server_id.rs rename to influxdb_iox/src/structopt_blocks/server_id.rs diff --git a/tests/common/mod.rs b/influxdb_iox/tests/common/mod.rs similarity index 100% rename from tests/common/mod.rs rename to influxdb_iox/tests/common/mod.rs diff --git a/tests/common/server_fixture.rs b/influxdb_iox/tests/common/server_fixture.rs similarity index 99% rename from tests/common/server_fixture.rs rename to influxdb_iox/tests/common/server_fixture.rs index bc713382ad..cc059f3ccb 100644 --- a/tests/common/server_fixture.rs +++ b/influxdb_iox/tests/common/server_fixture.rs @@ -292,7 +292,7 @@ struct TestServer { } // Options for creating test servers -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone)] pub struct TestConfig { /// Additional environment variables env: Vec<(String, String)>, diff --git a/tests/common/udp_listener.rs b/influxdb_iox/tests/common/udp_listener.rs similarity index 100% rename from tests/common/udp_listener.rs rename to influxdb_iox/tests/common/udp_listener.rs diff --git a/tests/end_to_end.rs b/influxdb_iox/tests/end_to_end.rs similarity index 100% rename from tests/end_to_end.rs rename to influxdb_iox/tests/end_to_end.rs diff --git a/tests/end_to_end_cases/debug_cli.rs b/influxdb_iox/tests/end_to_end_cases/debug_cli.rs similarity index 72% rename from tests/end_to_end_cases/debug_cli.rs rename to influxdb_iox/tests/end_to_end_cases/debug_cli.rs index 9f001a5153..8a7217ac8a 100644 --- a/tests/end_to_end_cases/debug_cli.rs +++ b/influxdb_iox/tests/end_to_end_cases/debug_cli.rs @@ -27,3 +27,16 @@ async fn test_dump_catalog() { predicate::str::contains("Transaction").and(predicate::str::contains("DecodeError")), ); } + +#[tokio::test] +async fn test_print_cpu() { + Command::cargo_bin("influxdb_iox") + .unwrap() + .arg("debug") + .arg("print-cpu") + .assert() + .success() + .stdout(predicate::str::contains( + "rustc is using the following target options", + )); +} diff --git a/tests/end_to_end_cases/deletes.rs b/influxdb_iox/tests/end_to_end_cases/deletes.rs similarity index 100% rename from tests/end_to_end_cases/deletes.rs rename to influxdb_iox/tests/end_to_end_cases/deletes.rs diff --git a/tests/end_to_end_cases/flight_api.rs b/influxdb_iox/tests/end_to_end_cases/flight_api.rs similarity index 100% rename from tests/end_to_end_cases/flight_api.rs rename to influxdb_iox/tests/end_to_end_cases/flight_api.rs diff --git a/tests/end_to_end_cases/freeze.rs b/influxdb_iox/tests/end_to_end_cases/freeze.rs similarity index 100% rename from tests/end_to_end_cases/freeze.rs rename to influxdb_iox/tests/end_to_end_cases/freeze.rs diff --git a/tests/end_to_end_cases/http.rs b/influxdb_iox/tests/end_to_end_cases/http.rs similarity index 100% rename from tests/end_to_end_cases/http.rs rename to influxdb_iox/tests/end_to_end_cases/http.rs diff --git a/tests/end_to_end_cases/influxdb_ioxd.rs b/influxdb_iox/tests/end_to_end_cases/influxdb_ioxd.rs similarity index 100% rename from tests/end_to_end_cases/influxdb_ioxd.rs rename to influxdb_iox/tests/end_to_end_cases/influxdb_ioxd.rs diff --git a/tests/end_to_end_cases/management_api.rs b/influxdb_iox/tests/end_to_end_cases/management_api.rs similarity index 98% rename from 
tests/end_to_end_cases/management_api.rs rename to influxdb_iox/tests/end_to_end_cases/management_api.rs index 177f2aaa95..6b75efba97 100644 --- a/tests/end_to_end_cases/management_api.rs +++ b/influxdb_iox/tests/end_to_end_cases/management_api.rs @@ -1121,8 +1121,8 @@ async fn test_get_server_status_global_error() { let server_fixture = ServerFixture::create_single_use().await; let mut client = server_fixture.management_client(); - // we need to "break" the object store AFTER the server was started, otherwise the server process will exit - // immediately + // we need to "break" the object store AFTER the server was started, otherwise the server + // process will exit immediately let metadata = server_fixture.dir().metadata().unwrap(); let mut permissions = metadata.permissions(); permissions.set_mode(0o000); @@ -1137,7 +1137,8 @@ async fn test_get_server_status_global_error() { loop { let status = client.get_server_status().await.unwrap(); if let Some(err) = status.error { - assert!(dbg!(err.message).starts_with("error listing databases in object storage:")); + assert!(dbg!(err.message) + .starts_with("error getting server config from object storage:")); assert!(status.database_statuses.is_empty()); return; } @@ -1208,6 +1209,33 @@ async fn test_get_server_status_db_error() { other_gen_path.push("rules.pb"); std::fs::write(other_gen_path, "foo").unwrap(); + // create the server config listing the ownership of these three databases + let mut path = server_fixture.dir().to_path_buf(); + path.push("42"); + path.push("config.pb"); + + let data = ServerConfig { + databases: vec![ + (String::from("my_db"), String::from("42/my_db")), + ( + String::from("soft_deleted"), + String::from("42/soft_deleted"), + ), + ( + String::from("multiple_active"), + String::from("42/multiple_active"), + ), + ] + .into_iter() + .collect(), + }; + + let mut encoded = bytes::BytesMut::new(); + generated_types::server_config::encode_persisted_server_config(&data, &mut encoded) + .expect("server config serialization should be valid"); + let encoded = encoded.freeze(); + std::fs::write(path, encoded).unwrap(); + // initialize client.update_server_id(42).await.expect("set ID failed"); server_fixture.wait_server_initialized().await; diff --git a/tests/end_to_end_cases/management_cli.rs b/influxdb_iox/tests/end_to_end_cases/management_cli.rs similarity index 99% rename from tests/end_to_end_cases/management_cli.rs rename to influxdb_iox/tests/end_to_end_cases/management_cli.rs index b318a58fa9..baeb39ac43 100644 --- a/tests/end_to_end_cases/management_cli.rs +++ b/influxdb_iox/tests/end_to_end_cases/management_cli.rs @@ -8,8 +8,8 @@ use generated_types::google::longrunning::IoxOperation; use generated_types::influxdata::iox::management::v1::{ operation_metadata::Job, WipePreservedCatalog, }; +use tempfile::TempDir; use test_helpers::make_temp_file; -use write_buffer::maybe_skip_kafka_integration; use crate::{ common::server_fixture::ServerFixture, @@ -910,9 +910,9 @@ async fn test_wipe_persisted_catalog_error_db_exists() { #[tokio::test] async fn test_skip_replay() { - let kafka_connection = maybe_skip_kafka_integration!(); + let write_buffer_dir = TempDir::new().unwrap(); let db_name = rand_name(); - let server_fixture = fixture_replay_broken(&db_name, &kafka_connection).await; + let server_fixture = fixture_replay_broken(&db_name, write_buffer_dir.path()).await; let addr = server_fixture.grpc_base(); Command::cargo_bin("influxdb_iox") diff --git a/tests/end_to_end_cases/metrics.rs 
b/influxdb_iox/tests/end_to_end_cases/metrics.rs similarity index 100% rename from tests/end_to_end_cases/metrics.rs rename to influxdb_iox/tests/end_to_end_cases/metrics.rs diff --git a/tests/end_to_end_cases/mod.rs b/influxdb_iox/tests/end_to_end_cases/mod.rs similarity index 100% rename from tests/end_to_end_cases/mod.rs rename to influxdb_iox/tests/end_to_end_cases/mod.rs diff --git a/tests/end_to_end_cases/operations_api.rs b/influxdb_iox/tests/end_to_end_cases/operations_api.rs similarity index 100% rename from tests/end_to_end_cases/operations_api.rs rename to influxdb_iox/tests/end_to_end_cases/operations_api.rs diff --git a/tests/end_to_end_cases/operations_cli.rs b/influxdb_iox/tests/end_to_end_cases/operations_cli.rs similarity index 100% rename from tests/end_to_end_cases/operations_cli.rs rename to influxdb_iox/tests/end_to_end_cases/operations_cli.rs diff --git a/tests/end_to_end_cases/persistence.rs b/influxdb_iox/tests/end_to_end_cases/persistence.rs similarity index 100% rename from tests/end_to_end_cases/persistence.rs rename to influxdb_iox/tests/end_to_end_cases/persistence.rs diff --git a/tests/end_to_end_cases/read_api.rs b/influxdb_iox/tests/end_to_end_cases/read_api.rs similarity index 100% rename from tests/end_to_end_cases/read_api.rs rename to influxdb_iox/tests/end_to_end_cases/read_api.rs diff --git a/tests/end_to_end_cases/read_cli.rs b/influxdb_iox/tests/end_to_end_cases/read_cli.rs similarity index 100% rename from tests/end_to_end_cases/read_cli.rs rename to influxdb_iox/tests/end_to_end_cases/read_cli.rs diff --git a/tests/end_to_end_cases/run_cli.rs b/influxdb_iox/tests/end_to_end_cases/run_cli.rs similarity index 100% rename from tests/end_to_end_cases/run_cli.rs rename to influxdb_iox/tests/end_to_end_cases/run_cli.rs diff --git a/tests/end_to_end_cases/scenario.rs b/influxdb_iox/tests/end_to_end_cases/scenario.rs similarity index 94% rename from tests/end_to_end_cases/scenario.rs rename to influxdb_iox/tests/end_to_end_cases/scenario.rs index 2b1f3cb9c5..f9d2048bad 100644 --- a/tests/end_to_end_cases/scenario.rs +++ b/influxdb_iox/tests/end_to_end_cases/scenario.rs @@ -1,6 +1,5 @@ -use std::convert::TryFrom; use std::iter::once; -use std::num::NonZeroU32; +use std::path::Path; use std::time::Duration; use std::{convert::TryInto, str, u32}; use std::{sync::Arc, time::SystemTime}; @@ -31,9 +30,9 @@ use generated_types::{ ReadSource, TimestampRange, }; use influxdb_iox_client::{connection::Connection, flight::PerformQuery}; +use time::SystemProvider; use write_buffer::core::WriteBufferWriting; -use write_buffer::kafka::test_utils::{kafka_sequencer_options, purge_kafka_topic}; -use write_buffer::kafka::KafkaBufferProducer; +use write_buffer::file::FileBufferProducer; use crate::common::server_fixture::{ServerFixture, TestConfig, DEFAULT_SERVER_ID}; @@ -655,7 +654,7 @@ pub async fn fixture_broken_catalog(db_name: &str) -> ServerFixture { } /// Creates a database that cannot be replayed -pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> ServerFixture { +pub async fn fixture_replay_broken(db_name: &str, write_buffer_path: &Path) -> ServerFixture { let server_id = DEFAULT_SERVER_ID; let test_config = TestConfig::new().with_env("INFLUXDB_IOX_SKIP_REPLAY", "no"); @@ -680,11 +679,11 @@ pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> Ser name: db_name.to_string(), write_buffer_connection: Some(WriteBufferConnection { direction: write_buffer_connection::Direction::Read.into(), - r#type: "kafka".to_string(), - 
connection: kafka_connection.to_string(), + r#type: "file".to_string(), + connection: write_buffer_path.display().to_string(), creation_config: Some(WriteBufferCreationConfig { n_sequencers: 1, - options: kafka_sequencer_options(), + ..Default::default() }), ..Default::default() }), @@ -708,45 +707,42 @@ pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> Ser .unwrap(); // ingest data as mixed throughput - let creation_config = Some(data_types::database_rules::WriteBufferCreationConfig { - n_sequencers: NonZeroU32::try_from(1).unwrap(), - options: kafka_sequencer_options(), - }); - let producer = KafkaBufferProducer::new( - kafka_connection, + let time_provider = Arc::new(SystemProvider::new()); + let producer = FileBufferProducer::new( + write_buffer_path, db_name, - &Default::default(), - creation_config.as_ref(), - Arc::new(time::SystemProvider::new()), + Default::default(), + time_provider, ) .await .unwrap(); - producer + let sequencer_id = producer.sequencer_ids().into_iter().next().unwrap(); + let (sequence_1, _) = producer .store_entry( &lp_to_entries("table_1,partition_by=a foo=1 10", &partition_template) .pop() .unwrap(), - 0, + sequencer_id, None, ) .await .unwrap(); - producer + let (sequence_2, _) = producer .store_entry( &lp_to_entries("table_1,partition_by=b foo=2 20", &partition_template) .pop() .unwrap(), - 0, + sequencer_id, None, ) .await .unwrap(); - producer + let (sequence_3, _) = producer .store_entry( &lp_to_entries("table_1,partition_by=b foo=3 30", &partition_template) .pop() .unwrap(), - 0, + sequencer_id, None, ) .await @@ -766,8 +762,7 @@ pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> Ser ) .await; - // purge data from Kafka - purge_kafka_topic(kafka_connection, db_name).await; + // add new entry to the end producer .store_entry( &lp_to_entries("table_1,partition_by=c foo=4 40", &partition_template) @@ -779,6 +774,29 @@ pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> Ser .await .unwrap(); + // purge data from write buffer + write_buffer::file::test_utils::remove_entry( + write_buffer_path, + db_name, + sequencer_id, + sequence_1.number, + ) + .await; + write_buffer::file::test_utils::remove_entry( + write_buffer_path, + db_name, + sequencer_id, + sequence_2.number, + ) + .await; + write_buffer::file::test_utils::remove_entry( + write_buffer_path, + db_name, + sequencer_id, + sequence_3.number, + ) + .await; + // Try to replay and error let fixture = fixture.restart_server().await; diff --git a/tests/end_to_end_cases/sql_cli.rs b/influxdb_iox/tests/end_to_end_cases/sql_cli.rs similarity index 100% rename from tests/end_to_end_cases/sql_cli.rs rename to influxdb_iox/tests/end_to_end_cases/sql_cli.rs diff --git a/tests/end_to_end_cases/storage_api.rs b/influxdb_iox/tests/end_to_end_cases/storage_api.rs similarity index 100% rename from tests/end_to_end_cases/storage_api.rs rename to influxdb_iox/tests/end_to_end_cases/storage_api.rs diff --git a/tests/end_to_end_cases/system_tables.rs b/influxdb_iox/tests/end_to_end_cases/system_tables.rs similarity index 100% rename from tests/end_to_end_cases/system_tables.rs rename to influxdb_iox/tests/end_to_end_cases/system_tables.rs diff --git a/tests/end_to_end_cases/tracing.rs b/influxdb_iox/tests/end_to_end_cases/tracing.rs similarity index 100% rename from tests/end_to_end_cases/tracing.rs rename to influxdb_iox/tests/end_to_end_cases/tracing.rs diff --git a/tests/end_to_end_cases/write_api.rs 
b/influxdb_iox/tests/end_to_end_cases/write_api.rs similarity index 100% rename from tests/end_to_end_cases/write_api.rs rename to influxdb_iox/tests/end_to_end_cases/write_api.rs diff --git a/tests/end_to_end_cases/write_buffer.rs b/influxdb_iox/tests/end_to_end_cases/write_buffer.rs similarity index 51% rename from tests/end_to_end_cases/write_buffer.rs rename to influxdb_iox/tests/end_to_end_cases/write_buffer.rs index 16dd15ea70..db698f0e90 100644 --- a/tests/end_to_end_cases/write_buffer.rs +++ b/influxdb_iox/tests/end_to_end_cases/write_buffer.rs @@ -1,9 +1,13 @@ use crate::{ - common::server_fixture::ServerFixture, + common::{ + server_fixture::{ServerFixture, TestConfig}, + udp_listener::UdpCapture, + }, end_to_end_cases::scenario::{rand_name, DatabaseBuilder}, }; use arrow_util::assert_batches_sorted_eq; -use entry::{test_helpers::lp_to_entry, Entry}; +use entry::test_helpers::lp_to_entry; +use futures::StreamExt; use generated_types::influxdata::iox::management::v1::{ write_buffer_connection::Direction as WriteBufferDirection, WriteBufferConnection, }; @@ -11,35 +15,35 @@ use influxdb_iox_client::{ management::{generated_types::WriteBufferCreationConfig, CreateDatabaseError}, write::WriteError, }; -use rdkafka::{ - consumer::{Consumer, StreamConsumer}, - producer::{FutureProducer, FutureRecord}, - ClientConfig, Message, Offset, TopicPartitionList, -}; -use std::convert::TryFrom; +use std::sync::Arc; +use tempfile::TempDir; use test_helpers::assert_contains; -use write_buffer::{kafka::test_utils::kafka_sequencer_options, maybe_skip_kafka_integration}; +use time::SystemProvider; +use write_buffer::{ + core::{WriteBufferReading, WriteBufferWriting}, + file::{FileBufferConsumer, FileBufferProducer}, +}; #[tokio::test] -async fn writes_go_to_kafka() { - let kafka_connection = maybe_skip_kafka_integration!(); +async fn writes_go_to_write_buffer() { + let write_buffer_dir = TempDir::new().unwrap(); - // set up a database with a write buffer pointing at kafka + // set up a database with a write buffer pointing at write buffer let server = ServerFixture::create_shared().await; let db_name = rand_name(); let write_buffer_connection = WriteBufferConnection { direction: WriteBufferDirection::Write.into(), - r#type: "kafka".to_string(), - connection: kafka_connection.to_string(), + r#type: "file".to_string(), + connection: write_buffer_dir.path().display().to_string(), creation_config: Some(WriteBufferCreationConfig { n_sequencers: 1, - options: kafka_sequencer_options(), + ..Default::default() }), ..Default::default() }; DatabaseBuilder::new(db_name.clone()) - .write_buffer(write_buffer_connection) + .write_buffer(write_buffer_connection.clone()) .build(server.grpc_channel()) .await; @@ -58,43 +62,32 @@ async fn writes_go_to_kafka() { .expect("cannot write"); assert_eq!(num_lines_written, 3); - // check the data is in kafka - let mut cfg = ClientConfig::new(); - cfg.set("bootstrap.servers", kafka_connection); - cfg.set("session.timeout.ms", "6000"); - cfg.set("enable.auto.commit", "false"); - cfg.set("group.id", "placeholder"); - - let consumer: StreamConsumer = cfg.create().unwrap(); - let mut topics = TopicPartitionList::new(); - topics.add_partition(&db_name, 0); - topics - .set_partition_offset(&db_name, 0, Offset::Beginning) - .unwrap(); - consumer.assign(&topics).unwrap(); - - let message = consumer.recv().await.unwrap(); - assert_eq!(message.topic(), db_name); - - let entry = Entry::try_from(message.payload().unwrap().to_vec()).unwrap(); + // check the data is in write buffer + let 
mut consumer = + FileBufferConsumer::new(write_buffer_dir.path(), &db_name, Default::default(), None) + .await + .unwrap(); + let (_, mut stream) = consumer.streams().into_iter().next().unwrap(); + let sequenced_entry = stream.stream.next().await.unwrap().unwrap(); + let entry = sequenced_entry.entry(); let partition_writes = entry.partition_writes().unwrap(); assert_eq!(partition_writes.len(), 2); } #[tokio::test] -async fn writes_go_to_kafka_whitelist() { - let kafka_connection = maybe_skip_kafka_integration!(); +async fn writes_go_to_write_buffer_whitelist() { + let write_buffer_dir = TempDir::new().unwrap(); - // set up a database with a write buffer pointing at kafka + // set up a database with a write buffer pointing at write buffer let server = ServerFixture::create_shared().await; let db_name = rand_name(); let write_buffer_connection = WriteBufferConnection { direction: WriteBufferDirection::Write.into(), - r#type: "kafka".to_string(), - connection: kafka_connection.to_string(), + r#type: "file".to_string(), + connection: write_buffer_dir.path().display().to_string(), creation_config: Some(WriteBufferCreationConfig { n_sequencers: 1, - options: kafka_sequencer_options(), + ..Default::default() }), ..Default::default() }; @@ -121,99 +114,74 @@ async fn writes_go_to_kafka_whitelist() { .expect("cannot write"); assert_eq!(num_lines_written, 4); - // check the data is in kafka - let mut cfg = ClientConfig::new(); - cfg.set("bootstrap.servers", kafka_connection); - cfg.set("session.timeout.ms", "6000"); - cfg.set("enable.auto.commit", "false"); - cfg.set("group.id", "placeholder"); - - let consumer: StreamConsumer = cfg.create().unwrap(); - let mut topics = TopicPartitionList::new(); - topics.add_partition(&db_name, 0); - topics - .set_partition_offset(&db_name, 0, Offset::Beginning) - .unwrap(); - consumer.assign(&topics).unwrap(); - - let message = consumer.recv().await.unwrap(); - assert_eq!(message.topic(), db_name); - - let entry = Entry::try_from(message.payload().unwrap().to_vec()).unwrap(); + // check the data is in write buffer + let mut consumer = + FileBufferConsumer::new(write_buffer_dir.path(), &db_name, Default::default(), None) + .await + .unwrap(); + let (_, mut stream) = consumer.streams().into_iter().next().unwrap(); + let sequenced_entry = stream.stream.next().await.unwrap().unwrap(); + let entry = sequenced_entry.entry(); let partition_writes = entry.partition_writes().unwrap(); assert_eq!(partition_writes.len(), 1); } -async fn produce_to_kafka_directly( - producer: &FutureProducer, - lp: &str, - topic: &str, - partition: Option, -) { - let entry = lp_to_entry(lp); - let mut record: FutureRecord<'_, String, _> = FutureRecord::to(topic).payload(entry.data()); - - if let Some(pid) = partition { - record = record.partition(pid); - } - - producer - .send_result(record) - .unwrap() - .await - .unwrap() - .unwrap(); -} - #[tokio::test] -async fn reads_come_from_kafka() { - let kafka_connection = maybe_skip_kafka_integration!(); +async fn reads_come_from_write_buffer() { + let write_buffer_dir = TempDir::new().unwrap(); - // set up a database to read from Kafka + // set up a database to read from write buffer let server = ServerFixture::create_shared().await; let db_name = rand_name(); let write_buffer_connection = WriteBufferConnection { direction: WriteBufferDirection::Read.into(), - r#type: "kafka".to_string(), - connection: kafka_connection.to_string(), + r#type: "file".to_string(), + connection: write_buffer_dir.path().display().to_string(), creation_config: 
Some(WriteBufferCreationConfig { n_sequencers: 2, - options: kafka_sequencer_options(), + ..Default::default() }), ..Default::default() }; - // Common Kafka config - let mut cfg = ClientConfig::new(); - cfg.set("bootstrap.servers", &kafka_connection); - cfg.set("message.timeout.ms", "5000"); - DatabaseBuilder::new(db_name.clone()) .write_buffer(write_buffer_connection) .build(server.grpc_channel()) .await; - // put some points in Kafka - let producer: FutureProducer = cfg.create().unwrap(); + // put some points in write buffer + let time_provider = Arc::new(SystemProvider::new()); + let producer = FileBufferProducer::new( + write_buffer_dir.path(), + &db_name, + Default::default(), + time_provider, + ) + .await + .unwrap(); + let mut sequencer_ids = producer.sequencer_ids().into_iter(); + let sequencer_id_1 = sequencer_ids.next().unwrap(); + let sequencer_id_2 = sequencer_ids.next().unwrap(); - // Kafka partitions must be configured based on the primary key because ordering across Kafka - // partitions is undefined, so the upsert semantics would be undefined. Entries that can - // potentially be merged must end up in the same Kafka partition. This test follows that - // constraint, but doesn't actually encode it. - - // Put some data for `upc,region=west` in partition 0 + // Put some data for `upc,region=west` in sequencer 1 let lp_lines = [ "upc,region=west user=23.2 100", "upc,region=west user=21.0 150", ]; - produce_to_kafka_directly(&producer, &lp_lines.join("\n"), &db_name, Some(0)).await; + producer + .store_entry(&lp_to_entry(&lp_lines.join("\n")), sequencer_id_1, None) + .await + .unwrap(); - // Put some data for `upc,region=east` in partition 1 + // Put some data for `upc,region=east` in sequencer 2 let lp_lines = [ "upc,region=east user=76.2 300", "upc,region=east user=88.7 350", ]; - produce_to_kafka_directly(&producer, &lp_lines.join("\n"), &db_name, Some(1)).await; + producer + .store_entry(&lp_to_entry(&lp_lines.join("\n")), sequencer_id_2, None) + .await + .unwrap(); let check = async { let mut interval = tokio::time::interval(std::time::Duration::from_millis(500)); @@ -261,19 +229,19 @@ async fn reads_come_from_kafka() { } #[tokio::test] -async fn cant_write_to_db_reading_from_kafka() { - let kafka_connection = maybe_skip_kafka_integration!(); +async fn cant_write_to_db_reading_from_write_buffer() { + let write_buffer_dir = TempDir::new().unwrap(); - // set up a database to read from Kafka + // set up a database to read from write buffer let server = ServerFixture::create_shared().await; let db_name = rand_name(); let write_buffer_connection = WriteBufferConnection { direction: WriteBufferDirection::Read.into(), - r#type: "kafka".to_string(), - connection: kafka_connection.to_string(), + r#type: "file".to_string(), + connection: write_buffer_dir.path().display().to_string(), creation_config: Some(WriteBufferCreationConfig { n_sequencers: 1, - options: kafka_sequencer_options(), + ..Default::default() }), ..Default::default() }; @@ -283,7 +251,7 @@ async fn cant_write_to_db_reading_from_kafka() { .build(server.grpc_channel()) .await; - // Writing to this database is an error; all data comes from Kafka + // Writing to this database is an error; all data comes from write buffer let mut write_client = server.write_client(); let err = write_client .write(&db_name, "temp,region=south color=1") @@ -302,15 +270,15 @@ async fn cant_write_to_db_reading_from_kafka() { #[tokio::test] async fn test_create_database_missing_write_buffer_sequencers() { - let kafka_connection = 
maybe_skip_kafka_integration!(); + let write_buffer_dir = TempDir::new().unwrap(); - // set up a database to read from Kafka + // set up a database to read from write buffer let server = ServerFixture::create_shared().await; let db_name = rand_name(); let write_buffer_connection = WriteBufferConnection { direction: WriteBufferDirection::Read.into(), - r#type: "kafka".to_string(), - connection: kafka_connection.to_string(), + r#type: "file".to_string(), + connection: write_buffer_dir.path().display().to_string(), ..Default::default() }; @@ -325,3 +293,89 @@ async fn test_create_database_missing_write_buffer_sequencers() { &err ); } + +#[tokio::test] +pub async fn test_cross_write_buffer_tracing() { + let write_buffer_dir = TempDir::new().unwrap(); + + // setup tracing + let udp_capture = UdpCapture::new().await; + let test_config = TestConfig::new() + .with_env("TRACES_EXPORTER", "jaeger") + .with_env("TRACES_EXPORTER_JAEGER_AGENT_HOST", udp_capture.ip()) + .with_env("TRACES_EXPORTER_JAEGER_AGENT_PORT", udp_capture.port()) + .with_client_header("jaeger-debug-id", "some-debug-id"); + + // we need to use two servers but the same DB name here because the write buffer topic is named after the DB name + let db_name = rand_name(); + + // create producer server + let server_write = ServerFixture::create_single_use_with_config(test_config.clone()).await; + server_write + .management_client() + .update_server_id(1) + .await + .unwrap(); + server_write.wait_server_initialized().await; + let conn_write = WriteBufferConnection { + direction: WriteBufferDirection::Write.into(), + r#type: "file".to_string(), + connection: write_buffer_dir.path().display().to_string(), + creation_config: Some(WriteBufferCreationConfig { + n_sequencers: 1, + ..Default::default() + }), + ..Default::default() + }; + DatabaseBuilder::new(db_name.clone()) + .write_buffer(conn_write.clone()) + .build(server_write.grpc_channel()) + .await; + + // create consumer DB + let server_read = ServerFixture::create_single_use_with_config(test_config).await; + server_read + .management_client() + .update_server_id(2) + .await + .unwrap(); + server_read.wait_server_initialized().await; + let conn_read = WriteBufferConnection { + direction: WriteBufferDirection::Read.into(), + ..conn_write + }; + DatabaseBuilder::new(db_name.clone()) + .write_buffer(conn_read) + .build(server_read.grpc_channel()) + .await; + + // write some points + let mut write_client = server_write.write_client(); + let lp_lines = [ + "cpu,region=west user=23.2 100", + "cpu,region=west user=21.0 150", + "disk,region=east bytes=99i 200", + ]; + let num_lines_written = write_client + .write(&db_name, lp_lines.join("\n")) + .await + .expect("cannot write"); + assert_eq!(num_lines_written, 3); + + // "shallow" packet inspection and verify the UDP server got + // something that had some expected results (maybe we could + // eventually verify the payload here too) + udp_capture + .wait_for(|m| m.to_string().contains("IOx write buffer")) + .await; + udp_capture + .wait_for(|m| m.to_string().contains("stored entry")) + .await; + + // // debugging assistance + // tokio::time::sleep(std::time::Duration::from_secs(10)).await; + // println!("Traces received (1):\n\n{:#?}", udp_capture.messages()); + + // wait for the UDP server to shutdown + udp_capture.stop().await +} diff --git a/tests/end_to_end_cases/write_cli.rs b/influxdb_iox/tests/end_to_end_cases/write_cli.rs similarity index 100% rename from tests/end_to_end_cases/write_cli.rs rename to 
influxdb_iox/tests/end_to_end_cases/write_cli.rs diff --git a/tests/end_to_end_cases/write_pb.rs b/influxdb_iox/tests/end_to_end_cases/write_pb.rs similarity index 100% rename from tests/end_to_end_cases/write_pb.rs rename to influxdb_iox/tests/end_to_end_cases/write_pb.rs diff --git a/influxdb_iox_client/Cargo.toml b/influxdb_iox_client/Cargo.toml index e43f0cbe54..e4a5d4dd18 100644 --- a/influxdb_iox_client/Cargo.toml +++ b/influxdb_iox_client/Cargo.toml @@ -2,11 +2,11 @@ name = "influxdb_iox_client" version = "0.1.0" authors = ["Dom Dwyer "] -edition = "2018" +edition = "2021" [features] flight = ["arrow", "arrow-flight", "arrow_util", "serde/derive", "serde_json", "futures-util"] -format = ["arrow"] +format = ["arrow", "arrow_util"] [dependencies] # Workspace dependencies, in alphabetical order diff --git a/influxdb_line_protocol/Cargo.toml b/influxdb_line_protocol/Cargo.toml index b2a194c86b..096cd3f997 100644 --- a/influxdb_line_protocol/Cargo.toml +++ b/influxdb_line_protocol/Cargo.toml @@ -2,7 +2,7 @@ name = "influxdb_line_protocol" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order nom = "7" @@ -11,4 +11,4 @@ snafu = "0.6.2" observability_deps = { path = "../observability_deps" } [dev-dependencies] # In alphabetical order -test_helpers = { path = "../test_helpers" } \ No newline at end of file +test_helpers = { path = "../test_helpers" } diff --git a/influxdb_storage_client/Cargo.toml b/influxdb_storage_client/Cargo.toml index 77e949c8b8..ad0742e840 100644 --- a/influxdb_storage_client/Cargo.toml +++ b/influxdb_storage_client/Cargo.toml @@ -2,7 +2,7 @@ name = "influxdb_storage_client" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" [dependencies] client_util = { path = "../client_util" } @@ -11,4 +11,4 @@ prost = "0.8" tonic = { version = "0.5.0" } futures-util = { version = "0.3.1" } -[dev-dependencies] \ No newline at end of file +[dev-dependencies] diff --git a/influxdb_tsm/Cargo.toml b/influxdb_tsm/Cargo.toml index 200cb8a50f..79e64c3676 100644 --- a/influxdb_tsm/Cargo.toml +++ b/influxdb_tsm/Cargo.toml @@ -2,7 +2,7 @@ name = "influxdb_tsm" version = "0.1.0" authors = ["Edd Robinson "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order integer-encoding = "3.0.2" diff --git a/influxdb_tsm/src/mapper.rs b/influxdb_tsm/src/mapper.rs index 19195c4e47..cf30aeb89e 100644 --- a/influxdb_tsm/src/mapper.rs +++ b/influxdb_tsm/src/mapper.rs @@ -655,7 +655,7 @@ mod tests { #[test] fn map_tsm_index() { - let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = GzDecoder::new(file.unwrap()); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -671,7 +671,7 @@ mod tests { #[test] fn map_field_columns_file() { - let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = GzDecoder::new(file.unwrap()); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -714,7 +714,7 @@ mod tests { #[test] fn measurement_table_columns() { - let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = GzDecoder::new(file.unwrap()); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); diff --git 
a/influxdb_tsm/src/reader.rs b/influxdb_tsm/src/reader.rs index dd3366c0ce..eccd02b6ca 100644 --- a/influxdb_tsm/src/reader.rs +++ b/influxdb_tsm/src/reader.rs @@ -19,7 +19,7 @@ use std::u64; /// # use std::io::BufReader; /// # use std::io::Cursor; /// # use std::io::Read; -/// # let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); +/// # let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); /// # let mut decoder = GzDecoder::new(file.unwrap()); /// # let mut buf = Vec::new(); /// # decoder.read_to_end(&mut buf).unwrap(); @@ -687,7 +687,7 @@ mod tests { #[test] fn read_tsm_index() { - let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = GzDecoder::new(file.unwrap()); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -700,7 +700,7 @@ mod tests { #[test] fn read_tsm_block() { - let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = GzDecoder::new(file.unwrap()); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -754,7 +754,7 @@ mod tests { #[test] fn decode_tsm_blocks() { - let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../test_fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = GzDecoder::new(file.unwrap()); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -833,12 +833,12 @@ mod tests { #[test] fn check_tsm_cpu_usage() { - walk_index_and_check_for_errors("../tests/fixtures/cpu_usage.tsm.gz"); + walk_index_and_check_for_errors("../test_fixtures/cpu_usage.tsm.gz"); } #[test] fn check_tsm_000000000000005_000000002() { - walk_index_and_check_for_errors("../tests/fixtures/000000000000005-000000002.tsm.gz"); + walk_index_and_check_for_errors("../test_fixtures/000000000000005-000000002.tsm.gz"); } #[test] diff --git a/internal_types/Cargo.toml b/internal_types/Cargo.toml index 9d44a6c55e..a4a010e16a 100644 --- a/internal_types/Cargo.toml +++ b/internal_types/Cargo.toml @@ -2,7 +2,7 @@ name = "internal_types" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "InfluxDB IOx internal types, shared between IOx instances" readme = "README.md" diff --git a/iox_data_generator/Cargo.toml b/iox_data_generator/Cargo.toml index 393ac1a3b1..ba0d13e1cd 100644 --- a/iox_data_generator/Cargo.toml +++ b/iox_data_generator/Cargo.toml @@ -2,7 +2,7 @@ name = "iox_data_generator" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" default-run = "iox_data_generator" [dependencies] @@ -26,7 +26,7 @@ snafu = "0.6.8" tokio = { version = "1.11", features = ["macros", "rt-multi-thread"] } toml = "0.5.6" tracing = "0.1" -tracing-subscriber = "0.2.25" +tracing-subscriber = "0.3.0" uuid = { version = "0.8.1", default_features = false } [dev-dependencies] diff --git a/iox_object_store/Cargo.toml b/iox_object_store/Cargo.toml index d98b82d208..de6a42e660 100644 --- a/iox_object_store/Cargo.toml +++ b/iox_object_store/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "iox_object_store" version = "0.1.0" -edition = "2018" +edition = "2021" description = "IOx-specific semantics wrapping the general-purpose object store crate" [dependencies] diff --git a/iox_object_store/src/lib.rs b/iox_object_store/src/lib.rs index aff1ed9bdc..c8d40dd896 100644 --- 
a/iox_object_store/src/lib.rs +++ b/iox_object_store/src/lib.rs @@ -114,8 +114,6 @@ impl Generation { impl IoxObjectStore { /// Get the data for the server config to determine the names and locations of the databases /// that this server owns. - // TODO: this is in the process of replacing list_possible_databases for the floating databases - // design pub async fn get_server_config_file(inner: &ObjectStore, server_id: ServerId) -> Result { let path = paths::server_config_path(inner, server_id); let mut stream = inner.get(&path).await?; @@ -155,35 +153,6 @@ impl IoxObjectStore { RootPath::new(inner, server_id, database_name) } - /// List database names and their locations in object storage that need to be further checked - /// for generations and whether they're marked as deleted or not. - // TODO: this is in the process of being deprecated in favor of get_server_config_file - pub async fn list_possible_databases( - inner: &ObjectStore, - server_id: ServerId, - ) -> Result, String)>> { - let path = paths::all_databases_path(inner, server_id); - - let list_result = inner.list_with_delimiter(&path).await?; - - Ok(list_result - .common_prefixes - .into_iter() - .filter_map(|prefix| { - let prefix_parsed: DirsAndFileName = prefix.clone().into(); - let last = prefix_parsed - .directories - .last() - .expect("path can't be empty"); - let db_name = DatabaseName::new(last.encoded().to_string()) - .log_if_error("invalid database directory") - .ok()?; - - Some((db_name, prefix.to_raw())) - }) - .collect()) - } - /// List this server's databases in object storage along with their generation IDs. pub async fn list_detailed_databases( inner: &ObjectStore, @@ -208,8 +177,7 @@ impl IoxObjectStore { /// List database names in object storage along with all existing generations for each database /// and whether the generations are marked as deleted or not. Useful for finding candidates - /// to restore or to permanently delete. Makes many more calls to object storage than - /// [`IoxObjectStore::list_possible_databases`]. + /// to restore or to permanently delete. Makes many calls to object storage. 
async fn list_all_databases( inner: &ObjectStore, server_id: ServerId, @@ -1117,50 +1085,6 @@ mod tests { iox_object_store.write_tombstone().await.unwrap(); } - #[tokio::test] - async fn list_possible_databases_returns_all_potential_databases() { - let object_store = make_object_store(); - let server_id = make_server_id(); - - // Create a normal database, will be in the list - let db_normal = DatabaseName::new("db_normal").unwrap(); - create_database(Arc::clone(&object_store), server_id, &db_normal).await; - - // Create a database, then delete it - will still be in the list - let db_deleted = DatabaseName::new("db_deleted").unwrap(); - let db_deleted_iox_store = - create_database(Arc::clone(&object_store), server_id, &db_deleted).await; - delete_database(&db_deleted_iox_store).await; - - // Put a file in a directory that looks like a database directory but has no rules, - // will still be in the list - let not_a_db = DatabaseName::new("not_a_db").unwrap(); - let mut not_rules_path = object_store.new_path(); - not_rules_path.push_all_dirs(&[&server_id.to_string(), not_a_db.as_str(), "0"]); - not_rules_path.set_file_name("not_rules.txt"); - object_store - .put(¬_rules_path, Bytes::new()) - .await - .unwrap(); - - // Put a file in a directory that's an invalid database name - this WON'T be in the list - let invalid_db_name = ("a".repeat(65)).to_string(); - let mut invalid_db_name_rules_path = object_store.new_path(); - invalid_db_name_rules_path.push_all_dirs(&[&server_id.to_string(), &invalid_db_name, "0"]); - invalid_db_name_rules_path.set_file_name("rules.pb"); - object_store - .put(&invalid_db_name_rules_path, Bytes::new()) - .await - .unwrap(); - - let possible = IoxObjectStore::list_possible_databases(&object_store, server_id) - .await - .unwrap(); - let mut names: Vec<_> = possible.into_iter().map(|d| d.0).collect(); - names.sort(); - assert_eq!(names, vec![db_deleted, db_normal, not_a_db]); - } - #[tokio::test] async fn list_all_databases_returns_generation_info() { let object_store = make_object_store(); diff --git a/lifecycle/Cargo.toml b/lifecycle/Cargo.toml index 8c827855ef..0f61c025e7 100644 --- a/lifecycle/Cargo.toml +++ b/lifecycle/Cargo.toml @@ -2,7 +2,7 @@ name = "lifecycle" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" description = "Implements the IOx data lifecycle" [dependencies] diff --git a/logfmt/Cargo.toml b/logfmt/Cargo.toml index 7118e276ce..3c50fcfb47 100644 --- a/logfmt/Cargo.toml +++ b/logfmt/Cargo.toml @@ -3,13 +3,14 @@ name = "logfmt" version = "0.1.0" authors = ["Andrew Lamb "] description = "tracing_subscriber layer for writing out logfmt formatted events" -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order observability_deps = { path = "../observability_deps" } -tracing-subscriber = "0.2" +tracing-subscriber = "0.3" [dev-dependencies] # In alphabetical order once_cell = { version = "1.4.0", features = ["parking_lot"] } parking_lot = "0.11.2" regex = "1.4.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/logfmt/src/lib.rs b/logfmt/src/lib.rs index ee95907761..4b1de785f6 100644 --- a/logfmt/src/lib.rs +++ b/logfmt/src/lib.rs @@ -19,12 +19,18 @@ use tracing_subscriber::{fmt::MakeWriter, layer::Context, registry::LookupSpan, /// looked very small and did not (obviously) work with the tracing subscriber /// /// [logfmt]: https://brandur.org/logfmt -pub struct LogFmtLayer { +pub struct LogFmtLayer +where + W: for<'writer> MakeWriter<'writer>, +{ writer: W, 
display_target: bool, } -impl LogFmtLayer { +impl LogFmtLayer +where + W: for<'writer> MakeWriter<'writer>, +{ /// Create a new logfmt Layer to pass into tracing_subscriber /// /// Note this layer simply formats and writes to the specified writer. It @@ -68,7 +74,7 @@ impl LogFmtLayer { impl Layer for LogFmtLayer where - W: MakeWriter + 'static, + W: for<'writer> MakeWriter<'writer> + 'static, S: Subscriber + for<'a> LookupSpan<'a>, { fn register_callsite( @@ -78,7 +84,7 @@ where Interest::always() } - fn new_span(&self, attrs: &tracing::span::Attributes<'_>, id: &Id, ctx: Context<'_, S>) { + fn on_new_span(&self, attrs: &tracing::span::Attributes<'_>, id: &Id, ctx: Context<'_, S>) { let writer = self.writer.make_writer(); let metadata = ctx.metadata(id).expect("span should have metadata"); let mut p = FieldPrinter::new(writer, metadata.level(), self.display_target); diff --git a/logfmt/tests/logging.rs b/logfmt/tests/logging.rs index d4f6d04d9c..44d556e384 100644 --- a/logfmt/tests/logging.rs +++ b/logfmt/tests/logging.rs @@ -363,7 +363,7 @@ impl std::io::Write for CapturedWriter { } } -impl MakeWriter for CapturedWriter { +impl MakeWriter<'_> for CapturedWriter { type Writer = Self; fn make_writer(&self) -> Self::Writer { diff --git a/metric/Cargo.toml b/metric/Cargo.toml index 787d25d2f7..f0e24ca25e 100644 --- a/metric/Cargo.toml +++ b/metric/Cargo.toml @@ -2,7 +2,7 @@ name = "metric" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order diff --git a/metric_exporters/Cargo.toml b/metric_exporters/Cargo.toml index 7f9020fe2b..8af1f1cade 100644 --- a/metric_exporters/Cargo.toml +++ b/metric_exporters/Cargo.toml @@ -2,7 +2,7 @@ name = "metric_exporters" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order diff --git a/mutable_batch/Cargo.toml b/mutable_batch/Cargo.toml index ee412b5557..a050d9711d 100644 --- a/mutable_batch/Cargo.toml +++ b/mutable_batch/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mutable_batch" version = "0.1.0" -edition = "2018" +edition = "2021" description = "A mutable arrow RecordBatch" [dependencies] diff --git a/mutable_batch/src/column.rs b/mutable_batch/src/column.rs index b881fa36c1..842818026a 100644 --- a/mutable_batch/src/column.rs +++ b/mutable_batch/src/column.rs @@ -1,10 +1,9 @@ //! 
A [`Column`] stores the rows for a given column name use std::fmt::Formatter; -use std::iter::Enumerate; +use std::iter::{Enumerate, Zip}; use std::mem; use std::sync::Arc; -use std::{convert::TryInto, iter::Zip}; use arrow::error::ArrowError; use arrow::{ diff --git a/mutable_batch/src/lib.rs b/mutable_batch/src/lib.rs index fc8d945215..c70a2cde09 100644 --- a/mutable_batch/src/lib.rs +++ b/mutable_batch/src/lib.rs @@ -175,6 +175,20 @@ impl MutableBatch { Ok(()) } + /// Extend this [`MutableBatch`] with `ranges` rows from `other` + pub fn extend_from_ranges( + &mut self, + other: &Self, + ranges: &[Range], + ) -> writer::Result<()> { + let to_insert = ranges.iter().map(|x| x.end - x.start).sum(); + + let mut writer = writer::Writer::new(self, to_insert); + writer.write_batch_ranges(other, ranges)?; + writer.commit(); + Ok(()) + } + /// Returns a reference to the specified column pub(crate) fn column(&self, column: &str) -> Result<&Column> { let idx = self diff --git a/mutable_batch/src/writer.rs b/mutable_batch/src/writer.rs index 9ed436080e..a330f1bf98 100644 --- a/mutable_batch/src/writer.rs +++ b/mutable_batch/src/writer.rs @@ -499,86 +499,105 @@ impl<'a> Writer<'a> { src: &MutableBatch, range: Range, ) -> Result<()> { - if range.start == 0 && range.end == src.row_count { + self.write_batch_ranges(src, &[range]) + } + + /// Write the rows identified by `ranges` to the provided MutableBatch + pub(crate) fn write_batch_ranges( + &mut self, + src: &MutableBatch, + ranges: &[Range], + ) -> Result<()> { + let to_insert = self.to_insert; + + if to_insert == src.row_count { return self.write_batch(src); } - assert_eq!(range.end - range.start, self.to_insert); for (src_col_name, src_col_idx) in &src.column_names { let src_col = &src.columns[*src_col_idx]; let (dst_col_idx, dst_col) = self.column_mut(src_col_name, src_col.influx_type)?; let stats = match (&mut dst_col.data, &src_col.data) { - (ColumnData::F64(dst_data, _), ColumnData::F64(src_data, _)) => { - dst_data.extend_from_slice(&src_data[range.clone()]); - Statistics::F64(compute_stats(src_col.valid.bytes(), range.clone(), |x| { - &src_data[x] - })) - } - (ColumnData::I64(dst_data, _), ColumnData::I64(src_data, _)) => { - dst_data.extend_from_slice(&src_data[range.clone()]); - Statistics::I64(compute_stats(src_col.valid.bytes(), range.clone(), |x| { - &src_data[x] - })) - } - (ColumnData::U64(dst_data, _), ColumnData::U64(src_data, _)) => { - dst_data.extend_from_slice(&src_data[range.clone()]); - Statistics::U64(compute_stats(src_col.valid.bytes(), range.clone(), |x| { - &src_data[x] - })) - } + (ColumnData::F64(dst_data, _), ColumnData::F64(src_data, _)) => Statistics::F64( + write_slice(to_insert, ranges, src_col.valid.bytes(), src_data, dst_data), + ), + (ColumnData::I64(dst_data, _), ColumnData::I64(src_data, _)) => Statistics::I64( + write_slice(to_insert, ranges, src_col.valid.bytes(), src_data, dst_data), + ), + (ColumnData::U64(dst_data, _), ColumnData::U64(src_data, _)) => Statistics::U64( + write_slice(to_insert, ranges, src_col.valid.bytes(), src_data, dst_data), + ), (ColumnData::Bool(dst_data, _), ColumnData::Bool(src_data, _)) => { - dst_data.extend_from_range(src_data, range.clone()); - Statistics::Bool(compute_bool_stats( - src_col.valid.bytes(), - range.clone(), - src_data, - )) + dst_data.reserve(to_insert); + let mut stats = StatValues::new_empty(); + for range in ranges { + dst_data.extend_from_range(src_data, range.clone()); + compute_bool_stats( + src_col.valid.bytes(), + range.clone(), + src_data, + &mut stats, + 
) + } + Statistics::Bool(stats) } (ColumnData::String(dst_data, _), ColumnData::String(src_data, _)) => { - dst_data.extend_from_range(src_data, range.clone()); - Statistics::String(compute_stats(src_col.valid.bytes(), range.clone(), |x| { - src_data.get(x).unwrap() - })) + let mut stats = StatValues::new_empty(); + for range in ranges { + dst_data.extend_from_range(src_data, range.clone()); + compute_stats(src_col.valid.bytes(), range.clone(), &mut stats, |x| { + src_data.get(x).unwrap() + }) + } + Statistics::String(stats) } ( ColumnData::Tag(dst_data, dst_dict, _), ColumnData::Tag(src_data, src_dict, _), ) => { + dst_data.reserve(to_insert); + let mut mapping: Vec<_> = vec![None; src_dict.values().len()]; - let mut stats = StatValues::new_empty(); - dst_data.extend(src_data[range.clone()].iter().map(|src_id| match *src_id { - INVALID_DID => { - stats.update_for_nulls(1); - INVALID_DID - } - _ => { - let maybe_did = &mut mapping[*src_id as usize]; - match maybe_did { - Some(did) => { - stats.total_count += 1; - *did + for range in ranges { + dst_data.extend(src_data[range.clone()].iter().map( + |src_id| match *src_id { + INVALID_DID => { + stats.update_for_nulls(1); + INVALID_DID } - None => { - let value = src_dict.lookup_id(*src_id).unwrap(); - stats.update(value); + _ => { + let maybe_did = &mut mapping[*src_id as usize]; + match maybe_did { + Some(did) => { + stats.total_count += 1; + *did + } + None => { + let value = src_dict.lookup_id(*src_id).unwrap(); + stats.update(value); - let did = dst_dict.lookup_value_or_insert(value); - *maybe_did = Some(did); - did + let did = dst_dict.lookup_value_or_insert(value); + *maybe_did = Some(did); + did + } + } } - } - } - })); + }, + )); + } Statistics::String(stats) } _ => unreachable!(), }; - dst_col - .valid - .extend_from_range(&src_col.valid, range.clone()); + dst_col.valid.reserve(to_insert); + for range in ranges { + dst_col + .valid + .extend_from_range(&src_col.valid, range.clone()); + } self.statistics.push((dst_col_idx, stats)); } @@ -707,12 +726,16 @@ fn append_valid_mask(column: &mut Column, valid_mask: Option<&[u8]>, to_insert: } } -fn compute_bool_stats(valid: &[u8], range: Range, col_data: &BitSet) -> StatValues { +fn compute_bool_stats( + valid: &[u8], + range: Range, + col_data: &BitSet, + stats: &mut StatValues, +) { // There are likely faster ways to do this let indexes = iter_set_positions_with_offset(valid, range.start).take_while(|idx| *idx < range.end); - let mut stats = StatValues::new_empty(); for index in indexes { let value = col_data.get(index); stats.update(&value) @@ -720,11 +743,33 @@ fn compute_bool_stats(valid: &[u8], range: Range, col_data: &BitSet) -> S let count = range.end - range.start; stats.update_for_nulls(count as u64 - stats.total_count); +} + +fn write_slice( + to_insert: usize, + ranges: &[Range], + valid: &[u8], + src_data: &[T], + dst_data: &mut Vec, +) -> StatValues +where + T: Clone + PartialOrd + IsNan, +{ + dst_data.reserve(to_insert); + let mut stats = StatValues::new_empty(); + for range in ranges { + dst_data.extend_from_slice(&src_data[range.clone()]); + compute_stats(valid, range.clone(), &mut stats, |x| &src_data[x]); + } stats } -fn compute_stats<'a, T, U, F>(valid: &[u8], range: Range, accessor: F) -> StatValues -where +fn compute_stats<'a, T, U, F>( + valid: &[u8], + range: Range, + stats: &mut StatValues, + accessor: F, +) where U: 'a + ToOwned + PartialOrd + ?Sized + IsNan, F: Fn(usize) -> &'a U, T: std::borrow::Borrow, @@ -733,14 +778,12 @@ where .take_while(|idx| *idx < 
range.end) .map(accessor); - let mut stats = StatValues::new_empty(); for value in values { stats.update(value) } let count = range.end - range.start; stats.update_for_nulls(count as u64 - stats.total_count); - stats } impl<'a> Drop for Writer<'a> { diff --git a/mutable_batch_lp/Cargo.toml b/mutable_batch_lp/Cargo.toml index cc1dd86458..bc36496c3c 100644 --- a/mutable_batch_lp/Cargo.toml +++ b/mutable_batch_lp/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "mutable_batch_lp" version = "0.1.0" -edition = "2018" +edition = "2021" description = "Conversion logic for line protocol -> MutableBatch" [dependencies] +hashbrown = "0.11" influxdb_line_protocol = { path = "../influxdb_line_protocol" } mutable_batch = { path = "../mutable_batch" } schema = { path = "../schema" } diff --git a/mutable_batch_lp/benches/write_lp.rs b/mutable_batch_lp/benches/write_lp.rs index 5bd7d5d1a3..d3038cd6e4 100644 --- a/mutable_batch_lp/benches/write_lp.rs +++ b/mutable_batch_lp/benches/write_lp.rs @@ -4,10 +4,10 @@ use bytes::Bytes; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use flate2::read::GzDecoder; -use mutable_batch_lp::lines_to_batch; +use mutable_batch_lp::lines_to_batches; fn generate_lp_bytes() -> Bytes { - let raw = include_bytes!("../../tests/fixtures/lineproto/read_filter.lp.gz"); + let raw = include_bytes!("../../test_fixtures/lineproto/read_filter.lp.gz"); let mut gz = GzDecoder::new(&raw[..]); let mut buffer = Vec::new(); @@ -23,7 +23,9 @@ pub fn write_lp(c: &mut Criterion) { group.bench_function(BenchmarkId::from_parameter(count), |b| { b.iter(|| { for _ in 0..*count { - lines_to_batch(std::str::from_utf8(&lp_bytes).unwrap(), 0).unwrap(); + let batches = + lines_to_batches(std::str::from_utf8(&lp_bytes).unwrap(), 0).unwrap(); + assert_eq!(batches.len(), 1); } }); }); diff --git a/mutable_batch_lp/src/lib.rs b/mutable_batch_lp/src/lib.rs index 0a5f7267a4..2f2446e808 100644 --- a/mutable_batch_lp/src/lib.rs +++ b/mutable_batch_lp/src/lib.rs @@ -11,6 +11,7 @@ clippy::clone_on_ref_ptr )] +use hashbrown::HashMap; use influxdb_line_protocol::{parse_lines, FieldValue, ParsedLine}; use mutable_batch::writer::Writer; use mutable_batch::MutableBatch; @@ -36,18 +37,26 @@ pub enum Error { /// Result type for line protocol conversion pub type Result = std::result::Result; -/// Converts the provided lines of line protocol to a [`MutableBatch`] -pub fn lines_to_batch(lines: &str, default_time: i64) -> Result { - let mut batch = MutableBatch::new(); +/// Converts the provided lines of line protocol to a set of [`MutableBatch`] +/// keyed by measurement name +pub fn lines_to_batches(lines: &str, default_time: i64) -> Result> { + let mut batches = HashMap::new(); for (line_idx, maybe_line) in parse_lines(lines).enumerate() { let line = maybe_line.context(LineProtocol { line: line_idx + 1 })?; + let measurement = line.series.measurement.as_str(); + + let (_, batch) = batches + .raw_entry_mut() + .from_key(measurement) + .or_insert_with(|| (measurement.to_string(), MutableBatch::new())); // TODO: Reuse writer - let mut writer = Writer::new(&mut batch, 1); + let mut writer = Writer::new(batch, 1); write_line(&mut writer, line, default_time).context(Write { line: line_idx + 1 })?; writer.commit(); } - Ok(batch) + + Ok(batches) } fn write_line( @@ -95,10 +104,14 @@ mod tests { fn test_basic() { let lp = r#"cpu,tag1=v1,tag2=v2 val=2i 0 cpu,tag1=v4,tag2=v1 val=2i 0 + mem,tag1=v2 ival=3i 0 cpu,tag2=v2 val=3i 1 - cpu,tag1=v1,tag2=v2 fval=2.0"#; + cpu,tag1=v1,tag2=v2 fval=2.0 + 
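Reviewer note: `lines_to_batches` now groups lines by measurement and uses hashbrown's raw-entry API so each measurement's `String` key is allocated only on first insertion. A small sketch of that lookup, assuming `hashbrown = "0.11"` and using `Vec<&str>` as a stand-in for `MutableBatch` (the measurement parsing here is deliberately naive):

```rust
use hashbrown::HashMap;

/// Group lines by their measurement (the token before the first ',' or ' '),
/// allocating each measurement's String key only once.
fn group_by_measurement(lines: &str) -> HashMap<String, Vec<&str>> {
    let mut batches: HashMap<String, Vec<&str>> = HashMap::new();

    for line in lines.lines().map(str::trim).filter(|l| !l.is_empty()) {
        let measurement = line
            .split(|c| c == ',' || c == ' ')
            .next()
            .unwrap_or(line);

        // raw_entry_mut().from_key() looks up with a &str and only builds the
        // owned String when the key is missing -- the same trick used in
        // `lines_to_batches`.
        let (_, batch) = batches
            .raw_entry_mut()
            .from_key(measurement)
            .or_insert_with(|| (measurement.to_string(), Vec::new()));

        batch.push(line);
    }

    batches
}

fn main() {
    let lp = "cpu,tag1=v1 val=2i 0\nmem,tag1=v2 ival=3i 0\ncpu,tag2=v2 val=3i 1";
    let batches = group_by_measurement(lp);
    assert_eq!(batches.len(), 2);
    assert_eq!(batches["cpu"].len(), 2);
    assert_eq!(batches["mem"].len(), 1);
}
```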
mem,tag1=v5 ival=2i 1 + "#; - let batch = lines_to_batch(lp, 5).unwrap(); + let batch = lines_to_batches(lp, 5).unwrap(); + assert_eq!(batch.len(), 2); assert_batches_eq!( &[ @@ -111,7 +124,19 @@ mod tests { "| 2 | v1 | v2 | 1970-01-01T00:00:00.000000005Z | |", "+------+------+------+--------------------------------+-----+", ], - &[batch.to_arrow(Selection::All).unwrap()] + &[batch["cpu"].to_arrow(Selection::All).unwrap()] + ); + + assert_batches_eq!( + &[ + "+------+------+--------------------------------+", + "| ival | tag1 | time |", + "+------+------+--------------------------------+", + "| 3 | v2 | 1970-01-01T00:00:00Z |", + "| 2 | v5 | 1970-01-01T00:00:00.000000001Z |", + "+------+------+--------------------------------+", + ], + &[batch["mem"].to_arrow(Selection::All).unwrap()] ); } } diff --git a/mutable_batch_pb/Cargo.toml b/mutable_batch_pb/Cargo.toml index 0d55b58948..af9b89a5e8 100644 --- a/mutable_batch_pb/Cargo.toml +++ b/mutable_batch_pb/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mutable_batch_pb" version = "0.1.0" -edition = "2018" +edition = "2021" description = "Conversion logic for binary write protocol <-> MutableBatch" [dependencies] diff --git a/mutable_buffer/Cargo.toml b/mutable_buffer/Cargo.toml index f6fc45a408..c34e433cb5 100644 --- a/mutable_buffer/Cargo.toml +++ b/mutable_buffer/Cargo.toml @@ -2,7 +2,7 @@ name = "mutable_buffer" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order arrow = { version = "6.0", features = ["prettyprint"] } diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index a38d56c065..1538f61094 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -2,7 +2,7 @@ name = "object_store" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order async-trait = "0.1.42" diff --git a/object_store/src/aws.rs b/object_store/src/aws.rs index 5d81a6ed5d..b348aa750b 100644 --- a/object_store/src/aws.rs +++ b/object_store/src/aws.rs @@ -131,6 +131,11 @@ pub enum Error { #[snafu(display("Missing aws-secret-access-key"))] MissingSecretAccessKey, + + NotFound { + location: String, + source: rusoto_core::RusotoError, + }, } /// Configuration for connecting to [Amazon S3](https://aws.amazon.com/s3/). @@ -208,9 +213,18 @@ impl ObjectStoreApi for AmazonS3 { .client .get_object(get_request) .await - .context(UnableToGetData { - bucket: self.bucket_name.to_owned(), - location: key.clone(), + .map_err(|e| match e { + rusoto_core::RusotoError::Service(rusoto_s3::GetObjectError::NoSuchKey(_)) => { + Error::NotFound { + location: key.clone(), + source: e, + } + } + _ => Error::UnableToGetData { + bucket: self.bucket_name.to_owned(), + location: key.clone(), + source: e, + }, })? 
.body .context(NoData { @@ -729,20 +743,20 @@ mod tests { let err = get_nonexistent_object(&integration, Some(location)) .await .unwrap_err(); - if let Some(ObjectStoreError::AwsObjectStoreError { - source: - Error::UnableToGetData { - source, - bucket, - location, - }, - }) = err.downcast_ref::() + if let Some(ObjectStoreError::NotFound { location, source }) = + err.downcast_ref::() { - assert!(matches!( - source, - rusoto_core::RusotoError::Service(rusoto_s3::GetObjectError::NoSuchKey(_)) - )); - assert_eq!(bucket, &config.bucket); + let source_variant = source.downcast_ref::>(); + assert!( + matches!( + source_variant, + Some(rusoto_core::RusotoError::Service( + rusoto_s3::GetObjectError::NoSuchKey(_) + )), + ), + "got: {:?}", + source_variant + ); assert_eq!(location, NON_EXISTENT_NAME); } else { panic!("unexpected error type: {:?}", err); diff --git a/object_store/src/disk.rs b/object_store/src/disk.rs index a77f6e566f..b52e8c3f28 100644 --- a/object_store/src/disk.rs +++ b/object_store/src/disk.rs @@ -30,7 +30,9 @@ pub enum Error { }, #[snafu(display("Unable to walk dir: {}", source))] - UnableToWalkDir { source: walkdir::Error }, + UnableToWalkDir { + source: walkdir::Error, + }, #[snafu(display("Unable to access metadata for {}: {}", path.display(), source))] UnableToAccessMetadata { @@ -39,22 +41,44 @@ pub enum Error { }, #[snafu(display("Unable to copy data to file: {}", source))] - UnableToCopyDataToFile { source: io::Error }, + UnableToCopyDataToFile { + source: io::Error, + }, #[snafu(display("Unable to create dir {}: {}", path.display(), source))] - UnableToCreateDir { source: io::Error, path: PathBuf }, + UnableToCreateDir { + source: io::Error, + path: PathBuf, + }, #[snafu(display("Unable to create file {}: {}", path.display(), err))] - UnableToCreateFile { path: PathBuf, err: io::Error }, + UnableToCreateFile { + path: PathBuf, + err: io::Error, + }, #[snafu(display("Unable to delete file {}: {}", path.display(), source))] - UnableToDeleteFile { source: io::Error, path: PathBuf }, + UnableToDeleteFile { + source: io::Error, + path: PathBuf, + }, #[snafu(display("Unable to open file {}: {}", path.display(), source))] - UnableToOpenFile { source: io::Error, path: PathBuf }, + UnableToOpenFile { + source: io::Error, + path: PathBuf, + }, #[snafu(display("Unable to read data from file {}: {}", path.display(), source))] - UnableToReadBytes { source: io::Error, path: PathBuf }, + UnableToReadBytes { + source: io::Error, + path: PathBuf, + }, + + NotFound { + location: String, + source: io::Error, + }, } /// Local filesystem storage suitable for testing or for opting out of using a @@ -110,9 +134,19 @@ impl ObjectStoreApi for File { async fn get(&self, location: &Self::Path) -> Result>> { let path = self.path(location); - let file = fs::File::open(&path) - .await - .context(UnableToOpenFile { path: &path })?; + let file = fs::File::open(&path).await.map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + Error::NotFound { + location: location.to_string(), + source: e, + } + } else { + Error::UnableToOpenFile { + path: path.clone(), + source: e, + } + } + })?; let s = FramedRead::new(file, BytesCodec::new()) .map_ok(|b| b.freeze()) @@ -297,14 +331,12 @@ impl File { #[cfg(test)] mod tests { - use std::{fs::set_permissions, os::unix::prelude::PermissionsExt}; - use super::*; - use crate::{ - tests::{list_with_delimiter, put_get_delete_list}, - ObjectStore, ObjectStoreApi, ObjectStorePath, + tests::{get_nonexistent_object, list_with_delimiter, put_get_delete_list}, + Error 
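Reviewer note: the local-filesystem store distinguishes a missing file from other open failures by checking `io::ErrorKind::NotFound`. The same mapping, sketched with the blocking `std::fs` API and a stand-in error enum (the real code uses tokio's async `fs::File::open` and snafu-derived errors):

```rust
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Simplified stand-in for the object store error enum.
#[derive(Debug)]
enum Error {
    NotFound { location: String, source: io::Error },
    UnableToOpenFile { path: PathBuf, source: io::Error },
}

fn open(location: &str, path: &Path) -> Result<fs::File, Error> {
    fs::File::open(path).map_err(|e| {
        if e.kind() == io::ErrorKind::NotFound {
            // Missing object: surface a NotFound with the logical location.
            Error::NotFound {
                location: location.to_string(),
                source: e,
            }
        } else {
            // Anything else (permissions, I/O failure, ...) keeps the old shape.
            Error::UnableToOpenFile {
                path: path.to_path_buf(),
                source: e,
            }
        }
    })
}

fn main() {
    let err = open("nonexistentname", Path::new("/definitely/not/here")).unwrap_err();
    assert!(matches!(err, Error::NotFound { .. }));
}
```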
as ObjectStoreError, ObjectStore, ObjectStoreApi, ObjectStorePath, }; + use std::{fs::set_permissions, os::unix::prelude::PermissionsExt}; use tempfile::TempDir; #[tokio::test] @@ -395,4 +427,32 @@ mod tests { // `list_with_delimiter assert!(store.list_with_delimiter(&store.new_path()).await.is_err()); } + + const NON_EXISTENT_NAME: &str = "nonexistentname"; + + #[tokio::test] + async fn get_nonexistent_location() { + let root = TempDir::new().unwrap(); + let integration = ObjectStore::new_file(root.path()); + + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + + let err = get_nonexistent_object(&integration, Some(location)) + .await + .unwrap_err(); + if let Some(ObjectStoreError::NotFound { location, source }) = + err.downcast_ref::() + { + let source_variant = source.downcast_ref::(); + assert!( + matches!(source_variant, Some(std::io::Error { .. }),), + "got: {:?}", + source_variant + ); + assert_eq!(location, NON_EXISTENT_NAME); + } else { + panic!("unexpected error type: {:?}", err); + } + } } diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs index 7a89c84584..0b60c5d4a2 100644 --- a/object_store/src/gcp.rs +++ b/object_store/src/gcp.rs @@ -68,6 +68,11 @@ pub enum Error { bucket: String, location: String, }, + + NotFound { + location: String, + source: cloud_storage::Error, + }, } /// Configuration for connecting to [Google Cloud Storage](https://cloud.google.com/storage/). @@ -122,9 +127,18 @@ impl ObjectStoreApi for GoogleCloudStorage { .object() .download(&bucket_name, &location_copy) .await - .context(UnableToGetData { - bucket: &self.bucket_name, - location, + .map_err(|e| match e { + cloud_storage::Error::Other(ref text) if text.starts_with("No such object") => { + Error::NotFound { + location, + source: e, + } + } + _ => Error::UnableToGetData { + bucket: bucket_name.clone(), + location, + source: e, + }, })?; Ok(futures::stream::once(async move { Ok(bytes.into()) }).boxed()) @@ -337,21 +351,15 @@ mod test { .await .unwrap_err(); - if let Some(ObjectStoreError::GcsObjectStoreError { - source: - Error::UnableToGetData { - source, - bucket, - location, - }, - }) = err.downcast_ref::() + if let Some(ObjectStoreError::NotFound { location, source }) = + err.downcast_ref::() { + let source_variant = source.downcast_ref::(); assert!( - matches!(source, cloud_storage::Error::Other(_)), + matches!(source_variant, Some(cloud_storage::Error::Other(_))), "got: {:?}", - source + source_variant ); - assert_eq!(bucket, &config.bucket); assert_eq!(location, NON_EXISTENT_NAME); } else { panic!("unexpected error type: {:?}", err) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index e967a5cae2..6bdacaf084 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -292,12 +292,9 @@ impl ObjectStoreApi for ObjectStore { (InMemoryThrottled(in_mem_throttled), path::Path::InMemory(location)) => { in_mem_throttled.get(location).await?.err_into().boxed() } - (File(file), path::Path::File(location)) => file - .get(location) - .await - .context(FileObjectStoreError)? 
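Reviewer note: the updated tests assert on the new `NotFound` by downcasting twice, first to the store error and then from its boxed `source` to the concrete backend error. A std-only illustration of that double `downcast_ref` pattern (the `NotFound` struct here is a stand-in for `object_store::Error::NotFound`):

```rust
use std::error::Error as StdError;
use std::fmt;
use std::io;

/// Stand-in for the crate-level error with a boxed, type-erased source.
#[derive(Debug)]
struct NotFound {
    location: String,
    source: Box<dyn StdError + Send + Sync>,
}

impl fmt::Display for NotFound {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Object at location {} not found: {}", self.location, self.source)
    }
}

impl StdError for NotFound {}

fn main() {
    // The test helper hands back a type-erased error...
    let io_err = io::Error::new(io::ErrorKind::NotFound, "no such file");
    let err: Box<dyn StdError> = Box::new(NotFound {
        location: "nonexistentname".to_string(),
        source: Box::new(io_err),
    });

    // ...so the assertions downcast step by step, like the object_store tests.
    if let Some(NotFound { location, source }) = err.downcast_ref::<NotFound>() {
        assert_eq!(location, "nonexistentname");
        let inner = source.downcast_ref::<io::Error>();
        assert!(matches!(inner, Some(e) if e.kind() == io::ErrorKind::NotFound));
    } else {
        panic!("unexpected error type: {:?}", err);
    }
}
```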
- .err_into() - .boxed(), + (File(file), path::Path::File(location)) => { + file.get(location).await?.err_into().boxed() + } (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => { azure.get(location).await?.err_into().boxed() } @@ -609,25 +606,49 @@ pub enum Error { #[snafu(display("{}", source))] DummyObjectStoreError { source: dummy::Error }, + + #[snafu(display("Object at location {} not found: {}", location, source))] + NotFound { + location: String, + source: Box, + }, } impl From for Error { fn from(source: disk::Error) -> Self { - Self::FileObjectStoreError { source } + match source { + disk::Error::NotFound { location, source } => Self::NotFound { + location, + source: source.into(), + }, + _ => Self::FileObjectStoreError { source }, + } } } #[cfg(feature = "gcp")] impl From for Error { fn from(source: gcp::Error) -> Self { - Self::GcsObjectStoreError { source } + match source { + gcp::Error::NotFound { location, source } => Self::NotFound { + location, + source: source.into(), + }, + _ => Self::GcsObjectStoreError { source }, + } } } #[cfg(feature = "aws")] impl From for Error { fn from(source: aws::Error) -> Self { - Self::AwsObjectStoreError { source } + match source { + aws::Error::NotFound { location, source } => Self::NotFound { + location, + source: source.into(), + }, + _ => Self::AwsObjectStoreError { source }, + } } } @@ -640,7 +661,13 @@ impl From for Error { impl From for Error { fn from(source: memory::Error) -> Self { - Self::InMemoryObjectStoreError { source } + match source { + memory::Error::NoDataInMemory { ref location } => Self::NotFound { + location: location.into(), + source: source.into(), + }, + // currently "not found" is the only error that can happen with the in-memory store + } } } diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index c3509cf061..45152f2d8f 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -159,8 +159,8 @@ mod tests { use super::*; use crate::{ - tests::{list_with_delimiter, put_get_delete_list}, - ObjectStore, ObjectStoreApi, ObjectStorePath, + tests::{get_nonexistent_object, list_with_delimiter, put_get_delete_list}, + Error as ObjectStoreError, ObjectStore, ObjectStoreApi, ObjectStorePath, }; use futures::TryStreamExt; @@ -194,4 +194,31 @@ mod tests { .unwrap(); assert_eq!(&*read_data, expected_data); } + + const NON_EXISTENT_NAME: &str = "nonexistentname"; + + #[tokio::test] + async fn nonexistent_location() { + let integration = ObjectStore::new_in_memory(); + + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + + let err = get_nonexistent_object(&integration, Some(location)) + .await + .unwrap_err(); + if let Some(ObjectStoreError::NotFound { location, source }) = + err.downcast_ref::() + { + let source_variant = source.downcast_ref::(); + assert!( + matches!(source_variant, Some(Error::NoDataInMemory { .. 
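Reviewer note: each backend's `From<...> for Error` impl now lifts its own `NotFound` into the shared crate-level variant, boxing the concrete source. A condensed sketch of that lifting with stand-in types (`DiskError` / `StoreError` are illustrative, not the real enums):

```rust
use std::error::Error as StdError;
use std::io;

/// Stand-in for a backend-specific error (e.g. the disk store).
#[derive(Debug)]
enum DiskError {
    NotFound { location: String, source: io::Error },
    Other(io::Error),
}

/// Stand-in for the crate-level object store error.
#[derive(Debug)]
enum StoreError {
    NotFound {
        location: String,
        source: Box<dyn StdError + Send + Sync>,
    },
    Disk(DiskError),
}

impl From<DiskError> for StoreError {
    fn from(source: DiskError) -> Self {
        match source {
            // Lift the backend "not found" into the shared variant, boxing the
            // concrete source so every backend fits the same shape.
            DiskError::NotFound { location, source } => StoreError::NotFound {
                location,
                source: source.into(),
            },
            // Everything else stays wrapped as a backend-specific error.
            other => StoreError::Disk(other),
        }
    }
}

fn main() {
    let missing = DiskError::NotFound {
        location: "foo/bar".to_string(),
        source: io::Error::new(io::ErrorKind::NotFound, "missing"),
    };
    let store_err: StoreError = missing.into();
    assert!(matches!(store_err, StoreError::NotFound { .. }));
    println!("{:?}", store_err);

    let denied: StoreError =
        DiskError::Other(io::Error::new(io::ErrorKind::PermissionDenied, "nope")).into();
    assert!(matches!(denied, StoreError::Disk(_)));
}
```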
}),), + "got: {:?}", + source_variant + ); + assert_eq!(location, NON_EXISTENT_NAME); + } else { + panic!("unexpected error type: {:?}", err); + } + } } diff --git a/observability_deps/Cargo.toml b/observability_deps/Cargo.toml index 4b66765661..01afbce882 100644 --- a/observability_deps/Cargo.toml +++ b/observability_deps/Cargo.toml @@ -2,7 +2,7 @@ name = "observability_deps" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" description = "Observability ecosystem dependencies for InfluxDB IOx, to ensure consistent versions and unified updates" [dependencies] # In alphabetical order diff --git a/packers/Cargo.toml b/packers/Cargo.toml index 2e5580911e..a6f0dc0e60 100644 --- a/packers/Cargo.toml +++ b/packers/Cargo.toml @@ -2,7 +2,7 @@ name = "packers" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order arrow = { version = "6.0", features = ["prettyprint"] } diff --git a/panic_logging/Cargo.toml b/panic_logging/Cargo.toml index 451ddbda33..d46a6b986a 100644 --- a/panic_logging/Cargo.toml +++ b/panic_logging/Cargo.toml @@ -2,7 +2,7 @@ name = "panic_logging" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order observability_deps = { path = "../observability_deps" } diff --git a/parquet_catalog/Cargo.toml b/parquet_catalog/Cargo.toml index 13f64ec10f..1b47f8063f 100644 --- a/parquet_catalog/Cargo.toml +++ b/parquet_catalog/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "parquet_catalog" version = "0.1.0" -edition = "2018" +edition = "2021" [dependencies] arrow = { version = "6.0", features = ["prettyprint"] } diff --git a/parquet_file/Cargo.toml b/parquet_file/Cargo.toml index b5038cc9ad..cb7edf049f 100644 --- a/parquet_file/Cargo.toml +++ b/parquet_file/Cargo.toml @@ -2,7 +2,7 @@ name = "parquet_file" version = "0.1.0" authors = ["Nga Tran "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order arrow = { version = "6.0", features = ["prettyprint"] } diff --git a/persistence_windows/Cargo.toml b/persistence_windows/Cargo.toml index 98ce242dac..a068b524be 100644 --- a/persistence_windows/Cargo.toml +++ b/persistence_windows/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "persistence_windows" version = "0.1.0" -edition = "2018" +edition = "2021" [dependencies] data_types = { path = "../data_types" } diff --git a/predicate/Cargo.toml b/predicate/Cargo.toml index a7f44f6653..843a7ce3a6 100644 --- a/predicate/Cargo.toml +++ b/predicate/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "predicate" version = "0.1.0" -edition = "2018" +edition = "2021" [dependencies] arrow = { version = "6.0", features = ["prettyprint"] } diff --git a/predicate/src/delete_predicate.rs b/predicate/src/delete_predicate.rs index 02e679ad34..6368df3cf7 100644 --- a/predicate/src/delete_predicate.rs +++ b/predicate/src/delete_predicate.rs @@ -120,6 +120,7 @@ impl From for crate::predicate::Predicate { partition_key: None, range: Some(pred.range), exprs: pred.exprs.into_iter().map(|expr| expr.into()).collect(), + value_expr: vec![], } } } diff --git a/predicate/src/predicate.rs b/predicate/src/predicate.rs index b0d6dcf910..1b9ceb2102 100644 --- a/predicate/src/predicate.rs +++ b/predicate/src/predicate.rs @@ -11,7 +11,7 @@ use std::{ use data_types::timestamp::TimestampRange; use datafusion::{ error::DataFusionError, - logical_plan::{col, lit_timestamp_nano, Expr, Operator}, + logical_plan::{col, lit_timestamp_nano, Column, Expr, Operator}, 
optimizer::utils, }; use datafusion_util::{make_range_expr, AndExprBuilder}; @@ -26,13 +26,15 @@ pub const EMPTY_PREDICATE: Predicate = Predicate { exprs: vec![], range: None, partition_key: None, + value_expr: vec![], }; #[derive(Debug, Clone, Copy)] /// The result of evaluating a predicate on a set of rows pub enum PredicateMatch { - /// There is at least one row that matches the predicate - AtLeastOne, + /// There is at least one row that matches the predicate that has + /// at least one non null value in each field of the predicate + AtLeastOneNonNullField, /// There are exactly zero rows that match the predicate Zero, @@ -72,6 +74,11 @@ pub struct Predicate { /// these expressions should be returned. Other rows are excluded /// from the results. pub exprs: Vec, + + /// Optional arbitrary predicates on the special `_value` column. These + /// expressions are applied to `field_columns` projections in the form of + /// `CASE` statement conditions. + pub value_expr: Vec, } impl Predicate { @@ -472,6 +479,14 @@ impl PredicateBuilder { } } +// A representation of the `BinaryExpr` variant of a Datafusion expression. +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct BinaryExpr { + pub left: Column, + pub op: Operator, + pub right: Expr, +} + #[cfg(test)] mod tests { use super::*; diff --git a/query/Cargo.toml b/query/Cargo.toml index 766731c189..8d402d4677 100644 --- a/query/Cargo.toml +++ b/query/Cargo.toml @@ -2,7 +2,7 @@ name = "query" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "IOx Query Interface and Executor" # This crate is designed to be independent of the rest of the IOx diff --git a/query/src/exec/context.rs b/query/src/exec/context.rs index 66493ddb4c..9dbd45df3f 100644 --- a/query/src/exec/context.rs +++ b/query/src/exec/context.rs @@ -22,20 +22,17 @@ use futures::TryStreamExt; use observability_deps::tracing::{debug, trace}; use trace::{ctx::SpanContext, span::SpanRecorder}; -use crate::{ - exec::{ - fieldlist::{FieldList, IntoFieldList}, - non_null_checker::NonNullCheckerExec, - query_tracing::TracedStream, - schema_pivot::{SchemaPivotExec, SchemaPivotNode}, - seriesset::{ - converter::{GroupGenerator, SeriesSetConverter}, - series::Series, - }, - split::StreamSplitExec, - stringset::{IntoStringSet, StringSetRef}, +use crate::exec::{ + fieldlist::{FieldList, IntoFieldList}, + non_null_checker::NonNullCheckerExec, + query_tracing::TracedStream, + schema_pivot::{SchemaPivotExec, SchemaPivotNode}, + seriesset::{ + converter::{GroupGenerator, SeriesSetConverter}, + series::Series, }, - plan::stringset::TableNamePlanBuilder, + split::StreamSplitExec, + stringset::{IntoStringSet, StringSetRef}, }; use crate::plan::{ @@ -499,25 +496,6 @@ impl IOxExecutionContext { } } - /// Executes table_plans and, if returns some rows, add that table into the return list - /// Tables discovered from meta data won't need any plan - pub async fn to_table_names(&self, builder: TableNamePlanBuilder) -> Result { - let ctx = self.child_ctx("to_table_names"); - - // first get all meta data tables - let mut tables = builder.meta_data_table_names().clone(); - - // Now run each plan and if it returns data, add it table in - let table_plans = builder.table_plans(); - for (table, plan) in table_plans { - if !ctx.run_logical_plan(plan).await?.is_empty() { - tables.insert(table.clone()); - } - } - - Ok(Arc::new(tables)) - } - /// Run the plan and return a record batch reader for reading the results pub async fn run_logical_plan(&self, plan: LogicalPlan) 
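Reviewer note: `Predicate::value_expr` keeps the raw `_value` comparisons (column, operator, operand) so the planner can re-apply them later as per-field projections rather than row filters. A dependency-free model of that bookkeeping (`Op`, `Literal`, `ValueExpr`, and this `Predicate` are simplified stand-ins for the datafusion-backed types):

```rust
/// Simplified stand-ins for the datafusion `Operator` and literal `Expr`.
#[derive(Clone, Debug, PartialEq)]
enum Op {
    Eq,
    Gt,
}

#[derive(Clone, Debug, PartialEq)]
enum Literal {
    Float(f64),
}

/// Mirrors the shape of the new `BinaryExpr { left, op, right }`.
#[derive(Clone, Debug, PartialEq)]
struct ValueExpr {
    left: String, // column name; always "_value" for these predicates
    op: Op,
    right: Literal,
}

/// Mirrors the new `Predicate::value_expr` field: ordinary expressions stay in
/// `exprs`, while `_value` comparisons are collected separately so they can be
/// re-applied later as per-field CASE projections.
#[derive(Debug, Default)]
struct Predicate {
    exprs: Vec<String>,
    value_expr: Vec<ValueExpr>,
}

fn main() {
    let mut pred = Predicate::default();

    // `_value = 1.77` and `_value > 1.5` are recorded, not kept as row filters.
    pred.value_expr.push(ValueExpr {
        left: "_value".to_string(),
        op: Op::Eq,
        right: Literal::Float(1.77),
    });
    pred.value_expr.push(ValueExpr {
        left: "_value".to_string(),
        op: Op::Gt,
        right: Literal::Float(1.5),
    });

    assert!(pred.exprs.is_empty());
    assert_eq!(pred.value_expr.len(), 2);
    assert_eq!(pred.value_expr[0].left, "_value");
    assert_eq!(pred.value_expr[0].right, Literal::Float(1.77));
    assert_eq!(pred.value_expr[1].op, Op::Gt);
}
```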
-> Result> { self.run_logical_plans(vec![plan]).await diff --git a/query/src/frontend/influxrpc.rs b/query/src/frontend/influxrpc.rs index 84f3bddbc1..38e09a43d5 100644 --- a/query/src/frontend/influxrpc.rs +++ b/query/src/frontend/influxrpc.rs @@ -4,13 +4,13 @@ use std::{ sync::Arc, }; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::DataType; use data_types::chunk_metadata::ChunkId; use datafusion::{ error::{DataFusionError, Result as DatafusionResult}, logical_plan::{ - binary_expr, lit, Column, DFSchemaRef, Expr, ExprRewriter, LogicalPlan, LogicalPlanBuilder, - Operator, + binary_expr, lit, when, Column, DFSchemaRef, Expr, ExprRewriter, LogicalPlan, + LogicalPlanBuilder, Operator, }, optimizer::utils::expr_to_columns, prelude::col, @@ -20,13 +20,13 @@ use datafusion_util::AsExpr; use hashbrown::{HashMap, HashSet}; use observability_deps::tracing::{debug, trace}; -use predicate::predicate::{Predicate, PredicateMatch}; +use predicate::predicate::{BinaryExpr, Predicate, PredicateMatch}; use schema::selection::Selection; use schema::{InfluxColumnType, Schema, TIME_COLUMN_NAME}; use snafu::{ensure, OptionExt, ResultExt, Snafu}; use crate::{ - exec::{field::FieldColumns, make_schema_pivot}, + exec::{field::FieldColumns, make_non_null_checker, make_schema_pivot}, func::{ selectors::{selector_first, selector_last, selector_max, selector_min, SelectorOutput}, window::make_window_bound_expr, @@ -35,9 +35,7 @@ use crate::{ plan::{ fieldlist::FieldListPlan, seriesset::{SeriesSetPlan, SeriesSetPlans}, - stringset::{ - Error as StringSetError, StringSetPlan, StringSetPlanBuilder, TableNamePlanBuilder, - }, + stringset::{Error as StringSetError, StringSetPlan, StringSetPlanBuilder}, }, provider::ProviderBuilder, QueryChunk, QueryChunkMeta, QueryDatabase, @@ -229,11 +227,13 @@ impl InfluxRpcPlanner { /// . A set of plans of tables of either /// . chunks with deleted data or /// . 
chunks without deleted data but cannot be decided from meta data - pub fn table_names(&self, database: &D, predicate: Predicate) -> Result + pub fn table_names(&self, database: &D, predicate: Predicate) -> Result where D: QueryDatabase + 'static, { - let mut builder = TableNamePlanBuilder::new(); + debug!(predicate=?predicate, "planning table_names"); + + let mut builder = StringSetPlanBuilder::new(); let mut normalizer = PredicateNormalizer::new(predicate); // Mapping between table and chunks that need full plan @@ -243,10 +243,11 @@ impl InfluxRpcPlanner { // and which chunks needs full plan and group them into their table for chunk in database.chunks(normalizer.unnormalized()) { let table_name = chunk.table_name(); - let schema = chunk.schema(); + trace!(chunk_id=%chunk.id(), table_name, "Considering table"); // Table is already in the returned table list, no longer needs to discover it from other chunks - if builder.contains_meta_data_table(table_name.to_string()) { + if builder.contains(table_name) { + trace!("already seen"); continue; } @@ -258,10 +259,11 @@ impl InfluxRpcPlanner { .or_insert_with(Vec::new) .push(Arc::clone(&chunk)); } else { - // See if we can have enough info from the chunk's meta data to answer - // that this table participates in the request - let predicate = normalizer.normalized(table_name, schema); - // + // See if we have enough info only from the chunk's + // meta data to know if the table has data that + // matches the predicate + let predicate = normalizer.normalized(table_name); + // Try and apply the predicate using only metadata let pred_result = chunk .apply_predicate_to_metadata(&predicate) @@ -269,38 +271,38 @@ impl InfluxRpcPlanner { .context(CheckingChunkPredicate { chunk_id: chunk.id(), })?; - // + match pred_result { - PredicateMatch::AtLeastOne => { + PredicateMatch::AtLeastOneNonNullField => { + trace!("Metadata predicate: table matches"); // Meta data of the table covers predicates of the request - builder.append_meta_data_table(table_name.to_string()); + builder.append_string(table_name); } PredicateMatch::Unknown => { + trace!("Metadata predicate: unknown match"); // We cannot match the predicate to get answer from meta data, let do full plan full_plan_table_chunks .entry(table_name.to_string()) .or_insert_with(Vec::new) .push(Arc::clone(&chunk)); } - PredicateMatch::Zero => {} // this chunk's table does not participate in the request + PredicateMatch::Zero => { + trace!("Metadata predicate: zero rows match"); + } // this chunk's table does not participate in the request } } } - // remove items from full_plan_table_chunks whose tables are already in the returned list - let meta_data_tables = builder.meta_data_table_names(); - for table in meta_data_tables { - full_plan_table_chunks.remove(&table); + // remove items from full_plan_table_chunks whose tables are + // already in the returned list + for table in builder.known_strings_iter() { + trace!(%table, "Table is known to have matches, skipping plan"); + full_plan_table_chunks.remove(table); if full_plan_table_chunks.is_empty() { break; } } - // No full plans needed - if full_plan_table_chunks.is_empty() { - return Ok(builder); - } - // Now build plans for full-plan tables for (table_name, chunks) in full_plan_table_chunks { let schema = database.table_schema(&table_name).context(TableRemoved { @@ -309,11 +311,11 @@ impl InfluxRpcPlanner { if let Some(plan) = self.table_name_plan(&table_name, schema, &mut normalizer, chunks)? 
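Reviewer note: the metadata fast path in `table_names` now has three outcomes per chunk: proven match, undecidable, or ruled out. A compact sketch of that dispatch using plain std collections in place of `StringSetPlanBuilder` (the enum mirrors `PredicateMatch`; `plan_table_names` is an illustrative helper, not the real planner):

```rust
use std::collections::{BTreeMap, BTreeSet};

/// Mirrors predicate::PredicateMatch after this change.
#[derive(Debug, Clone, Copy)]
enum PredicateMatch {
    /// At least one matching row with a non-null value in each predicate field.
    AtLeastOneNonNullField,
    /// Metadata alone cannot decide.
    Unknown,
    /// Metadata proves no row matches.
    Zero,
}

fn plan_table_names(
    chunks: &[(&str, PredicateMatch)],
) -> (BTreeSet<String>, BTreeMap<String, usize>) {
    // Names already proven present, and tables that still need a full plan
    // (here just counting chunks per table).
    let mut known = BTreeSet::new();
    let mut full_plan: BTreeMap<String, usize> = BTreeMap::new();

    for (table, pred_result) in chunks {
        if known.contains(*table) {
            continue; // already proven via metadata
        }
        match pred_result {
            PredicateMatch::AtLeastOneNonNullField => {
                known.insert(table.to_string());
            }
            PredicateMatch::Unknown => {
                *full_plan.entry(table.to_string()).or_insert(0) += 1;
            }
            PredicateMatch::Zero => {} // chunk ruled out entirely
        }
    }

    // Tables proven via metadata never need the expensive full plan.
    for table in &known {
        full_plan.remove(table);
    }

    (known, full_plan)
}

fn main() {
    let chunks = [
        ("cpu", PredicateMatch::Unknown),
        ("cpu", PredicateMatch::AtLeastOneNonNullField),
        ("disk", PredicateMatch::Zero),
        ("mem", PredicateMatch::Unknown),
    ];
    let (known, full_plan) = plan_table_names(&chunks);
    assert!(known.contains("cpu"));
    assert_eq!(full_plan.len(), 1); // only "mem" still needs a full plan
    assert!(full_plan.contains_key("mem"));
}
```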
{ - builder.append_plans(table_name, plan); + builder = builder.append_other(plan.into()); } } - Ok(builder) + builder.build().context(CreatingStringSet) } /// Returns a set of plans that produces the names of "tag" @@ -346,7 +348,7 @@ impl InfluxRpcPlanner { let mut do_full_plan = chunk.has_delete_predicates(); let table_name = chunk.table_name(); - let predicate = normalizer.normalized(table_name, chunk.schema()); + let predicate = normalizer.normalized(table_name); // Try and apply the predicate using only metadata let pred_result = chunk @@ -427,14 +429,14 @@ impl InfluxRpcPlanner { let plan = self.tag_keys_plan(&table_name, schema, &mut normalizer, chunks)?; if let Some(plan) = plan { - builder = builder.append(plan) + builder = builder.append_other(plan) } } } // add the known columns we could find from metadata only builder - .append(known_columns.into()) + .append_other(known_columns.into()) .build() .context(CreatingStringSet) } @@ -474,7 +476,7 @@ impl InfluxRpcPlanner { let mut do_full_plan = chunk.has_delete_predicates(); let table_name = chunk.table_name(); - let predicate = normalizer.normalized(table_name, chunk.schema()); + let predicate = normalizer.normalized(table_name); // Try and apply the predicate using only metadata let pred_result = chunk @@ -596,13 +598,13 @@ impl InfluxRpcPlanner { .build() .context(BuildingPlan)?; - builder = builder.append(plan.into()); + builder = builder.append_other(plan.into()); } } // add the known values we could find from metadata only builder - .append(known_values.into()) + .append_other(known_values.into()) .build() .context(CreatingStringSet) } @@ -821,7 +823,7 @@ impl InfluxRpcPlanner { { let mut table_chunks = BTreeMap::new(); for chunk in chunks { - let predicate = normalizer.normalized(chunk.table_name(), chunk.schema()); + let predicate = normalizer.normalized(chunk.table_name()); // Try and apply the predicate using only metadata let pred_result = chunk .apply_predicate_to_metadata(&predicate) @@ -830,19 +832,13 @@ impl InfluxRpcPlanner { chunk_id: chunk.id(), })?; - match pred_result { - PredicateMatch::AtLeastOne | + if !matches!(pred_result, PredicateMatch::Zero) { // have to include chunk as we can't rule it out - PredicateMatch::Unknown => { - let table_name = chunk.table_name().to_string(); - table_chunks - .entry(table_name) - .or_insert_with(Vec::new) - .push(Arc::clone(&chunk)); - } - // Skip chunk here based on metadata - PredicateMatch::Zero => { - } + let table_name = chunk.table_name().to_string(); + table_chunks + .entry(table_name) + .or_insert_with(Vec::new) + .push(Arc::clone(&chunk)); } } Ok(table_chunks) @@ -960,11 +956,11 @@ impl InfluxRpcPlanner { Ok(Some(plan)) } - /// Creates a DataFusion LogicalPlan that returns the timestamp - /// for a specified table: + /// Creates a DataFusion LogicalPlan that returns the values in + /// the fields for a specified table: /// - /// The output looks like (time) - /// The time column is chosen because it must be included in all tables + /// The output produces the table name as a single string if any + /// non null values are passed in. 
/// /// The data is not sorted in any particular order /// @@ -974,13 +970,11 @@ impl InfluxRpcPlanner { /// The created plan looks like: /// /// ```text - /// Projection (select time) + /// NonNullChecker + /// Projection (select fields) /// Filter(predicate) [optional] /// Scan /// ``` - // TODO: for optimization in the future, build `select count(*)` plan instead, - // ,but if we do this, we also need to change the way we handle output - // of the function invoking this because it will always return a number fn table_name_plan( &self, table_name: &str, @@ -991,6 +985,7 @@ impl InfluxRpcPlanner { where C: QueryChunk + 'static, { + debug!(%table_name, "Creating table_name full plan"); let scan_and_filter = self.scan_and_filter(table_name, schema, normalizer, chunks)?; let TableScanAndFilter { plan_builder, @@ -1000,15 +995,11 @@ impl InfluxRpcPlanner { Some(t) => t, }; - // Selection of only time - let select_exprs = schema - .iter() - .filter_map(|(influx_column_type, field)| match influx_column_type { - Some(InfluxColumnType::Timestamp) => Some(col(field.name())), - Some(_) => None, - None => None, - }) - .collect::>(); + // Select only fields requested + let predicate = normalizer.normalized(table_name); + let select_exprs: Vec<_> = filtered_fields_iter(&schema, &predicate) + .map(|field| col(field.name)) + .collect(); let plan = plan_builder .project(select_exprs) @@ -1016,6 +1007,9 @@ impl InfluxRpcPlanner { .build() .context(BuildingPlan)?; + // Add the final node that outputs the table name or not, depending + let plan = make_non_null_checker(table_name, plan); + Ok(Some(plan)) } @@ -1040,9 +1034,8 @@ impl InfluxRpcPlanner { C: QueryChunk + 'static, { let table_name = table_name.as_ref(); - let scan_and_filter = - self.scan_and_filter(table_name, Arc::clone(&schema), normalizer, chunks)?; - let predicate = normalizer.normalized(table_name, schema); + let scan_and_filter = self.scan_and_filter(table_name, schema, normalizer, chunks)?; + let predicate = normalizer.normalized(table_name); let TableScanAndFilter { plan_builder, @@ -1068,9 +1061,9 @@ impl InfluxRpcPlanner { // Select away anything that isn't in the influx data model let tags_fields_and_timestamps: Vec = schema .tags_iter() - .chain(filtered_fields_iter(&schema, &predicate)) - .chain(schema.time_iter()) .map(|field| field.name().as_expr()) + .chain(filtered_fields_iter(&schema, &predicate).map(|f| f.expr)) + .chain(schema.time_iter().map(|field| field.name().as_expr())) .collect(); let plan_builder = plan_builder @@ -1085,7 +1078,7 @@ impl InfluxRpcPlanner { .collect(); let field_columns = filtered_fields_iter(&schema, &predicate) - .map(|field| Arc::from(field.name().as_str())) + .map(|field| Arc::from(field.name)) .collect(); // TODO: remove the use of tag_columns and field_column names @@ -1152,9 +1145,8 @@ impl InfluxRpcPlanner { C: QueryChunk + 'static, { let table_name = table_name.into(); - let scan_and_filter = - self.scan_and_filter(&table_name, Arc::clone(&schema), normalizer, chunks)?; - let predicate = normalizer.normalized(&table_name, schema); + let scan_and_filter = self.scan_and_filter(&table_name, schema, normalizer, chunks)?; + let predicate = normalizer.normalized(&table_name); let TableScanAndFilter { plan_builder, @@ -1263,9 +1255,8 @@ impl InfluxRpcPlanner { C: QueryChunk + 'static, { let table_name = table_name.into(); - let scan_and_filter = - self.scan_and_filter(&table_name, Arc::clone(&schema), normalizer, chunks)?; - let predicate = normalizer.normalized(&table_name, schema); + let 
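Reviewer note: with the `NonNullChecker` on top, the table-name plan effectively asks "does any surviving row carry a non-null value in the selected fields?" and reports the table only if so. A tiny stand-alone sketch of that check, with `Vec<Option<f64>>` columns standing in for the DataFusion plan output:

```rust
/// Return the table name iff any selected field has a non-null value in the
/// surviving rows -- the role the NonNullChecker node plays in the real plan.
fn table_name_if_non_null<'a>(
    table_name: &'a str,
    field_columns: &[Vec<Option<f64>>],
) -> Option<&'a str> {
    let any_non_null = field_columns
        .iter()
        .any(|col| col.iter().any(|v| v.is_some()));
    any_non_null.then(|| table_name)
}

fn main() {
    // Rows already filtered by the predicate; only the "reading" field remains
    // and it is null in every surviving row -> the table is not reported.
    let only_nulls = vec![vec![None, None]];
    assert_eq!(table_name_if_non_null("o2", &only_nulls), None);

    // One non-null value is enough to report the table.
    let with_value = vec![vec![None, Some(51.0)]];
    assert_eq!(table_name_if_non_null("o2", &with_value), Some("o2"));
}
```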
scan_and_filter = self.scan_and_filter(&table_name, schema, normalizer, chunks)?; + let predicate = normalizer.normalized(&table_name); let TableScanAndFilter { plan_builder, @@ -1342,7 +1333,7 @@ impl InfluxRpcPlanner { where C: QueryChunk + 'static, { - let predicate = normalizer.normalized(table_name, Arc::clone(&schema)); + let predicate = normalizer.normalized(table_name); // Scan all columns to begin with (DataFusion projection // push-down optimization will prune out unneeded columns later) @@ -1482,14 +1473,56 @@ pub(crate) struct AggExprs { field_columns: FieldColumns, } -/// Returns an iterator of fields from schema that pass the predicate +// Encapsulates a field column projection as an expression. In the simplest case +// the expression is a `Column` expression. In more complex cases it might be +// a predicate that filters rows for the projection. +#[derive(Clone)] +struct FieldExpr<'a> { + expr: Expr, + name: &'a str, + datatype: &'a DataType, +} + +// Returns an iterator of fields from schema that pass the predicate. If there +// are expressions associated with field column projections then these are +// applied to the column via `CASE` statements. +// +// TODO(edd): correctly support multiple `_value` expressions. Right now they +// are OR'd together, which makes sense for equality operators like `_value == xyz`. fn filtered_fields_iter<'a>( schema: &'a Schema, predicate: &'a Predicate, -) -> impl Iterator { - schema - .fields_iter() - .filter(move |f| predicate.should_include_field(f.name())) +) -> impl Iterator> { + schema.fields_iter().filter_map(move |f| { + if !predicate.should_include_field(f.name()) { + return None; + } + + // For example, assume two fields (`field1` and `field2`) along with + // a predicate like: `_value = 1.32 OR _value = 2.87`. 
The projected + // field columns become: + // + // SELECT + // CASE WHEN #field1 = Float64(1.32) OR #field1 = Float64(2.87) THEN #field1 END AS field1, + // CASE WHEN #field2 = Float64(1.32) OR #field2 = Float64(2.87) THEN #field2 END AS field2 + // + let expr = predicate + .value_expr + .iter() + .map(|BinaryExpr { left: _, op, right }| { + binary_expr(col(f.name()), *op, right.as_expr()) + }) + .reduce(|a, b| a.or(b)) + .map(|when_expr| when(when_expr, col(f.name())).end()) + .unwrap_or_else(|| Ok(col(f.name()))) + .unwrap(); + + Some(FieldExpr { + expr: expr.alias(f.name()), + name: f.name(), + datatype: f.data_type(), + }) + }) } /// Creates aggregate expressions and tracks field output according to @@ -1548,26 +1581,25 @@ impl AggExprs { let mut field_list = Vec::new(); for field in filtered_fields_iter(schema, predicate) { + let field_name = field.name; agg_exprs.push(make_selector_expr( agg, SelectorOutput::Value, - field.name(), - field.data_type(), - field.name(), + field.clone(), + field_name, )?); - let time_column_name = format!("{}_{}", TIME_COLUMN_NAME, field.name()); + let time_column_name = format!("{}_{}", TIME_COLUMN_NAME, field_name); agg_exprs.push(make_selector_expr( agg, SelectorOutput::Time, - field.name(), - field.data_type(), + field, &time_column_name, )?); field_list.push(( - Arc::from(field.name().as_str()), // value name + Arc::from(field_name), // value name Arc::from(time_column_name.as_str()), )); } @@ -1590,12 +1622,21 @@ impl AggExprs { // agg_function(time) as time fn agg_for_read_group(agg: Aggregate, schema: &Schema, predicate: &Predicate) -> Result { let agg_exprs = filtered_fields_iter(schema, predicate) - .chain(schema.time_iter()) - .map(|field| make_agg_expr(agg, field.name())) + .map(|field| make_agg_expr(agg, field)) + .chain(schema.time_iter().map(|field| { + make_agg_expr( + agg, + FieldExpr { + expr: col(field.name()), + datatype: field.data_type(), + name: field.name(), + }, + ) + })) .collect::>>()?; let field_columns = filtered_fields_iter(schema, predicate) - .map(|field| Arc::from(field.name().as_str())) + .map(|field| Arc::from(field.name)) .collect::>() .into(); @@ -1621,11 +1662,11 @@ impl AggExprs { predicate: &Predicate, ) -> Result { let agg_exprs = filtered_fields_iter(schema, predicate) - .map(|field| make_agg_expr(agg, field.name())) + .map(|field| make_agg_expr(agg, field)) .collect::>>()?; let field_columns = filtered_fields_iter(schema, predicate) - .map(|field| Arc::from(field.name().as_str())) + .map(|field| Arc::from(field.name)) .collect::>() .into(); @@ -1639,43 +1680,55 @@ impl AggExprs { /// Creates a DataFusion expression suitable for calculating an aggregate: /// /// equivalent to `CAST agg(field) as field` -fn make_agg_expr(agg: Aggregate, field_name: &str) -> Result { +fn make_agg_expr(agg: Aggregate, field_expr: FieldExpr<'_>) -> Result { // For timestamps, use `MAX` which corresponds to the last // timestamp in the group, unless `MIN` was specifically requested // to be consistent with the Go implementation which takes the // timestamp at the end of the window - let agg = if field_name == TIME_COLUMN_NAME && agg != Aggregate::Min { + let agg = if field_expr.name == TIME_COLUMN_NAME && agg != Aggregate::Min { Aggregate::Max } else { agg }; - agg.to_datafusion_expr(col(field_name)) + let field_name = field_expr.name; + agg.to_datafusion_expr(field_expr.expr) .context(CreatingAggregates) .map(|agg| agg.alias(field_name)) } -/// Creates a DataFusion expression suitable for calculating the time -/// part of a selector: 
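Reviewer note: a sketch of how `filtered_fields_iter` turns recorded `_value` comparisons into a per-field CASE projection, mirroring the calls used in the diff (`binary_expr`, `when(...).end()`, `alias`). It assumes the DataFusion version pinned by this workspace and uses a plain `(Operator, f64)` pair in place of the stored `BinaryExpr`:

```rust
use datafusion::logical_plan::{binary_expr, lit, when, Expr, Operator};
use datafusion::prelude::col;

/// Build the projection expression for one field column given the collected
/// `_value` comparisons, e.g. for `_value = 1.32 OR _value = 2.87`:
///   CASE WHEN field1 = 1.32 OR field1 = 2.87 THEN field1 END AS field1
fn field_projection(field_name: &str, value_ops: &[(Operator, f64)]) -> Expr {
    value_ops
        .iter()
        // `_value <op> <lit>` becomes `<field> <op> <lit>` for this column...
        .map(|(op, v)| binary_expr(col(field_name), *op, lit(*v)))
        // ...and multiple `_value` comparisons are OR'd together.
        .reduce(|a, b| a.or(b))
        // Wrap the combined condition in a CASE so non-matching rows become NULL.
        .map(|cond| when(cond, col(field_name)).end())
        // No `_value` expressions: project the bare column.
        .unwrap_or_else(|| Ok(col(field_name)))
        .unwrap()
        .alias(field_name)
}

fn main() {
    let expr = field_projection("load4", &[(Operator::Eq, 1.32), (Operator::Eq, 2.87)]);
    // Prints something like:
    // CASE WHEN #load4 = Float64(1.32) OR #load4 = Float64(2.87) THEN #load4 END AS load4
    println!("{:?}", expr);
}
```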
+/// Creates a DataFusion expression suitable for calculating the time part of a +/// selector: /// -/// equivalent to `CAST selector_time(field) as column_name` -fn make_selector_expr( +/// The output expression is equivalent to `CAST selector_time(field_expression) +/// as col_name`. +/// +/// In the simplest scenarios the field expressions are `Column` expressions. +/// In some cases the field expressions are `CASE` statements such as for +/// example: +/// +/// CAST selector_time( +/// CASE WHEN field = 1.87 OR field = 1.99 THEN field +/// ELSE NULL +/// END) as col_name +/// +fn make_selector_expr<'a>( agg: Aggregate, output: SelectorOutput, - field_name: &str, - data_type: &DataType, - column_name: &str, + field: FieldExpr<'a>, + col_name: &'a str, ) -> Result { let uda = match agg { - Aggregate::First => selector_first(data_type, output), - Aggregate::Last => selector_last(data_type, output), - Aggregate::Min => selector_min(data_type, output), - Aggregate::Max => selector_max(data_type, output), + Aggregate::First => selector_first(field.datatype, output), + Aggregate::Last => selector_last(field.datatype, output), + Aggregate::Min => selector_min(field.datatype, output), + Aggregate::Max => selector_max(field.datatype, output), _ => return InternalAggregateNotSelector { agg }.fail(), }; + Ok(uda - .call(vec![col(field_name), col(TIME_COLUMN_NAME)]) - .alias(column_name)) + .call(vec![field.expr, col(TIME_COLUMN_NAME)]) + .alias(col_name)) } /// Creates specialized / normalized predicates that are tailored to a specific @@ -1701,13 +1754,13 @@ impl PredicateNormalizer { /// Return a reference to a predicate specialized for `table_name` based on /// its `schema`. - fn normalized(&mut self, table_name: &str, schema: Arc) -> Arc { + fn normalized(&mut self, table_name: &str) -> Arc { if let Some(normalized_predicate) = self.normalized.get(table_name) { return normalized_predicate.inner(); } let normalized_predicate = - TableNormalizedPredicate::new(table_name, schema, self.unnormalized.clone()); + TableNormalizedPredicate::new(table_name, self.unnormalized.clone()); self.normalized .entry(table_name.to_string()) @@ -1752,13 +1805,18 @@ struct TableNormalizedPredicate { } impl TableNormalizedPredicate { - fn new(table_name: &str, schema: Arc, mut inner: Predicate) -> Self { + fn new(table_name: &str, mut inner: Predicate) -> Self { let mut field_projections = BTreeSet::new(); + let mut field_value_exprs = vec![]; + inner.exprs = inner .exprs .into_iter() .map(|e| rewrite_measurement_references(table_name, e)) - .map(|e| rewrite_field_value_references(Arc::clone(&schema), e)) + // Rewrite any references to `_value = some_value` to literal true values. + // Keeps track of these expressions, which can then be used to + // augment field projections with conditions using `CASE` statements. + .map(|e| rewrite_field_value_references(&mut field_value_exprs, e)) .map(|e| { // Rewrite any references to `_field = a_field_name` with a literal true // and keep track of referenced field names to add to the field @@ -1766,6 +1824,8 @@ impl TableNormalizedPredicate { rewrite_field_column_references(&mut field_projections, e) }) .collect::>(); + // Store any field value (`_value`) expressions on the `Predicate`. 
+ inner.value_expr = field_value_exprs; if !field_projections.is_empty() { match &mut inner.field_columns { @@ -1811,23 +1871,19 @@ impl ExprRewriter for MeasurementRewriter<'_> { } } -/// Rewrites a predicate on `_value` to a disjunctive set of expressions on each -/// distinct field column in the table. -/// -/// For example, the predicate `_value = 1.77` on a table with three field -/// columns would be rewritten to: -/// -/// `(field1 = 1.77 OR field2 = 1.77 OR field3 = 1.77)`. -fn rewrite_field_value_references(schema: Arc, expr: Expr) -> Expr { - let mut rewriter = FieldValueRewriter { schema }; +/// Rewrites an expression on `_value` as a boolean true literal, pushing any +/// encountered expressions onto `value_exprs` so they can be moved onto column +/// projections. +fn rewrite_field_value_references(value_exprs: &mut Vec, expr: Expr) -> Expr { + let mut rewriter = FieldValueRewriter { value_exprs }; expr.rewrite(&mut rewriter).expect("rewrite is infallible") } -struct FieldValueRewriter { - schema: Arc, +struct FieldValueRewriter<'a> { + value_exprs: &'a mut Vec, } -impl ExprRewriter for FieldValueRewriter { +impl<'a> ExprRewriter for FieldValueRewriter<'a> { fn mutate(&mut self, expr: Expr) -> DatafusionResult { Ok(match expr { Expr::BinaryExpr { @@ -1836,21 +1892,16 @@ impl ExprRewriter for FieldValueRewriter { ref right, } => { if let Expr::Column(inner) = &**left { - if inner.name != VALUE_COLUMN_NAME { - return Ok(expr); // column name not `_value`. + if inner.name == VALUE_COLUMN_NAME { + self.value_exprs.push(BinaryExpr { + left: inner.to_owned(), + op, + right: right.as_expr(), + }); + return Ok(Expr::Literal(ScalarValue::Boolean(Some(true)))); } - - // build a disjunctive expression using binary expressions - // for each field column and the original expression's - // operator and rhs. - self.schema - .fields_iter() - .map(|field| binary_expr(col(field.name()), op, *right.clone())) - .reduce(|a, b| a.or(b)) - .expect("at least one field column") - } else { - expr } + expr } _ => expr, }) @@ -1915,7 +1966,7 @@ pub fn schema_has_all_expr_columns(schema: &Schema, expr: &Expr) -> bool { #[cfg(test)] mod tests { - use datafusion::logical_plan::Operator; + use datafusion::logical_plan::{binary_expr, Operator}; use schema::builder::SchemaBuilder; use super::*; @@ -1955,56 +2006,57 @@ mod tests { #[test] fn test_field_value_rewriter() { - let schema = SchemaBuilder::new() - .tag("t1") - .tag("t2") - .field("f1", DataType::Float64) - .field("f2", DataType::Float64) - .timestamp() - .build() - .unwrap(); - let mut rewriter = FieldValueRewriter { - schema: Arc::new(schema), + value_exprs: &mut vec![], }; let cases = vec![ ( binary_expr(col("f1"), Operator::Eq, lit(1.82)), binary_expr(col("f1"), Operator::Eq, lit(1.82)), + vec![], ), - (col("t2"), col("t2")), + (col("t2"), col("t2"), vec![]), ( binary_expr(col(VALUE_COLUMN_NAME), Operator::Eq, lit(1.82)), - // - // _value = 1.82 -> f1 = (1.82 OR f2 = 1.82) - // - binary_expr( - binary_expr(col("f1"), Operator::Eq, lit(1.82)), - Operator::Or, - binary_expr(col("f2"), Operator::Eq, lit(1.82)), - ), + // _value = 1.82 -> true + lit(true), + vec![BinaryExpr { + left: Column { + relation: None, + name: VALUE_COLUMN_NAME.into(), + }, + op: Operator::Eq, + right: lit(1.82), + }], ), ]; - for (input, exp) in cases { + for (input, exp, mut value_exprs) in cases { let rewritten = input.rewrite(&mut rewriter).unwrap(); assert_eq!(rewritten, exp); + assert_eq!(rewriter.value_exprs, &mut value_exprs); } // Test case with single field. 
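Reviewer note: the rewriter no longer expands `_value` into an OR over every field column; it replaces the comparison with a literal `true` and records it for the projection stage. A dependency-free sketch of that rewrite over a toy expression type (`Expr`, `ValueExpr`, and the string operator are stand-ins, not datafusion's types):

```rust
/// Toy expression type standing in for datafusion's Expr.
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Column(String),
    Literal(f64),
    Bool(bool),
    Binary { left: Box<Expr>, op: String, right: Box<Expr> },
}

/// Mirror of the recorded `_value` comparison.
#[derive(Clone, Debug, PartialEq)]
struct ValueExpr {
    op: String,
    right: Expr,
}

const VALUE_COLUMN_NAME: &str = "_value";

/// Rewrite `_value <op> <rhs>` to `true`, pushing the comparison onto
/// `value_exprs` so it can be re-applied later as a CASE projection.
fn rewrite_field_value(expr: Expr, value_exprs: &mut Vec<ValueExpr>) -> Expr {
    match expr {
        Expr::Binary { ref left, ref op, ref right } => {
            if let Expr::Column(name) = left.as_ref() {
                if name == VALUE_COLUMN_NAME {
                    value_exprs.push(ValueExpr {
                        op: op.clone(),
                        right: right.as_ref().clone(),
                    });
                    return Expr::Bool(true);
                }
            }
            expr
        }
        other => other,
    }
}

fn main() {
    let mut value_exprs = vec![];

    // `_value = 1.77` becomes `true`, and the comparison is recorded.
    let input = Expr::Binary {
        left: Box::new(Expr::Column("_value".to_string())),
        op: "=".to_string(),
        right: Box::new(Expr::Literal(1.77)),
    };
    assert_eq!(rewrite_field_value(input, &mut value_exprs), Expr::Bool(true));
    assert_eq!(value_exprs[0].op, "=");
    assert_eq!(value_exprs[0].right, Expr::Literal(1.77));

    // Anything else is left untouched.
    let other = Expr::Column("t2".to_string());
    assert_eq!(rewrite_field_value(other.clone(), &mut value_exprs), other);
    assert_eq!(value_exprs.len(), 1);
}
```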
- let schema = SchemaBuilder::new() - .field("f1", DataType::Float64) - .timestamp() - .build() - .unwrap(); let mut rewriter = FieldValueRewriter { - schema: Arc::new(schema), + value_exprs: &mut vec![], }; let input = binary_expr(col(VALUE_COLUMN_NAME), Operator::Gt, lit(1.88)); let rewritten = input.rewrite(&mut rewriter).unwrap(); - assert_eq!(rewritten, binary_expr(col("f1"), Operator::Gt, lit(1.88))); + assert_eq!(rewritten, lit(true)); + assert_eq!( + rewriter.value_exprs, + &mut vec![BinaryExpr { + left: Column { + relation: None, + name: VALUE_COLUMN_NAME.into(), + }, + op: Operator::Gt, + right: lit(1.88), + }] + ); } #[test] diff --git a/query/src/plan/stringset.rs b/query/src/plan/stringset.rs index 627d19e544..b8246281a3 100644 --- a/query/src/plan/stringset.rs +++ b/query/src/plan/stringset.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, sync::Arc}; +use std::sync::Arc; use arrow_util::util::str_iter_to_batch; use datafusion::logical_plan::LogicalPlan; @@ -97,7 +97,7 @@ impl StringSetPlanBuilder { /// Append the strings from the passed plan into ourselves if possible, or /// passes on the plan - pub fn append(mut self, other: StringSetPlan) -> Self { + pub fn append_other(mut self, other: StringSetPlan) -> Self { match other { StringSetPlan::Known(ssref) => match Arc::try_unwrap(ssref) { Ok(mut ss) => { @@ -117,6 +117,23 @@ impl StringSetPlanBuilder { self } + /// Return true if we know already that `s` is contained in the + /// StringSet. Note that if `contains()` returns false, `s` may be + /// in the stringset after execution. + pub fn contains(&self, s: impl AsRef) -> bool { + self.strings.contains(s.as_ref()) + } + + /// Append a single string to the known set of strings in this builder + pub fn append_string(&mut self, s: impl Into) { + self.strings.insert(s.into()); + } + + /// returns an iterator over the currently known strings in this builder + pub fn known_strings_iter(&self) -> impl Iterator { + self.strings.iter() + } + /// Create a StringSetPlan that produces the deduplicated (union) /// of all plans `append`ed to this builder. 
pub fn build(self) -> Result { @@ -143,39 +160,6 @@ impl StringSetPlanBuilder { } } -#[derive(Debug, Default)] -pub struct TableNamePlanBuilder { - /// Known tables achieved from meta data - meta_data_tables: StringSet, - /// Other tables and their general plans - plans: BTreeMap, -} - -impl TableNamePlanBuilder { - pub fn new() -> Self { - Self::default() - } - pub fn append_meta_data_table(&mut self, table: String) { - self.meta_data_tables.insert(table); - } - - pub fn append_plans(&mut self, table_name: String, plan: LogicalPlan) { - self.plans.insert(table_name, plan); - } - - pub fn contains_meta_data_table(&self, table: String) -> bool { - self.meta_data_tables.contains(&table) - } - - pub fn meta_data_table_names(&self) -> StringSet { - self.meta_data_tables.clone() - } - - pub fn table_plans(&self) -> BTreeMap { - self.plans.clone() - } -} - #[cfg(test)] mod tests { use crate::exec::{Executor, ExecutorType}; @@ -196,8 +180,8 @@ mod tests { #[test] fn test_builder_strings_only() { let plan = StringSetPlanBuilder::new() - .append(to_string_set(&["foo", "bar"]).into()) - .append(to_string_set(&["bar", "baz"]).into()) + .append_other(to_string_set(&["foo", "bar"]).into()) + .append_other(to_string_set(&["bar", "baz"]).into()) .build() .unwrap(); @@ -228,9 +212,9 @@ mod tests { // when a df plan is appended the whole plan should be different let plan = StringSetPlanBuilder::new() - .append(to_string_set(&["foo", "bar"]).into()) - .append(vec![df_plan].into()) - .append(to_string_set(&["baz"]).into()) + .append_other(to_string_set(&["foo", "bar"]).into()) + .append_other(vec![df_plan].into()) + .append_other(to_string_set(&["baz"]).into()) .build() .unwrap(); diff --git a/query_tests/Cargo.toml b/query_tests/Cargo.toml index 1b386903d5..bb69fc3fc1 100644 --- a/query_tests/Cargo.toml +++ b/query_tests/Cargo.toml @@ -2,7 +2,7 @@ name = "query_tests" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "Tests of the query engine against different database configurations" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/query_tests/generate/Cargo.toml b/query_tests/generate/Cargo.toml index bbe46ba52b..2d78f367fe 100644 --- a/query_tests/generate/Cargo.toml +++ b/query_tests/generate/Cargo.toml @@ -3,9 +3,9 @@ name = "generate" description = "Creates rust #tests for files in .sql" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order # Note this is a standalone binary and not part of the overall workspace -[workspace] \ No newline at end of file +[workspace] diff --git a/query_tests/src/cancellation.rs b/query_tests/src/cancellation.rs index e0b7a227dd..e39a34a54c 100644 --- a/query_tests/src/cancellation.rs +++ b/query_tests/src/cancellation.rs @@ -8,7 +8,11 @@ use std::{ use arrow_util::assert_batches_sorted_eq; use object_store::{ObjectStore, ObjectStoreIntegration}; -use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner, QueryChunk}; +use query::{ + exec::{ExecutionContextProvider, IOxExecutionContext}, + frontend::sql::SqlQueryPlanner, + QueryChunk, +}; use server::{db::test_helpers::write_lp, utils::TestDb}; #[tokio::test] @@ -58,7 +62,7 @@ async fn test_query_cancellation_slow_store() { // setup query context let ctx = db.new_query_context(None); - assert_eq!(ctx.tasks(), 0); + wait_for_tasks(&ctx, 0).await; // query fast part let expected_fast = vec![ @@ -75,7 +79,7 @@ async fn 
test_query_cancellation_slow_store() { .unwrap(); let batches = ctx.collect(physical_plan).await.unwrap(); assert_batches_sorted_eq!(&expected_fast, &batches); - assert_eq!(ctx.tasks(), 0); + wait_for_tasks(&ctx, 0).await; // query blocked part let query_slow = "select * from cpu where region='west'"; @@ -92,7 +96,7 @@ async fn test_query_cancellation_slow_store() { }); tokio::time::sleep(std::time::Duration::from_millis(100)).await; assert!(!passed.load(Ordering::SeqCst)); - assert_eq!(ctx.tasks(), 1); + wait_for_tasks(&ctx, 1).await; // querying fast part should not be blocked let physical_plan = SqlQueryPlanner::default() @@ -101,14 +105,20 @@ async fn test_query_cancellation_slow_store() { .unwrap(); let batches = ctx.collect(physical_plan).await.unwrap(); assert_batches_sorted_eq!(&expected_fast, &batches); - assert_eq!(ctx.tasks(), 1); + wait_for_tasks(&ctx, 1).await; // canceling the blocking query should free resources again // cancelation might take a short while join_handle.abort(); + wait_for_tasks(&ctx, 0).await; + assert!(!passed.load(Ordering::SeqCst)); +} + +/// Wait up to 10s for correct task count. +async fn wait_for_tasks(ctx: &IOxExecutionContext, n: usize) { tokio::time::timeout(Duration::from_secs(10), async { loop { - if dbg!(ctx.tasks()) == 0 { + if dbg!(ctx.tasks()) == n { return; } tokio::time::sleep(Duration::from_millis(1)).await; @@ -116,5 +126,4 @@ async fn test_query_cancellation_slow_store() { }) .await .unwrap(); - assert!(!passed.load(Ordering::SeqCst)); } diff --git a/query_tests/src/influxrpc/read_group.rs b/query_tests/src/influxrpc/read_group.rs index b018855619..2b287fd47a 100644 --- a/query_tests/src/influxrpc/read_group.rs +++ b/query_tests/src/influxrpc/read_group.rs @@ -9,7 +9,10 @@ use crate::{ use async_trait::async_trait; use data_types::timestamp::TimestampRange; -use datafusion::prelude::*; +use datafusion::{ + logical_plan::{binary_expr, Operator}, + prelude::*, +}; use predicate::{ delete_expr::DeleteExpr, delete_predicate::DeletePredicate, @@ -1037,3 +1040,118 @@ async fn test_grouped_series_set_plan_group_field_pred_filter_on_field() { ) .await; } + +// Test data to validate fix for: +// https://github.com/influxdata/influxdb_iox/issues/2691 +struct MeasurementForDefect2691 {} +#[async_trait] +impl DbSetup for MeasurementForDefect2691 { + async fn make(&self) -> Vec { + let partition_key = "2018-05-22T19"; + + let lp = vec![ + "system,host=host.local load1=1.83 1527018806000000000", + "system,host=host.local load1=1.63 1527018816000000000", + "system,host=host.local load3=1.72 1527018806000000000", + "system,host=host.local load4=1.77 1527018806000000000", + "system,host=host.local load4=1.78 1527018816000000000", + "system,host=host.local load4=1.77 1527018826000000000", + ]; + + all_scenarios_for_one_chunk(vec![], vec![], lp, "system", partition_key).await + } +} + +// See issue: https://github.com/influxdata/influxdb_iox/issues/2691 +// +// This test adds coverage for filtering on _value when executing a read_group +// plan. 
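Reviewer note: the cancellation test now polls for the expected task count under a 10s timeout instead of asserting immediately, which removes a race with asynchronous cleanup. A generic version of that wait-for-condition helper, assuming tokio with the `time`, `macros`, and `rt-multi-thread` features:

```rust
use std::time::Duration;

/// Poll `condition` every millisecond until it holds, or panic after 10s.
/// Same shape as the test's `wait_for_tasks` helper.
async fn wait_for<F: Fn() -> bool>(condition: F) {
    tokio::time::timeout(Duration::from_secs(10), async {
        loop {
            if condition() {
                return;
            }
            tokio::time::sleep(Duration::from_millis(1)).await;
        }
    })
    .await
    .expect("condition not reached within 10s");
}

#[tokio::main]
async fn main() {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    // Simulate a task counter that drops to zero a little later.
    let tasks = Arc::new(AtomicUsize::new(1));
    let tasks2 = Arc::clone(&tasks);
    tokio::spawn(async move {
        tokio::time::sleep(Duration::from_millis(50)).await;
        tasks2.store(0, Ordering::SeqCst);
    });

    // Instead of asserting the count right away, wait for it.
    wait_for(|| tasks.load(Ordering::SeqCst) == 0).await;
    println!("task count reached 0");
}
```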
+#[tokio::test] +async fn test_grouped_series_set_plan_group_field_pred_filter_on_value() { + // no predicate + let predicate = PredicateBuilder::default() + // 2018-05-22T19:53:26Z, stop: 2018-05-24T00:00:00Z + .timestamp_range(1527018806000000000, 1527120000000000000) + .add_expr(col("_value").eq(lit(1.77))) + .build(); + + let agg = Aggregate::Max; + let group_columns = vec!["_field"]; + + // Expect the data is grouped so output is sorted by measurement and then region + let expected_results = vec![ + "Group tag_keys: _field, _measurement, host partition_key_vals: load4", + "Series tags={_field=load4, _measurement=system, host=host.local}\n FloatPoints timestamps: [1527018806000000000], values: [1.77]", + ]; + + run_read_group_test_case( + MeasurementForDefect2691 {}, + predicate, + agg, + group_columns, + expected_results, + ) + .await; +} + +#[tokio::test] +async fn test_grouped_series_set_plan_group_field_pred_filter_on_multiple_value() { + // no predicate + let predicate = PredicateBuilder::default() + // 2018-05-22T19:53:26Z, stop: 2018-05-24T00:00:00Z + .timestamp_range(1527018806000000000, 1527120000000000000) + .add_expr(binary_expr( + col("_value").eq(lit(1.77)), + Operator::Or, + col("_value").eq(lit(1.72)), + )) + .build(); + + let agg = Aggregate::Max; + let group_columns = vec!["_field"]; + + // Expect the data is grouped so output is sorted by measurement and then region + let expected_results = vec![ + "Group tag_keys: _field, _measurement, host partition_key_vals: load3", + "Series tags={_field=load3, _measurement=system, host=host.local}\n FloatPoints timestamps: [1527018806000000000], values: [1.72]", + "Group tag_keys: _field, _measurement, host partition_key_vals: load4", + "Series tags={_field=load4, _measurement=system, host=host.local}\n FloatPoints timestamps: [1527018806000000000], values: [1.77]", + ]; + + run_read_group_test_case( + MeasurementForDefect2691 {}, + predicate, + agg, + group_columns, + expected_results, + ) + .await; +} + +#[tokio::test] +async fn test_grouped_series_set_plan_group_field_pred_filter_on_value_sum() { + // no predicate + let predicate = PredicateBuilder::default() + // 2018-05-22T19:53:26Z, stop: 2018-05-24T00:00:00Z + .timestamp_range(1527018806000000000, 1527120000000000000) + .add_expr(col("_value").eq(lit(1.77))) + .build(); + + let agg = Aggregate::Sum; + let group_columns = vec!["_field"]; + + // Expect the data is grouped so output is sorted by measurement and then region + let expected_results = vec![ + "Group tag_keys: _field, _measurement, host partition_key_vals: load4", + "Series tags={_field=load4, _measurement=system, host=host.local}\n FloatPoints timestamps: [1527018826000000000], values: [3.54]", + ]; + + run_read_group_test_case( + MeasurementForDefect2691 {}, + predicate, + agg, + group_columns, + expected_results, + ) + .await; +} diff --git a/query_tests/src/influxrpc/table_names.rs b/query_tests/src/influxrpc/table_names.rs index d44dcc1db4..0f881f2ce6 100644 --- a/query_tests/src/influxrpc/table_names.rs +++ b/query_tests/src/influxrpc/table_names.rs @@ -1,4 +1,5 @@ //! 
Tests for the Influx gRPC queries +use datafusion::logical_plan::{col, lit}; use predicate::predicate::{Predicate, PredicateBuilder, EMPTY_PREDICATE}; use query::{ exec::stringset::{IntoStringSet, StringSetRef}, @@ -24,11 +25,12 @@ where let planner = InfluxRpcPlanner::new(); let ctx = db.executor().new_context(query::exec::ExecutorType::Query); - let builder = planner + let plan = planner .table_names(db.as_ref(), predicate.clone()) .expect("built plan successfully"); + let names = ctx - .to_table_names(builder) + .to_string_set(plan) .await .expect("converted plan to strings successfully"); @@ -53,6 +55,49 @@ async fn list_table_names_no_data_pred() { run_table_names_test_case(TwoMeasurements {}, EMPTY_PREDICATE, vec!["cpu", "disk"]).await; } +#[tokio::test] +async fn list_table_names_no_data_passes() { + // no rows pass this predicate + run_table_names_test_case( + TwoMeasurementsManyFields {}, + tsp(10000000, 20000000), + vec![], + ) + .await; +} + +#[tokio::test] +async fn list_table_names_no_non_null_data_passes() { + // only a single row with a null field passes this predicate (expect no table names) + let predicate = PredicateBuilder::default() + .table("o2") + // only get last row of o2 (timestamp = 300) + .timestamp_range(200, 400) + // model predicate like _field='reading' which last row does not have + .field_columns(vec!["reading"]) + .build(); + + run_table_names_test_case(TwoMeasurementsManyFields {}, predicate, vec![]).await; +} + +#[tokio::test] +async fn list_table_names_no_non_null_general_data_passes() { + // only a single row with a null field passes this predicate + // (expect no table names) -- has a general purpose predicate to + // force a generic plan + let predicate = PredicateBuilder::default() + .table("o2") + // only get last row of o2 (timestamp = 300) + .timestamp_range(200, 400) + // model predicate like _field='reading' which last row does not have + .field_columns(vec!["reading"]) + // (state = CA) OR (temp > 50) + .add_expr(col("state").eq(lit("CA")).or(col("temp").gt(lit(50)))) + .build(); + + run_table_names_test_case(TwoMeasurementsManyFields {}, predicate, vec![]).await; +} + #[tokio::test] async fn list_table_names_no_data_pred_with_delete() { run_table_names_test_case( diff --git a/query_tests/src/pruning.rs b/query_tests/src/pruning.rs index 09b93b8ea9..011c6d6f51 100644 --- a/query_tests/src/pruning.rs +++ b/query_tests/src/pruning.rs @@ -103,11 +103,11 @@ async fn chunk_pruning_influxrpc() { let ctx = db.executor().new_context(query::exec::ExecutorType::Query); - let builder = InfluxRpcPlanner::new() + let plan = InfluxRpcPlanner::new() .table_names(db.as_ref(), predicate) .unwrap(); - let result = ctx.to_table_names(builder).await.unwrap(); + let result = ctx.to_string_set(plan).await.unwrap(); assert_eq!(&expected, result.as_ref()); diff --git a/read_buffer/Cargo.toml b/read_buffer/Cargo.toml index 88877f782d..707189e83b 100644 --- a/read_buffer/Cargo.toml +++ b/read_buffer/Cargo.toml @@ -2,7 +2,7 @@ name = "read_buffer" version = "0.1.0" authors = ["Edd Robinson "] -edition = "2018" +edition = "2021" # Note this crate is designed to be standalone, and should not depend # on the IOx Query Engine. 
The rationale is: diff --git a/rustfmt.toml b/rustfmt.toml index 4edcabf961..7b060483be 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,4 @@ -edition = "2018" +edition = "2021" # Unstable features not yet supported on stable Rust #wrap_comments = true diff --git a/schema/Cargo.toml b/schema/Cargo.toml index 2a6d3b085a..231d10bf80 100644 --- a/schema/Cargo.toml +++ b/schema/Cargo.toml @@ -2,7 +2,7 @@ name = "schema" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "IOx Schema definition" [dependencies] diff --git a/server/Cargo.toml b/server/Cargo.toml index eb53ec8a8a..fa2d104de3 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -2,7 +2,7 @@ name = "server" version = "0.1.0" authors = ["pauldix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order arrow = { version = "6.0", features = ["prettyprint"] } diff --git a/server/src/application.rs b/server/src/application.rs index 79e74eb203..83e5eaad7b 100644 --- a/server/src/application.rs +++ b/server/src/application.rs @@ -4,6 +4,7 @@ use object_store::ObjectStore; use observability_deps::tracing::info; use query::exec::Executor; use time::TimeProvider; +use trace::TraceCollector; use write_buffer::config::WriteBufferConfigFactory; use crate::JobRegistry; @@ -18,13 +19,18 @@ pub struct ApplicationState { job_registry: Arc, metric_registry: Arc, time_provider: Arc, + trace_collector: Option>, } impl ApplicationState { /// Creates a new `ApplicationState` /// /// Uses number of CPUs in the system if num_worker_threads is not set - pub fn new(object_store: Arc, num_worker_threads: Option) -> Self { + pub fn new( + object_store: Arc, + num_worker_threads: Option, + trace_collector: Option>, + ) -> Self { let num_threads = num_worker_threads.unwrap_or_else(num_cpus::get); info!(%num_threads, "using specified number of threads per thread pool"); @@ -45,6 +51,7 @@ impl ApplicationState { job_registry, metric_registry, time_provider, + trace_collector, } } @@ -68,6 +75,10 @@ impl ApplicationState { &self.time_provider } + pub fn trace_collector(&self) -> &Option> { + &self.trace_collector + } + pub fn executor(&self) -> &Arc { &self.executor } diff --git a/server/src/database.rs b/server/src/database.rs index 546786ceca..2bca56b868 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -30,7 +30,6 @@ use std::{future::Future, sync::Arc, time::Duration}; use tokio::{sync::Notify, task::JoinError}; use tokio_util::sync::CancellationToken; use trace::ctx::SpanContext; -use trace::{RingBufferTraceCollector, TraceCollector}; use uuid::Uuid; const INIT_BACKOFF: Duration = Duration::from_secs(1); @@ -1312,10 +1311,8 @@ impl DatabaseStateCatalogLoaded { ) -> Result { let db = Arc::clone(&self.db); - // TODO: use proper trace collector - let trace_collector: Arc = Arc::new(RingBufferTraceCollector::new(5)); - let rules = self.provided_rules.rules(); + let trace_collector = shared.application.trace_collector(); let write_buffer_factory = shared.application.write_buffer_factory(); let write_buffer_consumer = match rules.write_buffer_connection.as_ref() { Some(connection) if matches!(connection.direction, WriteBufferDirection::Read) => { @@ -1323,7 +1320,7 @@ impl DatabaseStateCatalogLoaded { .new_config_read( shared.config.server_id, shared.config.name.as_str(), - Some(&trace_collector), + trace_collector.as_ref(), connection, ) .await @@ -1375,13 +1372,14 @@ impl DatabaseStateInitialized { #[cfg(test)] mod tests { + use crate::test_utils::make_application; + use 
super::*; use data_types::database_rules::{ PartitionTemplate, TemplatePart, WriteBufferConnection, WriteBufferDirection, }; use data_types::sequence::Sequence; use entry::{test_helpers::lp_to_entries, SequencedEntry}; - use object_store::ObjectStore; use std::{ convert::{TryFrom, TryInto}, num::NonZeroU32, @@ -1393,10 +1391,7 @@ mod tests { #[tokio::test] async fn database_shutdown_waits_for_jobs() { - let application = Arc::new(ApplicationState::new( - Arc::new(ObjectStore::new_in_memory()), - None, - )); + let application = make_application(); let database = Database::new( Arc::clone(&application), @@ -1454,10 +1449,7 @@ mod tests { async fn initialized_database() -> (Arc, Database) { let server_id = ServerId::try_from(1).unwrap(); - let application = Arc::new(ApplicationState::new( - Arc::new(ObjectStore::new_in_memory()), - None, - )); + let application = make_application(); let db_name = DatabaseName::new("test").unwrap(); let uuid = Uuid::new_v4(); @@ -1594,10 +1586,7 @@ mod tests { )); // setup application - let application = Arc::new(ApplicationState::new( - Arc::new(ObjectStore::new_in_memory()), - None, - )); + let application = make_application(); application .write_buffer_factory() .register_mock("my_mock".to_string(), state.clone()); diff --git a/server/src/db/chunk.rs b/server/src/db/chunk.rs index 046652cd2b..d1797ea45a 100644 --- a/server/src/db/chunk.rs +++ b/server/src/db/chunk.rs @@ -20,6 +20,7 @@ use predicate::{ }; use query::{exec::stringset::StringSet, QueryChunk, QueryChunkMeta}; use read_buffer::RBChunk; +use schema::InfluxColumnType; use schema::{selection::Selection, sort::SortKey, Schema}; use snafu::{OptionExt, ResultExt, Snafu}; use std::{ @@ -237,6 +238,31 @@ impl DbChunk { debug!(?rub_preds, "RUB delete predicates"); Ok(rub_preds) } + + /// Return true if any of the fields called for in the `predicate` + /// contain at least 1 null value. Returns false ONLY if all + /// fields that pass `predicate` are entirely non null + fn fields_have_nulls(&self, predicate: &Predicate) -> bool { + self.meta.schema.iter().any(|(influx_column_type, field)| { + if matches!(influx_column_type, Some(InfluxColumnType::Field(_))) + && predicate.should_include_field(field.name()) + { + match self.meta.table_summary.column(field.name()) { + Some(column_summary) => { + // only if this is false can we return false + column_summary.null_count() > 0 + } + None => { + // don't know the stats for this column, so assume there can be nulls + true + } + } + } else { + // not a field column + false + } + }) + } } impl QueryChunk for DbChunk { @@ -264,23 +290,12 @@ impl QueryChunk for DbChunk { return Ok(PredicateMatch::Zero); } - // TODO apply predicate pruning here... - let pred_result = match &self.state { State::MutableBuffer { chunk, .. 
} => { - if predicate.has_exprs() { - // TODO: Support more predicates + if predicate.has_exprs() || chunk.has_timerange(&predicate.range) { + // TODO some more work to figure out if we + // definite have / do not have result PredicateMatch::Unknown - } else if chunk.has_timerange(&predicate.range) { - // Note: this isn't precise / correct: if the - // chunk has the timerange, some other part of the - // predicate may rule out the rows, and thus - // without further work this clause should return - // "Unknown" rather than falsely claiming that - // there is at least one row: - // - // https://github.com/influxdata/influxdb_iox/issues/1590 - PredicateMatch::AtLeastOne } else { PredicateMatch::Zero } @@ -305,19 +320,21 @@ impl QueryChunk for DbChunk { // on meta-data only. This should be possible without needing to // know the execution engine the chunk is held in. if chunk.satisfies_predicate(&rb_predicate) { - PredicateMatch::AtLeastOne + // if any of the fields referred to in the + // predicate has nulls, don't know without more + // work if the rows that matched had non null values + if self.fields_have_nulls(predicate) { + PredicateMatch::Unknown + } else { + PredicateMatch::AtLeastOneNonNullField + } } else { PredicateMatch::Zero } } State::ParquetFile { chunk, .. } => { - if predicate.has_exprs() { - // TODO: Support more predicates + if predicate.has_exprs() || chunk.has_timerange(predicate.range.as_ref()) { PredicateMatch::Unknown - } else if chunk.has_timerange(predicate.range.as_ref()) { - // As above, this should really be "Unknown" rather than AtLeastOne - // for precision / correctness. - PredicateMatch::AtLeastOne } else { PredicateMatch::Zero } diff --git a/server/src/db/replay.rs b/server/src/db/replay.rs index b23278a734..1bd5c216ec 100644 --- a/server/src/db/replay.rs +++ b/server/src/db/replay.rs @@ -408,14 +408,11 @@ impl SequenceNumberSection { #[cfg(test)] mod tests { use super::*; - - use std::{ - convert::TryFrom, - num::{NonZeroU32, NonZeroU64, NonZeroUsize}, - sync::Arc, - time::{Duration, Instant}, + use crate::{ + lifecycle::LifecycleWorker, + utils::{TestDb, TestDbBuilder}, + write_buffer::WriteBufferConsumer, }; - use arrow_util::assert_batches_eq; use data_types::{ database_rules::{PartitionTemplate, Partitioner, TemplatePart}, @@ -432,16 +429,18 @@ mod tests { min_max_sequence::OptionalMinMaxSequence, }; use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner}; + use std::{ + convert::TryFrom, + num::{NonZeroU32, NonZeroU64, NonZeroUsize}, + sync::Arc, + time::{Duration, Instant}, + }; use test_helpers::{assert_contains, assert_not_contains, tracing::TracingCapture}; use time::{Time, TimeProvider}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use write_buffer::mock::{MockBufferForReading, MockBufferSharedState}; - use crate::lifecycle::LifecycleWorker; - use crate::utils::TestDb; - use crate::write_buffer::WriteBufferConsumer; - #[derive(Debug)] struct TestSequencedEntry { sequencer_id: u32, @@ -572,15 +571,17 @@ mod tests { let write_buffer_state = MockBufferSharedState::empty_with_n_sequencers(self.n_sequencers); - let (mut test_db, mut shutdown, mut join_handle) = Self::create_test_db( + let test_db_builder = Self::create_test_db_builder( Arc::clone(&object_store), server_id, db_name, partition_template.clone(), self.catalog_transactions_until_checkpoint, Arc::::clone(&time), - ) - .await; + ); + + let (mut test_db, mut shutdown, mut join_handle) = + Self::create_test_db(&test_db_builder).await; let mut 
lifecycle = LifecycleWorker::new(Arc::clone(&test_db.db)); @@ -620,15 +621,8 @@ mod tests { drop(test_db); // then create new one - let (test_db_tmp, shutdown_tmp, join_handle_tmp) = Self::create_test_db( - Arc::clone(&object_store), - server_id, - db_name, - partition_template.clone(), - self.catalog_transactions_until_checkpoint, - Arc::::clone(&time), - ) - .await; + let (test_db_tmp, shutdown_tmp, join_handle_tmp) = + Self::create_test_db(&test_db_builder).await; test_db = test_db_tmp; shutdown = shutdown_tmp; join_handle = join_handle_tmp; @@ -759,14 +753,29 @@ mod tests { } async fn create_test_db( + builder: &TestDbBuilder, + ) -> (TestDb, CancellationToken, JoinHandle<()>) { + let test_db = builder.build().await; + // start background worker + + let shutdown: CancellationToken = Default::default(); + let shutdown_captured = shutdown.clone(); + let db_captured = Arc::clone(&test_db.db); + let join_handle = + tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await }); + + (test_db, shutdown, join_handle) + } + + fn create_test_db_builder( object_store: Arc, server_id: ServerId, db_name: &'static str, partition_template: PartitionTemplate, catalog_transactions_until_checkpoint: NonZeroU64, time_provider: Arc, - ) -> (TestDb, CancellationToken, JoinHandle<()>) { - let test_db = TestDb::builder() + ) -> TestDbBuilder { + TestDb::builder() .object_store(object_store) .server_id(server_id) .lifecycle_rules(data_types::database_rules::LifecycleRules { @@ -779,17 +788,6 @@ mod tests { .partition_template(partition_template) .time_provider(time_provider) .db_name(db_name) - .build() - .await; - - // start background worker - let shutdown: CancellationToken = Default::default(); - let shutdown_captured = shutdown.clone(); - let db_captured = Arc::clone(&test_db.db); - let join_handle = - tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await }); - - (test_db, shutdown, join_handle) } /// Evaluates given checks. diff --git a/server/src/lib.rs b/server/src/lib.rs index 369f4cd82e..c3a26547f7 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -876,6 +876,9 @@ where // immediately to the client and abort all other outstanding requests. futures_util::future::try_join_all(sharded_entries.into_iter().map( |sharded_entry| async { + // capture entire entry in closure + let sharded_entry = sharded_entry; + let sink = match &rules.routing_rules { Some(RoutingRules::ShardConfig(shard_config)) => { let id = sharded_entry.shard_id.expect("sharded entry"); @@ -1189,7 +1192,13 @@ async fn maybe_initialize_server(shared: &ServerShared) { init_ready.server_id, ) .await - .map_err(|e| InitError::GetServerConfig { source: e }) + .or_else(|e| match e { + // If this is the first time starting up this server and there is no config file yet, + // this isn't a problem. Start an empty server config. + object_store::Error::NotFound { .. } => Ok(bytes::Bytes::new()), + // Any other error is a problem. + _ => Err(InitError::GetServerConfig { source: e }), + }) .and_then(|config| { generated_types::server_config::decode_persisted_server_config(config) .map_err(|e| InitError::DeserializeServerConfig { source: e }) @@ -1204,21 +1213,9 @@ async fn maybe_initialize_server(shared: &ServerShared) { location, ) }) - .collect() + .collect::>() }); - // TODO: This is a temporary fallback until the transition to server config files being the - // source of truth for database name and location is finished. 
- let maybe_databases = match maybe_databases { - Ok(maybe) => Ok(maybe), - Err(_) => IoxObjectStore::list_possible_databases( - shared.application.object_store(), - init_ready.server_id, - ) - .await - .map_err(|e| InitError::ListDatabases { source: e }), - }; - let next_state = match maybe_databases { Ok(databases) => { let mut state = ServerStateInitialized { @@ -1352,6 +1349,7 @@ pub mod test_utils { Arc::new(ApplicationState::new( Arc::new(ObjectStore::new_in_memory()), None, + None, )) } @@ -1630,7 +1628,7 @@ mod tests { } #[tokio::test] - async fn load_databases_and_transition_to_server_config() { + async fn load_databases() { let application = make_application(); let server = make_server(Arc::clone(&application)); @@ -1662,13 +1660,6 @@ mod tests { .await .expect("cannot delete rules file"); - // delete server config file - this is not something that's supposed to happen but is - // what will happen during the transition to using the server config file - let mut path = application.object_store().new_path(); - path.push_dir(server_id.to_string()); - path.set_file_name("config.pb"); - application.object_store().delete(&path).await.unwrap(); - let server = make_server(Arc::clone(&application)); server.set_id(ServerId::try_from(1).unwrap()).unwrap(); server.wait_for_init().await.unwrap(); @@ -2091,15 +2082,19 @@ mod tests { async fn init_error_generic() { // use an object store that will hopefully fail to read let store = Arc::new(ObjectStore::new_failing_store().unwrap()); - let application = Arc::new(ApplicationState::new(store, None)); + let application = Arc::new(ApplicationState::new(store, None, None)); let server = make_server(application); server.set_id(ServerId::try_from(1).unwrap()).unwrap(); let err = server.wait_for_init().await.unwrap_err(); - assert!(matches!(err.as_ref(), InitError::ListDatabases { .. })); + assert!( + matches!(err.as_ref(), InitError::GetServerConfig { .. 
}), + "got: {:?}", + err + ); assert_contains!( server.server_init_error().unwrap().to_string(), - "error listing databases in object storage:" + "error getting server config from object storage:" ); } @@ -2217,24 +2212,27 @@ mod tests { let foo_db_name = DatabaseName::new("foo").unwrap(); - // create a directory in object storage that looks like it could - // be a database directory, but doesn't have any valid generation - // directories in it - let mut fake_db_path = application.object_store().new_path(); - fake_db_path.push_all_dirs(&[server_id.to_string().as_str(), foo_db_name.as_str()]); - let mut not_generation_file = fake_db_path.clone(); - not_generation_file.set_file_name("not-a-generation"); - application - .object_store() - .put(¬_generation_file, Bytes::new()) - .await - .unwrap(); - // start server let server = make_server(Arc::clone(&application)); server.set_id(server_id).unwrap(); server.wait_for_init().await.unwrap(); + // create database + create_simple_database(&server, &foo_db_name) + .await + .expect("failed to create database"); + + // delete database + server + .delete_database(&foo_db_name) + .await + .expect("failed to delete database"); + + // restart server + let server = make_server(Arc::clone(&application)); + server.set_id(server_id).unwrap(); + server.wait_for_init().await.unwrap(); + // generic error MUST NOT be set assert!(server.server_init_error().is_none()); diff --git a/server/src/write_buffer.rs b/server/src/write_buffer.rs index c00cf172d5..4078f75a0a 100644 --- a/server/src/write_buffer.rs +++ b/server/src/write_buffer.rs @@ -10,6 +10,7 @@ use tokio_util::sync::CancellationToken; use entry::SequencedEntry; use observability_deps::tracing::{debug, error, info, warn}; +use trace::span::SpanRecorder; use write_buffer::core::{FetchHighWatermark, WriteBufferError, WriteBufferReading}; use crate::Db; @@ -151,12 +152,20 @@ async fn stream_in_sequenced_entries<'a>( // store entry let mut logged_hard_limit = false; loop { + let mut span_recorder = SpanRecorder::new( + sequenced_entry + .span_context() + .map(|parent| parent.child("IOx write buffer")), + ); + match db.store_sequenced_entry( Arc::clone(&sequenced_entry), crate::db::filter_table_batch_keep_all, ) { Ok(_) => { metrics.success(); + span_recorder.ok("stored entry"); + break; } Err(crate::db::Error::HardLimitReached {}) => { @@ -169,6 +178,8 @@ async fn stream_in_sequenced_entries<'a>( ); logged_hard_limit = true; } + span_recorder.error("hard limit reached"); + tokio::time::sleep(Duration::from_millis(100)).await; continue; } @@ -179,6 +190,7 @@ async fn stream_in_sequenced_entries<'a>( sequencer_id, "Error storing SequencedEntry from write buffer in database" ); + span_recorder.error("cannot store entry"); // no retry break; diff --git a/server_benchmarks/Cargo.toml b/server_benchmarks/Cargo.toml index d90c79527d..401c6898bc 100644 --- a/server_benchmarks/Cargo.toml +++ b/server_benchmarks/Cargo.toml @@ -2,7 +2,7 @@ name = "server_benchmarks" version = "0.1.0" authors = ["Andrew Lamb "] -edition = "2018" +edition = "2021" description = "Server related bechmarks, grouped into their own crate to minimize build dev build times" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/server_benchmarks/benches/line_parser.rs b/server_benchmarks/benches/line_parser.rs index 42eef49fac..44f6c2bc96 100644 --- a/server_benchmarks/benches/line_parser.rs +++ b/server_benchmarks/benches/line_parser.rs @@ -1,7 +1,7 @@ use criterion::{criterion_group, 
criterion_main, Criterion, Throughput}; use std::time::Duration; -static LINES: &str = include_str!("../../tests/fixtures/lineproto/prometheus.lp"); +static LINES: &str = include_str!("../../test_fixtures/lineproto/prometheus.lp"); fn line_parser(c: &mut Criterion) { let mut group = c.benchmark_group("line_parser"); diff --git a/server_benchmarks/benches/read_filter.rs b/server_benchmarks/benches/read_filter.rs index c57c1742d6..e871151bf8 100644 --- a/server_benchmarks/benches/read_filter.rs +++ b/server_benchmarks/benches/read_filter.rs @@ -40,7 +40,7 @@ use server::db::Db; // In total there are 10K rows. The timespan of the points in the line // protocol is around 1m of wall-clock time. async fn setup_scenarios() -> Vec { - let raw = include_bytes!("../../tests/fixtures/lineproto/read_filter.lp.gz"); + let raw = include_bytes!("../../test_fixtures/lineproto/read_filter.lp.gz"); let mut gz = GzDecoder::new(&raw[..]); let mut lp = String::new(); gz.read_to_string(&mut lp).unwrap(); diff --git a/server_benchmarks/benches/read_group.rs b/server_benchmarks/benches/read_group.rs index 2eb29af995..a44c01edb1 100644 --- a/server_benchmarks/benches/read_group.rs +++ b/server_benchmarks/benches/read_group.rs @@ -41,7 +41,7 @@ use server::db::Db; // In total there are 10K rows. The timespan of the points in the line // protocol is around 1m of wall-clock time. async fn setup_scenarios() -> Vec { - let raw = include_bytes!("../../tests/fixtures/lineproto/read_filter.lp.gz"); + let raw = include_bytes!("../../test_fixtures/lineproto/read_filter.lp.gz"); let mut gz = GzDecoder::new(&raw[..]); let mut lp = String::new(); gz.read_to_string(&mut lp).unwrap(); diff --git a/server_benchmarks/benches/snapshot.rs b/server_benchmarks/benches/snapshot.rs index d971b05a1a..490d2205c5 100644 --- a/server_benchmarks/benches/snapshot.rs +++ b/server_benchmarks/benches/snapshot.rs @@ -12,7 +12,7 @@ fn snapshot_chunk(chunk: &MBChunk) { fn chunk(count: usize) -> MBChunk { let mut chunk: Option = None; - let raw = include_bytes!("../../tests/fixtures/lineproto/tag_values.lp.gz"); + let raw = include_bytes!("../../test_fixtures/lineproto/tag_values.lp.gz"); let mut gz = GzDecoder::new(&raw[..]); let mut lp = String::new(); gz.read_to_string(&mut lp).unwrap(); diff --git a/server_benchmarks/benches/tag_values.rs b/server_benchmarks/benches/tag_values.rs index d7c43871f8..0b6d177551 100644 --- a/server_benchmarks/benches/tag_values.rs +++ b/server_benchmarks/benches/tag_values.rs @@ -38,7 +38,7 @@ use server::db::Db; // The timespan of the points in the line protocol is around 1m or wall-clock // time. 
async fn setup_scenarios() -> Vec { - let raw = include_bytes!("../../tests/fixtures/lineproto/tag_values.lp.gz"); + let raw = include_bytes!("../../test_fixtures/lineproto/tag_values.lp.gz"); let mut gz = GzDecoder::new(&raw[..]); let mut lp = String::new(); gz.read_to_string(&mut lp).unwrap(); diff --git a/server_benchmarks/benches/write.rs b/server_benchmarks/benches/write.rs index 90d04f5e95..fc6441373e 100644 --- a/server_benchmarks/benches/write.rs +++ b/server_benchmarks/benches/write.rs @@ -33,7 +33,7 @@ fn write_chunk(count: usize, entries: &[Entry]) { } fn load_entries() -> Vec { - let raw = include_bytes!("../../tests/fixtures/lineproto/tag_values.lp.gz"); + let raw = include_bytes!("../../test_fixtures/lineproto/tag_values.lp.gz"); let mut gz = GzDecoder::new(&raw[..]); let mut lp = String::new(); gz.read_to_string(&mut lp).unwrap(); diff --git a/tests/fixtures/000000000000005-000000002.tsm.gz b/test_fixtures/000000000000005-000000002.tsm.gz similarity index 100% rename from tests/fixtures/000000000000005-000000002.tsm.gz rename to test_fixtures/000000000000005-000000002.tsm.gz diff --git a/tests/fixtures/000000000000462-000000002.tsm.gz b/test_fixtures/000000000000462-000000002.tsm.gz similarity index 100% rename from tests/fixtures/000000000000462-000000002.tsm.gz rename to test_fixtures/000000000000462-000000002.tsm.gz diff --git a/tests/fixtures/cpu_usage.tsm.gz b/test_fixtures/cpu_usage.tsm.gz similarity index 100% rename from tests/fixtures/cpu_usage.tsm.gz rename to test_fixtures/cpu_usage.tsm.gz diff --git a/tests/fixtures/lineproto/air_and_water.lp b/test_fixtures/lineproto/air_and_water.lp similarity index 100% rename from tests/fixtures/lineproto/air_and_water.lp rename to test_fixtures/lineproto/air_and_water.lp diff --git a/tests/fixtures/lineproto/metrics.lp b/test_fixtures/lineproto/metrics.lp similarity index 100% rename from tests/fixtures/lineproto/metrics.lp rename to test_fixtures/lineproto/metrics.lp diff --git a/tests/fixtures/lineproto/prometheus.lp b/test_fixtures/lineproto/prometheus.lp similarity index 100% rename from tests/fixtures/lineproto/prometheus.lp rename to test_fixtures/lineproto/prometheus.lp diff --git a/tests/fixtures/lineproto/read_filter.lp.gz b/test_fixtures/lineproto/read_filter.lp.gz similarity index 100% rename from tests/fixtures/lineproto/read_filter.lp.gz rename to test_fixtures/lineproto/read_filter.lp.gz diff --git a/tests/fixtures/lineproto/tag_values.lp.gz b/test_fixtures/lineproto/tag_values.lp.gz similarity index 100% rename from tests/fixtures/lineproto/tag_values.lp.gz rename to test_fixtures/lineproto/tag_values.lp.gz diff --git a/tests/fixtures/lineproto/temperature.lp b/test_fixtures/lineproto/temperature.lp similarity index 100% rename from tests/fixtures/lineproto/temperature.lp rename to test_fixtures/lineproto/temperature.lp diff --git a/tests/fixtures/merge-tsm/merge_a.tsm.gz b/test_fixtures/merge-tsm/merge_a.tsm.gz similarity index 100% rename from tests/fixtures/merge-tsm/merge_a.tsm.gz rename to test_fixtures/merge-tsm/merge_a.tsm.gz diff --git a/tests/fixtures/merge-tsm/merge_b.tsm.gz b/test_fixtures/merge-tsm/merge_b.tsm.gz similarity index 100% rename from tests/fixtures/merge-tsm/merge_b.tsm.gz rename to test_fixtures/merge-tsm/merge_b.tsm.gz diff --git a/tests/fixtures/parquet/temperature.parquet b/test_fixtures/parquet/temperature.parquet similarity index 100% rename from tests/fixtures/parquet/temperature.parquet rename to test_fixtures/parquet/temperature.parquet diff --git a/test_helpers/Cargo.toml 
b/test_helpers/Cargo.toml index 4ccfa8f678..ddfcc04610 100644 --- a/test_helpers/Cargo.toml +++ b/test_helpers/Cargo.toml @@ -2,11 +2,11 @@ name = "test_helpers" version = "0.1.0" authors = ["Paul Dix "] -edition = "2018" +edition = "2021" [dependencies] # In alphabetical order dotenv = "0.15.0" parking_lot = "0.11.2" tempfile = "3.1.0" -tracing-subscriber = "0.2" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } observability_deps = { path = "../observability_deps" } diff --git a/time/Cargo.toml b/time/Cargo.toml index 0557fda4c0..f1b4c9075b 100644 --- a/time/Cargo.toml +++ b/time/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "time" version = "0.1.0" -edition = "2018" +edition = "2021" description = "Time functionality for IOx" [dependencies] diff --git a/trace/Cargo.toml b/trace/Cargo.toml index 1e1d0f2527..c5173ac459 100644 --- a/trace/Cargo.toml +++ b/trace/Cargo.toml @@ -2,7 +2,7 @@ name = "trace" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" description = "Distributed tracing support within IOx" [dependencies] diff --git a/trace_exporters/Cargo.toml b/trace_exporters/Cargo.toml index c9e3d07f60..1f91219680 100644 --- a/trace_exporters/Cargo.toml +++ b/trace_exporters/Cargo.toml @@ -2,7 +2,7 @@ name = "trace_exporters" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" description = "Additional tracing exporters for IOx" [dependencies] diff --git a/trace_http/Cargo.toml b/trace_http/Cargo.toml index 74ef6b9ef6..311a636f93 100644 --- a/trace_http/Cargo.toml +++ b/trace_http/Cargo.toml @@ -2,7 +2,7 @@ name = "trace_http" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" description = "Distributed tracing support for HTTP services" [dependencies] diff --git a/tracker/Cargo.toml b/tracker/Cargo.toml index 66ab6b6f48..3a71bcd1fc 100644 --- a/tracker/Cargo.toml +++ b/tracker/Cargo.toml @@ -2,7 +2,7 @@ name = "tracker" version = "0.1.0" authors = ["Raphael Taylor-Davies "] -edition = "2018" +edition = "2021" description = "Utilities for tracking resource utilisation within IOx" [dependencies] diff --git a/trogging/Cargo.toml b/trogging/Cargo.toml index 0647192fab..c44c0bc164 100644 --- a/trogging/Cargo.toml +++ b/trogging/Cargo.toml @@ -2,7 +2,7 @@ name = "trogging" version = "0.1.0" authors = ["Marko Mikulicic "] -edition = "2018" +edition = "2021" description = "IOx logging pipeline built upon tokio-tracing" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -12,7 +12,7 @@ logfmt = { path = "../logfmt" } observability_deps = { path = "../observability_deps" } thiserror = "1.0.30" tracing-log = "0.1" -tracing-subscriber = "0.2" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } structopt = { version = "0.3.25", optional = true } [dev-dependencies] diff --git a/trogging/src/cli.rs b/trogging/src/cli.rs index c0538d14f9..e1166fd30c 100644 --- a/trogging/src/cli.rs +++ b/trogging/src/cli.rs @@ -105,7 +105,7 @@ impl LoggingConfig { pub fn with_builder(&self, builder: Builder) -> Builder where - W: MakeWriter + Send + Sync + Clone + 'static, + W: for<'writer> MakeWriter<'writer> + Send + Sync + Clone + 'static, { builder .with_log_filter(&self.log_filter) @@ -129,7 +129,7 @@ pub trait LoggingConfigBuilderExt { impl LoggingConfigBuilderExt for Builder where - W: MakeWriter + Send + Sync + Clone + 'static, + W: for<'writer> MakeWriter<'writer> + Send + Sync + Clone + 'static, { fn 
with_logging_config(self, config: &LoggingConfig) -> Builder { config.with_builder(self) diff --git a/trogging/src/lib.rs b/trogging/src/lib.rs index 95f1f32abf..dc3c7bde92 100644 --- a/trogging/src/lib.rs +++ b/trogging/src/lib.rs @@ -86,7 +86,7 @@ impl Builder { impl Builder { pub fn with_writer(self, make_writer: W2) -> Builder where - W2: MakeWriter + Send + Sync + 'static, + W2: for<'writer> MakeWriter<'writer> + Send + Sync + 'static, { Builder:: { make_writer, @@ -103,7 +103,7 @@ impl Builder { // This needs to be a separate impl block because they place different bounds on the type parameters. impl Builder where - W: MakeWriter + Send + Sync + 'static, + W: for<'writer> MakeWriter<'writer> + Send + Sync + 'static, { pub const DEFAULT_LOG_FILTER: &'static str = "warn"; @@ -277,17 +277,30 @@ impl Drop for TroggingGuard { fn make_writer(m: M) -> BoxMakeWriter where - M: MakeWriter + Send + Sync + 'static, + M: for<'writer> MakeWriter<'writer> + Send + Sync + 'static, { - fmt::writer::BoxMakeWriter::new(move || { - std::io::LineWriter::with_capacity( - MAX_LINE_LENGTH, - LimitedWriter(MAX_LINE_LENGTH, m.make_writer()), - ) + BoxMakeWriter::new(MakeWriterHelper { + inner: BoxMakeWriter::new(m), }) } +struct MakeWriterHelper { + inner: BoxMakeWriter, +} + +impl<'a> MakeWriter<'a> for MakeWriterHelper { + type Writer = Box; + + fn make_writer(&'a self) -> Self::Writer { + Box::new(std::io::LineWriter::with_capacity( + MAX_LINE_LENGTH, + LimitedWriter(MAX_LINE_LENGTH, self.inner.make_writer()), + )) + } +} + struct LimitedWriter(usize, W); + impl Write for LimitedWriter { fn write(&mut self, buf: &[u8]) -> std::io::Result { if buf.is_empty() { @@ -341,7 +354,7 @@ pub mod test_util { } } - impl MakeWriter for TestWriter { + impl MakeWriter<'_> for TestWriter { type Writer = SynchronizedWriter>; fn make_writer(&self) -> Self::Writer { @@ -356,9 +369,9 @@ pub mod test_util { /// Removes non-determinism by removing timestamps from the log lines. /// It supports the built-in tracing timestamp format and the logfmt timestamps. pub fn without_timestamps(&self) -> String { - // logfmt or fmt::layer() time format + // logfmt (e.g. `time=12345`) or fmt::layer() (e.g. `2021-10-25T13:48:50.555258`) time format let timestamp = regex::Regex::new( - r"(?m)( ?time=[0-9]+|^([A-Z][a-z]{2}) \d{1,2} \d{2}:\d{2}:\d{2}.\d{3} *)", + r"(?m)( ?time=[0-9]+|^(\d{4})-\d{1,2}-\d{1,2}T\d{2}:\d{2}:\d{2}.\d+Z *)", ) .unwrap(); timestamp.replace_all(&self.to_string(), "").to_string() @@ -379,7 +392,7 @@ pub mod test_util { /// the logging macros invoked by the function. pub fn log_test(builder: Builder, f: F) -> Captured where - W: MakeWriter + Send + Sync + 'static, + W: for<'writer> MakeWriter<'writer> + Send + Sync + 'static, F: Fn(), { let (writer, output) = TestWriter::new(); @@ -401,7 +414,7 @@ pub mod test_util { /// and returns the captured output. 
pub fn simple_test(builder: Builder) -> Captured where - W: MakeWriter + Send + Sync + 'static, + W: for<'writer> MakeWriter<'writer> + Send + Sync + 'static, { log_test(builder, || { error!("foo"); @@ -598,7 +611,8 @@ ERROR foo #[test] fn line_buffering() { let (test_writer, captured) = TestWriter::new(); - let mut writer = make_writer(test_writer).make_writer(); + let mw = make_writer(test_writer); + let mut writer = mw.make_writer(); writer.write_all("foo".as_bytes()).unwrap(); // wasn't flushed yet because there was no newline yet assert_eq!(captured.to_string(), ""); @@ -611,7 +625,8 @@ ERROR foo // another case when the line buffer flushes even before a newline is when the internal buffer limit let (test_writer, captured) = TestWriter::new(); - let mut writer = make_writer(test_writer).make_writer(); + let mw = make_writer(test_writer); + let mut writer = mw.make_writer(); let long = std::iter::repeat(b'X') .take(MAX_LINE_LENGTH) .collect::>(); diff --git a/write_buffer/Cargo.toml b/write_buffer/Cargo.toml index a6fbda7608..2afe4a73b4 100644 --- a/write_buffer/Cargo.toml +++ b/write_buffer/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "write_buffer" version = "0.1.0" -edition = "2018" +edition = "2021" [dependencies] async-trait = "0.1" diff --git a/write_buffer/src/file.rs b/write_buffer/src/file.rs index 630e2749e8..ac99e3dc1b 100644 --- a/write_buffer/src/file.rs +++ b/write_buffer/src/file.rs @@ -318,12 +318,7 @@ impl WriteBufferReading for FileBufferConsumer { let fetch_high_watermark = move || { let committed = committed.clone(); - let fut = async move { - let files = scan_dir::(&committed, FileType::File).await?; - let watermark = files.keys().max().map(|n| n + 1).unwrap_or(0); - - Ok(watermark) - }; + let fut = async move { watermark(&committed).await }; fut.boxed() as FetchHighWatermarkFut<'_> }; let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>; @@ -385,40 +380,67 @@ impl ConsumerStream { // read file let file_path = path.join(sequence_number.to_string()); - let data = match tokio::fs::read(&file_path).await { - Ok(data) => data, - Err(_) => { - // just wait a bit - tokio::time::sleep(std::time::Duration::from_secs(1)).await; - continue; - } - }; - - // decode file - let sequence = Sequence { - id: sequencer_id, - number: sequence_number, - }; - let msg = match Self::decode_file(data, sequence, trace_collector.clone()) { - Ok(sequence) => { - match next_sequence_number.compare_exchange( - sequence_number, - sequence_number + 1, - Ordering::SeqCst, - Ordering::SeqCst, - ) { - Ok(_) => { - // can send to output - Ok(sequence) + let msg = match tokio::fs::read(&file_path).await { + Ok(data) => { + // decode file + let sequence = Sequence { + id: sequencer_id, + number: sequence_number, + }; + match Self::decode_file(data, sequence, trace_collector.clone()) { + Ok(sequence) => { + match next_sequence_number.compare_exchange( + sequence_number, + sequence_number + 1, + Ordering::SeqCst, + Ordering::SeqCst, + ) { + Ok(_) => { + // can send to output + Ok(sequence) + } + Err(_) => { + // interleaving change, retry + continue; + } + } } - Err(_) => { - // interleaving change, retry + e => e, + } + } + Err(error) => { + match error.kind() { + std::io::ErrorKind::NotFound => { + // figure out watermark and see if there's a gap in the stream + if let Ok(watermark) = watermark(&path).await { + // watermark is "last sequence number + 1", so substract 1 before comparing + if watermark.saturating_sub(1) > sequence_number { + // update position + // failures 
are OK here since we'll re-read this value next round + next_sequence_number + .compare_exchange( + sequence_number, + sequence_number + 1, + Ordering::SeqCst, + Ordering::SeqCst, + ) + .ok(); + continue; + } + }; + + // no gap detected, just wait a bit for new data + tokio::time::sleep(std::time::Duration::from_secs(1)).await; continue; } + _ => { + // cannot read file => communicate to user + Err(Box::new(error) as WriteBufferError) + } } } - e => e, }; + if tx.send(msg).await.is_err() { // Receiver is gone return; @@ -627,15 +649,46 @@ where Ok(results) } +async fn watermark(path: &Path) -> Result { + let files = scan_dir::(path, FileType::File).await?; + let watermark = files.keys().max().map(|n| n + 1).unwrap_or(0); + Ok(watermark) +} + +pub mod test_utils { + use std::path::Path; + + /// Remove specific entry from write buffer. + pub async fn remove_entry( + write_buffer_path: &Path, + database_name: &str, + sequencer_id: u32, + sequence_number: u64, + ) { + tokio::fs::remove_file( + write_buffer_path + .join(database_name) + .join("active") + .join(sequencer_id.to_string()) + .join("committed") + .join(sequence_number.to_string()), + ) + .await + .unwrap() + } +} + #[cfg(test)] mod tests { use std::num::NonZeroU32; + use entry::test_helpers::lp_to_entry; use tempfile::TempDir; use trace::RingBufferTraceCollector; use crate::core::test_utils::{perform_generic_tests, TestAdapter, TestContext}; + use super::test_utils::remove_entry; use super::*; struct FileTestAdapter { @@ -717,4 +770,99 @@ mod tests { async fn test_generic() { perform_generic_tests(FileTestAdapter::new()).await; } + + #[tokio::test] + async fn test_ignores_missing_files_multi() { + let adapter = FileTestAdapter::new(); + let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; + + let writer = ctx.writing(true).await.unwrap(); + let sequencer_id = writer.sequencer_ids().into_iter().next().unwrap(); + let entry_1 = lp_to_entry("upc,region=east user=1 100"); + let entry_2 = lp_to_entry("upc,region=east user=2 200"); + let entry_3 = lp_to_entry("upc,region=east user=3 300"); + let entry_4 = lp_to_entry("upc,region=east user=4 400"); + let (sequence_1, _) = writer + .store_entry(&entry_1, sequencer_id, None) + .await + .unwrap(); + let (sequence_2, _) = writer + .store_entry(&entry_2, sequencer_id, None) + .await + .unwrap(); + let (sequence_3, _) = writer + .store_entry(&entry_3, sequencer_id, None) + .await + .unwrap(); + let (sequence_4, _) = writer + .store_entry(&entry_4, sequencer_id, None) + .await + .unwrap(); + + remove_entry( + &ctx.path, + &ctx.database_name, + sequencer_id, + sequence_2.number, + ) + .await; + remove_entry( + &ctx.path, + &ctx.database_name, + sequencer_id, + sequence_3.number, + ) + .await; + + let mut reader = ctx.reading(true).await.unwrap(); + let mut stream = reader.streams().remove(&sequencer_id).unwrap(); + let sequenced_entry_1 = stream.stream.next().await.unwrap().unwrap(); + let sequenced_entry_4 = stream.stream.next().await.unwrap().unwrap(); + assert_eq!( + sequence_1.number, + sequenced_entry_1.sequence().unwrap().number + ); + assert_eq!( + sequence_4.number, + sequenced_entry_4.sequence().unwrap().number + ); + assert_eq!(&entry_1, sequenced_entry_1.entry()); + assert_eq!(&entry_4, sequenced_entry_4.entry()); + } + + #[tokio::test] + async fn test_ignores_missing_files_single() { + let adapter = FileTestAdapter::new(); + let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; + + let writer = ctx.writing(true).await.unwrap(); + let sequencer_id = 
writer.sequencer_ids().into_iter().next().unwrap(); + let entry_1 = lp_to_entry("upc,region=east user=1 100"); + let entry_2 = lp_to_entry("upc,region=east user=2 200"); + let (sequence_1, _) = writer + .store_entry(&entry_1, sequencer_id, None) + .await + .unwrap(); + let (sequence_2, _) = writer + .store_entry(&entry_2, sequencer_id, None) + .await + .unwrap(); + + remove_entry( + &ctx.path, + &ctx.database_name, + sequencer_id, + sequence_1.number, + ) + .await; + + let mut reader = ctx.reading(true).await.unwrap(); + let mut stream = reader.streams().remove(&sequencer_id).unwrap(); + let sequenced_entry_2 = stream.stream.next().await.unwrap().unwrap(); + assert_eq!( + sequence_2.number, + sequenced_entry_2.sequence().unwrap().number + ); + assert_eq!(&entry_2, sequenced_entry_2.entry()); + } }
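
Note on the write_buffer/src/file.rs change above: the gap handling is the subtlest part of this patch, so here is a minimal, self-contained Rust sketch of the decision the consumer now makes when a sequence file is missing. The enum and function names are illustrative only and do not exist in IOx; the comparison mirrors the `watermark.saturating_sub(1) > sequence_number` check introduced in the diff, where the watermark is defined as "last written sequence number + 1".

/// What a file-based consumer should do when the file for
/// `sequence_number` cannot be found (illustrative sketch, not IOx API).
#[derive(Debug, PartialEq)]
enum MissingFileAction {
    /// A later entry already exists, so the missing file is a permanent
    /// gap (e.g. a removed entry): advance past it.
    SkipGap,
    /// Nothing newer has been written yet: wait and poll again.
    WaitForData,
}

/// `watermark` is "last written sequence number + 1"; a gap only exists
/// if something *after* the missing sequence number has been written.
fn on_missing_file(watermark: u64, sequence_number: u64) -> MissingFileAction {
    if watermark.saturating_sub(1) > sequence_number {
        MissingFileAction::SkipGap
    } else {
        MissingFileAction::WaitForData
    }
}

fn main() {
    // Entries 0..=3 were written and entry 1 was removed: the watermark is 4,
    // so entry 1 is a gap and the consumer should skip it.
    assert_eq!(on_missing_file(4, 1), MissingFileAction::SkipGap);
    // Entry 2 simply has not been produced yet: the watermark is 2,
    // so the consumer should keep waiting.
    assert_eq!(on_missing_file(2, 2), MissingFileAction::WaitForData);
    println!("gap-detection sketch behaves as expected");
}

The design choice captured here is that a missing file is only treated as a permanent gap once something newer is known to exist; otherwise the consumer keeps polling, which in the real implementation above is the one-second sleep before retrying.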