From 8eece6135bd4e357e6983be78f5d709c15c2edd5 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 14:39:05 -0400 Subject: [PATCH 01/12] chore: Update to Rust 1.61 --- rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 1e0347218d..4e832fcec3 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.60" +channel = "1.61" components = [ "rustfmt", "clippy" ] From 5fcf18cc029913b2ac900702fb7ef1deca06d22b Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 14:39:51 -0400 Subject: [PATCH 02/12] fix: Add missing assert call around contains tests `contains` is now must_use. Thanks Rust! --- compactor/src/compact.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compactor/src/compact.rs b/compactor/src/compact.rs index deb614c4b8..02cb7584f4 100644 --- a/compactor/src/compact.rs +++ b/compactor/src/compact.rs @@ -1887,8 +1887,8 @@ mod tests { // They should be 2 groups assert_eq!(groups.len(), 2, "There should have been two group"); - groups[0].parquet_files.contains(&pf1); - groups[1].parquet_files.contains(&pf2); + assert!(groups[0].parquet_files.contains(&pf1)); + assert!(groups[1].parquet_files.contains(&pf2)); } #[test] From 792c394cf26ae59110cf4a55f44501a18fd1161a Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 14:50:02 -0400 Subject: [PATCH 03/12] fix: Update expected value to new debug formatting Debug formatting is always considered unstable. This changed in Rust 1.61. References: - https://github.com/rust-lang/rust/issues/95732 - https://github.com/rust-lang/rust/pull/95345 --- logfmt/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logfmt/src/lib.rs b/logfmt/src/lib.rs index 408a897d29..f87581dfac 100644 --- a/logfmt/src/lib.rs +++ b/logfmt/src/lib.rs @@ -392,7 +392,7 @@ mod test { fn quote_not_printable() { assert_eq!(quote_and_escape("foo\nbar"), r#""foo\nbar""#); assert_eq!(quote_and_escape("foo\r\nbar"), r#""foo\r\nbar""#); - assert_eq!(quote_and_escape("foo\0bar"), r#""foo\u{0}bar""#); + assert_eq!(quote_and_escape("foo\0bar"), r#""foo\0bar""#); } #[test] From 53a94c4c7b2c162258e06b84a29bfd1d64b1b43b Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 15:20:09 -0400 Subject: [PATCH 04/12] fix: Don't use clippy::use_self in influxdb2_client due to false positive See https://github.com/rust-lang/rust-clippy/issues/6902 It's an interaction between clippy and serde; the lint produces confusing and incorrect warnings. --- influxdb2_client/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/influxdb2_client/src/lib.rs b/influxdb2_client/src/lib.rs index 6e36bb3fe5..3f53c11459 100644 --- a/influxdb2_client/src/lib.rs +++ b/influxdb2_client/src/lib.rs @@ -1,10 +1,11 @@ #![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +// `clippy::use_self` is deliberately excluded from the lints this crate uses. +// See . #![warn( missing_copy_implementations, missing_debug_implementations, missing_docs, clippy::explicit_iter_loop, - clippy::use_self, clippy::clone_on_ref_ptr, clippy::future_not_send )] From b2279fae3984a29e73a7070d0b99ae24675eb606 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 15:26:56 -0400 Subject: [PATCH 05/12] fix: Don't lint on holding lock across await points in tracker tests These have explicit drops that Clippy isn't recognizing; none of these are actually being held across await points. See . --- tracker/src/lock.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tracker/src/lock.rs b/tracker/src/lock.rs index 88bf3f69be..0f067dfdc2 100644 --- a/tracker/src/lock.rs +++ b/tracker/src/lock.rs @@ -294,9 +294,12 @@ unsafe impl lock_api::RawRwLockUpgrade #[cfg(test)] mod tests { - use std::time::Duration; + // Clippy isn't recognizing the explicit drops; none of these locks are actually being held + // across await points. See + #![allow(clippy::await_holding_lock)] use super::*; + use std::time::Duration; #[test] fn test_counts() { From 15ee3f326f1b2ecc609d854ad08fc59f7b4c5156 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 15:40:33 -0400 Subject: [PATCH 06/12] fix: Rearrange lock usage to not hold lock across an await point --- influxdb2_client/tests/common/server_fixture.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/influxdb2_client/tests/common/server_fixture.rs b/influxdb2_client/tests/common/server_fixture.rs index 2d6dde839f..fc0eab81d7 100644 --- a/influxdb2_client/tests/common/server_fixture.rs +++ b/influxdb2_client/tests/common/server_fixture.rs @@ -71,10 +71,13 @@ impl ServerFixture { let shared_server = SHARED_SERVER.get_or_init(|| parking_lot::Mutex::new(Weak::new())); - let mut shared_server = shared_server.lock(); + let shared_upgraded = { + let locked = shared_server.lock(); + locked.upgrade() + }; // is a shared server already present? - let server = match shared_server.upgrade() { + let server = match shared_upgraded { Some(server) => server, None => { // if not, create one @@ -86,11 +89,11 @@ impl ServerFixture { // save a reference for other threads that may want to // use this server, but don't prevent it from being // destroyed when going out of scope + let mut shared_server = shared_server.lock(); *shared_server = Arc::downgrade(&server); server } }; - std::mem::drop(shared_server); Self { server } } From 4bad553dc66c0644567ae3b0e0b2699049555279 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 19 May 2022 16:09:47 -0400 Subject: [PATCH 07/12] fix: Use a method instead of holding a lock across an await point --- ingester/src/data.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index ce48216c9c..1cf6360908 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -2206,8 +2206,8 @@ mod tests { .await .unwrap(); { - let tables = data.tables.read(); - let table = tables.get("mem").unwrap().read().await; + let table_data = data.table_data("mem").unwrap(); + let table = table_data.read().await; let p = table.partition_data.get("1970-01-01").unwrap(); assert_eq!( p.data.max_persisted_sequence_number, @@ -2229,8 +2229,8 @@ mod tests { .await .unwrap(); - let tables = data.tables.read(); - let table = tables.get("mem").unwrap().read().await; + let table_data = data.table_data("mem").unwrap(); + let table = table_data.read().await; let partition = table.partition_data.get("1970-01-01").unwrap(); assert_eq!( partition.data.buffer.as_ref().unwrap().min_sequence_number, From 359046f3f29b048affc1633aec7d18c9a66c1dff Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 20 May 2022 10:44:06 -0400 Subject: [PATCH 08/12] ci: give the doc builder more memory --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 00381812eb..d9bb1587b5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -131,6 +131,7 @@ jobs: doc: docker: - image: quay.io/influxdb/rust:ci + resource_class: medium+ # use of a smaller executor runs out of memory environment: # Disable incremental compilation to avoid overhead. We are not preserving these files anyway. CARGO_INCREMENTAL: "0" From 1bc02b1487e3572088e9001383db93c469e7c3da Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 May 2022 08:21:39 +0000 Subject: [PATCH 09/12] chore(deps): Bump regex-syntax from 0.6.25 to 0.6.26 (#4653) Bumps [regex-syntax](https://github.com/rust-lang/regex) from 0.6.25 to 0.6.26. - [Release notes](https://github.com/rust-lang/regex/releases) - [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/regex/commits) --- updated-dependencies: - dependency-name: regex-syntax dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- query_functions/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc377488d0..afb017cf5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4436,9 +4436,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.25" +version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" [[package]] name = "remove_dir_all" diff --git a/query_functions/Cargo.toml b/query_functions/Cargo.toml index 1ed6884a6b..9cf48c0bc8 100644 --- a/query_functions/Cargo.toml +++ b/query_functions/Cargo.toml @@ -13,7 +13,7 @@ itertools = "0.10.2" lazy_static = "1.4.0" observability_deps = { path = "../observability_deps" } regex = "1" -regex-syntax = "0.6.25" +regex-syntax = "0.6.26" schema = { path = "../schema" } snafu = "0.7" workspace-hack = { path = "../workspace-hack"} From 292f71759e0bc7030045255a84dff38c28b95e94 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 May 2022 08:30:49 +0000 Subject: [PATCH 10/12] chore(deps): Bump http-body from 0.4.4 to 0.4.5 (#4654) Bumps [http-body](https://github.com/hyperium/http-body) from 0.4.4 to 0.4.5. - [Release notes](https://github.com/hyperium/http-body/releases) - [Changelog](https://github.com/hyperium/http-body/blob/master/CHANGELOG.md) - [Commits](https://github.com/hyperium/http-body/compare/v0.4.4...v0.4.5) --- updated-dependencies: - dependency-name: http-body dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afb017cf5c..3d129e3e23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2008,9 +2008,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", "http", From 5c033b462e9b5f3f333b277b89c70eb5ff3bcb79 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 May 2022 08:39:01 +0000 Subject: [PATCH 11/12] chore(deps): Bump regex from 1.5.5 to 1.5.6 (#4655) Bumps [regex](https://github.com/rust-lang/regex) from 1.5.5 to 1.5.6. - [Release notes](https://github.com/rust-lang/regex/releases) - [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/regex/compare/1.5.5...1.5.6) --- updated-dependencies: - dependency-name: regex dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- service_grpc_influxrpc/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3d129e3e23..53a5565207 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4416,9 +4416,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.5" +version = "1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ "aho-corasick", "memchr", diff --git a/service_grpc_influxrpc/Cargo.toml b/service_grpc_influxrpc/Cargo.toml index 3b9347a6ab..1d6519aef3 100644 --- a/service_grpc_influxrpc/Cargo.toml +++ b/service_grpc_influxrpc/Cargo.toml @@ -20,7 +20,7 @@ arrow = { version = "14.0.0", features = ["prettyprint"] } async-trait = "0.1" futures = "0.3" prost = "0.10" -regex = "1.5.4" +regex = "1.5.6" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.81" snafu = "0.7" From 47347bef9f90d9eb36543176c3ebfa36a1cc8bc5 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Mon, 23 May 2022 14:13:41 +0200 Subject: [PATCH 12/12] test: add query test scenario w/ missing columns in different chunks (#4656) * test: do NOT filter out query test scenarios w/ unordered stages in different partitions It should be possible to have two chunks in different partitions where both are in the ingester stage or the first one is in the parquet stage and the 2nd one in the ingester stage. * test: add query test scenario w/ missing columns in different chunks Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../in/two_chunks_missing_columns.expected | 8 +++ .../cases/in/two_chunks_missing_columns.sql | 5 ++ query_tests/src/cases.rs | 58 +++++++++-------- query_tests/src/scenarios.rs | 1 + query_tests/src/scenarios/library.rs | 27 ++++++++ query_tests/src/scenarios/util.rs | 63 +++++++++++++------ 6 files changed, 117 insertions(+), 45 deletions(-) create mode 100644 query_tests/cases/in/two_chunks_missing_columns.expected create mode 100644 query_tests/cases/in/two_chunks_missing_columns.sql diff --git a/query_tests/cases/in/two_chunks_missing_columns.expected b/query_tests/cases/in/two_chunks_missing_columns.expected new file mode 100644 index 0000000000..8694f327af --- /dev/null +++ b/query_tests/cases/in/two_chunks_missing_columns.expected @@ -0,0 +1,8 @@ +-- Test Setup: TwoChunksMissingColumns +-- SQL: SELECT * from "table" order by time; ++--------+--------+--------+------+------+------+--------------------------------+ +| field1 | field2 | field3 | tag1 | tag2 | tag3 | time | ++--------+--------+--------+------+------+------+--------------------------------+ +| 10 | 11 | | a | b | | 1970-01-01T00:00:00.000000100Z | +| 20 | | 22 | a | | c | 1970-01-01T00:00:00.000000200Z | ++--------+--------+--------+------+------+------+--------------------------------+ diff --git a/query_tests/cases/in/two_chunks_missing_columns.sql b/query_tests/cases/in/two_chunks_missing_columns.sql new file mode 100644 index 0000000000..6f0cba621c --- /dev/null +++ b/query_tests/cases/in/two_chunks_missing_columns.sql @@ -0,0 +1,5 @@ +-- Basic query tests +-- IOX_SETUP: TwoChunksMissingColumns + +-- query data +SELECT * from "table" order by time; diff --git a/query_tests/src/cases.rs b/query_tests/src/cases.rs index e046e395ca..aee30e211e 100644 --- a/query_tests/src/cases.rs +++ b/query_tests/src/cases.rs @@ -2,7 +2,7 @@ //! This file is auto generated by query_tests/generate. //! Do not edit manually --> will result in sadness use std::path::Path; -use crate::runner::{Runner, make_output_path, read_file}; +use crate::runner::Runner; #[tokio::test] // Tests from "basic.sql", @@ -117,9 +117,9 @@ async fn test_cases_new_sql_system_tables_sql() { } #[tokio::test] -// Tests from "two_chunks.sql", -async fn test_cases_two_sql() { - let input_path = Path::new("cases").join("in").join("two_chunks.sql"); +// Tests from "pushdown.sql", +async fn test_cases_pushdown_sql() { + let input_path = Path::new("cases").join("in").join("pushdown.sql"); let mut runner = Runner::new(); runner .run(input_path) @@ -134,28 +134,6 @@ async fn test_cases_two_sql() { // Tests from "several_chunks.sql", async fn test_cases_several_chunks_sql() { let input_path = Path::new("cases").join("in").join("several_chunks.sql"); - let output_path = make_output_path(&input_path).unwrap(); - let expected_path = input_path.with_extension("expected"); - - let mut runner = Runner::new(); - let result = runner - .run(input_path) - .await; - if result.is_err() { - let output_contents = read_file(&output_path); - let expected_contents = read_file(&expected_path); - pretty_assertions::assert_eq!(expected_contents, output_contents); - } else { - runner - .flush() - .expect("flush worked"); - } -} - -#[tokio::test] -// Tests from "pushdown.sql", -async fn test_cases_pushdown_sql() { - let input_path = Path::new("cases").join("in").join("pushdown.sql"); let mut runner = Runner::new(); runner .run(input_path) @@ -192,4 +170,32 @@ async fn test_cases_timestamps_sql() { runner .flush() .expect("flush worked"); +} + +#[tokio::test] +// Tests from "two_chunks.sql", +async fn test_cases_two_chunks_sql() { + let input_path = Path::new("cases").join("in").join("two_chunks.sql"); + let mut runner = Runner::new(); + runner + .run(input_path) + .await + .expect("test failed"); + runner + .flush() + .expect("flush worked"); +} + +#[tokio::test] +// Tests from "two_chunks_missing_columns.sql", +async fn test_cases_two_chunks_missing_columns_sql() { + let input_path = Path::new("cases").join("in").join("two_chunks_missing_columns.sql"); + let mut runner = Runner::new(); + runner + .run(input_path) + .await + .expect("test failed"); + runner + .flush() + .expect("flush worked"); } \ No newline at end of file diff --git a/query_tests/src/scenarios.rs b/query_tests/src/scenarios.rs index 22e42fd088..30584ed6d4 100644 --- a/query_tests/src/scenarios.rs +++ b/query_tests/src/scenarios.rs @@ -61,6 +61,7 @@ pub fn get_all_setups() -> &'static HashMap> { register_setup!(OneMeasurementRealisticTimes), register_setup!(TwoMeasurementsManyFieldsTwoChunks), register_setup!(ManyFieldsSeveralChunks), + register_setup!(TwoChunksMissingColumns), ] .into_iter() .map(|(name, setup)| (name.to_string(), setup as Arc)) diff --git a/query_tests/src/scenarios/library.rs b/query_tests/src/scenarios/library.rs index 0c5dfd59ac..301fc1352b 100644 --- a/query_tests/src/scenarios/library.rs +++ b/query_tests/src/scenarios/library.rs @@ -1322,3 +1322,30 @@ impl DbSetup for MeasurementForDefect2890 { all_scenarios_for_one_chunk(vec![], vec![], lp, "mm", partition_key).await } } + +#[derive(Debug)] +pub struct TwoChunksMissingColumns {} +#[async_trait] +impl DbSetup for TwoChunksMissingColumns { + async fn make(&self) -> Vec { + let partition_key1 = "a"; + let partition_key2 = "b"; + + let lp_lines1 = vec!["table,tag1=a,tag2=b field1=10,field2=11 100"]; + let lp_lines2 = vec!["table,tag1=a,tag3=c field1=20,field3=22 200"]; + + make_n_chunks_scenario(&[ + ChunkData { + lp_lines: lp_lines1, + partition_key: partition_key1, + ..Default::default() + }, + ChunkData { + lp_lines: lp_lines2, + partition_key: partition_key2, + ..Default::default() + }, + ]) + .await + } +} diff --git a/query_tests/src/scenarios/util.rs b/query_tests/src/scenarios/util.rs index 553ca62d3e..a32daacfe3 100644 --- a/query_tests/src/scenarios/util.rs +++ b/query_tests/src/scenarios/util.rs @@ -94,7 +94,7 @@ impl<'a, 'b> ChunkData<'a, 'b> { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ChunkStage { /// In parquet file. Parquet, @@ -383,25 +383,17 @@ pub async fn make_n_chunks_scenario(chunks: &[ChunkData<'_, '_>]) -> Vec]) -> Vec>::new(); + for chunk_data in &chunks { + stage_by_partition + .entry(chunk_data.partition_key) + .or_default() + .push( + chunk_data + .chunk_stage + .expect("Stage should be initialized by now"), + ); + } + for stages in stage_by_partition.values() { + if !stages.windows(2).all(|stages| { + stages[0] + .partial_cmp(&stages[1]) + .map(|o| o.is_le()) + .unwrap_or_default() + }) { + continue 'stage_combinations; + } + } + + // build scenario + let mut scenario_name = format!("{} chunks:", chunks.len()); + let mut mock_ingester = MockIngester::new().await; + + for chunk_data in chunks { let name = make_chunk(&mut mock_ingester, chunk_data).await; write!(&mut scenario_name, ", {}", name).unwrap(); } - assert!(stages_it.next().is_none(), "generated too many stages"); - let db = mock_ingester.into_query_namespace().await; scenarios.push(DbScenario { scenario_name, db }); } @@ -550,7 +572,10 @@ async fn make_chunk(mock_ingester: &mut MockIngester, chunk: ChunkData<'_, '_>) } } - let mut name = format!("Chunk {}", chunk_stage); + let mut name = format!( + "Chunk stage={} partition={}", + chunk_stage, chunk.partition_key + ); let n_preds = chunk.preds.len(); if n_preds > 0 { let delete_names: Vec<_> = chunk