Merge branch 'main' into dependabot/cargo/clap-4.0.2
commit
266b8f2a58
|
|
@ -1050,7 +1050,7 @@ dependencies = [
|
|||
"influxdb_line_protocol",
|
||||
"iox_time",
|
||||
"observability_deps",
|
||||
"ordered-float 3.1.0",
|
||||
"ordered-float 3.2.0",
|
||||
"percent-encoding",
|
||||
"schema",
|
||||
"serde",
|
||||
|
|
@ -1094,7 +1094,7 @@ dependencies = [
|
|||
"log",
|
||||
"num_cpus",
|
||||
"object_store",
|
||||
"ordered-float 3.1.0",
|
||||
"ordered-float 3.2.0",
|
||||
"parking_lot 0.12.1",
|
||||
"parquet",
|
||||
"paste",
|
||||
|
|
@ -1116,7 +1116,7 @@ source = "git+https://github.com/apache/arrow-datafusion.git?rev=c7f3a70a79ee840
|
|||
dependencies = [
|
||||
"arrow",
|
||||
"object_store",
|
||||
"ordered-float 3.1.0",
|
||||
"ordered-float 3.2.0",
|
||||
"parquet",
|
||||
"sqlparser 0.23.0",
|
||||
]
|
||||
|
|
@ -1163,7 +1163,7 @@ dependencies = [
|
|||
"hashbrown",
|
||||
"lazy_static",
|
||||
"md-5",
|
||||
"ordered-float 3.1.0",
|
||||
"ordered-float 3.2.0",
|
||||
"paste",
|
||||
"rand",
|
||||
"regex",
|
||||
|
|
@ -1741,9 +1741,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "handlebars"
|
||||
version = "4.3.4"
|
||||
version = "4.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56b224eaa4987c03c30b251de7ef0c15a6a59f34222905850dbc3026dfb24d5f"
|
||||
checksum = "433e4ab33f1213cdc25b5fa45c76881240cfe79284cf2b395e8b9e312a30a2fd"
|
||||
dependencies = [
|
||||
"log",
|
||||
"pest",
|
||||
|
|
@ -2061,7 +2061,9 @@ dependencies = [
|
|||
"data_types",
|
||||
"datafusion 0.1.0",
|
||||
"dotenvy",
|
||||
"flate2",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"generated_types",
|
||||
"hashbrown",
|
||||
"http",
|
||||
|
|
@ -2126,12 +2128,13 @@ dependencies = [
|
|||
"client_util",
|
||||
"futures-util",
|
||||
"generated_types",
|
||||
"mockito",
|
||||
"influxdb_line_protocol",
|
||||
"prost 0.11.0",
|
||||
"rand",
|
||||
"reqwest",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic",
|
||||
]
|
||||
|
||||
|
|
@ -2182,7 +2185,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"generated_types",
|
||||
"snafu",
|
||||
"sqlparser 0.24.0",
|
||||
"sqlparser 0.25.0",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
||||
|
|
@ -2222,6 +2225,7 @@ dependencies = [
|
|||
"pin-project",
|
||||
"predicate",
|
||||
"prost 0.11.0",
|
||||
"rand",
|
||||
"schema",
|
||||
"snafu",
|
||||
"test_helpers",
|
||||
|
|
@ -2681,9 +2685,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.134"
|
||||
version = "0.2.135"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
|
||||
checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
|
|
@ -3130,9 +3134,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "object_store"
|
||||
version = "0.5.0"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2168fee79ee3e7695905bc3a48777d807f82d956f821186fa7a2601c1295a73e"
|
||||
checksum = "56ce10a205d9f610ae3532943039c34c145930065ce0c4284134c897fe6073b1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64",
|
||||
|
|
@ -3142,7 +3146,7 @@ dependencies = [
|
|||
"itertools",
|
||||
"parking_lot 0.12.1",
|
||||
"percent-encoding",
|
||||
"quick-xml 0.24.1",
|
||||
"quick-xml 0.25.0",
|
||||
"rand",
|
||||
"reqwest",
|
||||
"ring",
|
||||
|
|
@ -3207,9 +3211,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "3.1.0"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "98ffdb14730ed2ef599c65810c15b000896e21e8776b512de0db0c3d7335cc2a"
|
||||
checksum = "129d36517b53c461acc6e1580aeb919c8ae6708a4b1eae61c4463a615d4f0411"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
|
@ -3581,7 +3585,7 @@ dependencies = [
|
|||
"schema",
|
||||
"serde_json",
|
||||
"snafu",
|
||||
"sqlparser 0.24.0",
|
||||
"sqlparser 0.25.0",
|
||||
"test_helpers",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
|
@ -3670,9 +3674,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.43"
|
||||
version = "1.0.46"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
|
||||
checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
|
@ -3942,9 +3946,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.24.1"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37dddbbe9df96afafcb8027fcf263971b726530e12f0787f620a7ba5b4846081"
|
||||
checksum = "58e21a144a0ffb5fad7b464babcdab934a325ad69b7c0373bcfef5cbd9799ca9"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
|
|
@ -4412,9 +4416,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.85"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
|
||||
checksum = "41feea4228a6f1cd09ec7a3593a682276702cd67b5273544757dae23c096f074"
|
||||
dependencies = [
|
||||
"itoa 1.0.3",
|
||||
"ryu",
|
||||
|
|
@ -4669,15 +4673,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.9.0"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
|
||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
||||
|
||||
[[package]]
|
||||
name = "snafu"
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5177903bf45656592d9eb5c0e22f408fc023aae51dbe2088889b71633ba451f2"
|
||||
checksum = "dd726aec4ebad65756394ff89a9b9598793d4e30121cd71690244c1e497b3aee"
|
||||
dependencies = [
|
||||
"doc-comment",
|
||||
"snafu-derive",
|
||||
|
|
@ -4685,9 +4689,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "snafu-derive"
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "410b26ed97440d90ced3e2488c868d56a86e2064f5d7d6f417909b286afe25e5"
|
||||
checksum = "712529e9b0b014eabaa345b38e06032767e3dc393e8b017e853b1d7247094e74"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
|
|
@ -4748,9 +4752,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.24.0"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dac9c312566fdfc45a38ecf1924013c82af2a7d5315e46f67b1cc987f12be260"
|
||||
checksum = "0781f2b6bd03e5adf065c8e772b49eaea9f640d06a1b9130330fe8bd2563f4fd"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
|
@ -4953,9 +4957,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.101"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
|
||||
checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -5228,9 +5232,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tokio-stream"
|
||||
version = "0.1.10"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6edf2d6bc038a43d31353570e27270603f4648d18f5ed10c0e179abe43255af"
|
||||
checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
|
|
@ -5434,9 +5438,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.36"
|
||||
version = "0.1.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307"
|
||||
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"log",
|
||||
|
|
@ -5447,9 +5451,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.22"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11c75893af559bc8e10716548bdef5cb2b983f8e637db9d0e15126b61b484ee2"
|
||||
checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -5458,9 +5462,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.29"
|
||||
version = "0.1.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7"
|
||||
checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ humantime = "2.1.0"
|
|||
iox_catalog = { path = "../iox_catalog" }
|
||||
iox_time = { path = "../iox_time" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.83"
|
||||
serde_json = "1.0.86"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.1.0"
|
||||
trace = { path = "../trace" }
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ datafusion = { path = "../datafusion" }
|
|||
futures = "0.3"
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
predicate = { path = "../predicate" }
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ pub async fn compact(compactor: Arc<Compactor>, do_full_compact: bool) -> usize
|
|||
compaction_type,
|
||||
CompactionLevel::Initial,
|
||||
compact_in_parallel,
|
||||
false, // no split
|
||||
true, // split
|
||||
candidates.clone().into(),
|
||||
)
|
||||
.await;
|
||||
|
|
@ -57,7 +57,7 @@ pub async fn compact(compactor: Arc<Compactor>, do_full_compact: bool) -> usize
|
|||
compaction_type,
|
||||
CompactionLevel::FileNonOverlapped,
|
||||
compact_in_parallel,
|
||||
false, // don't split
|
||||
true, // split
|
||||
candidates.into(),
|
||||
)
|
||||
.await;
|
||||
|
|
@ -812,24 +812,42 @@ mod tests {
|
|||
|
||||
compact(compactor, true).await;
|
||||
|
||||
// Should have 1 non-soft-deleted file:
|
||||
// Should have 2 non-soft-deleted files:
|
||||
//
|
||||
// - the level 2 file created after combining all 3 level 1 files created by the first step
|
||||
// - the 2 level-2 files created after combining all 3 level 1 files created by the first step
|
||||
// of compaction to compact remaining level 0 files
|
||||
let mut files = catalog.list_by_table_not_to_delete(table.table.id).await;
|
||||
assert_eq!(files.len(), 1, "{files:?}");
|
||||
assert_eq!(files.len(), 2, "{files:?}");
|
||||
let files_and_levels: Vec<_> = files
|
||||
.iter()
|
||||
.map(|f| (f.id.get(), f.compaction_level))
|
||||
.collect();
|
||||
|
||||
// The initial files are: L0 1-4, L1 5-6. The first step of cold compaction took files 1-5
|
||||
// and compacted them into a l-1 file 7. The second step of cold compaction
|
||||
// took 6 and 7 and combined them all into file 8.
|
||||
assert_eq!(files_and_levels, vec![(8, CompactionLevel::Final)]);
|
||||
// and compacted them into two l-1 files 7, 8. The second step of cold compaction
|
||||
// took 6, 7, and 8 and combined them all into two files 9 and 10.
|
||||
assert_eq!(
|
||||
files_and_levels,
|
||||
vec![(9, CompactionLevel::Final), (10, CompactionLevel::Final)]
|
||||
);
|
||||
|
||||
// ------------------------------------------------
|
||||
// Verify the parquet file content
|
||||
// first file:
|
||||
let file = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 421 | | OH | 21 | 1970-01-01T00:00:00.000091Z |",
|
||||
"| 81601 | | PA | 15 | 1970-01-01T00:00:00.000090Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
// second file
|
||||
let file = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
|
|
@ -847,9 +865,7 @@ mod tests {
|
|||
"| 20 | | VT | 20 | 1970-01-01T00:00:00.000026Z |",
|
||||
"| 21 | | OH | 21 | 1970-01-01T00:00:00.000000025Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 421 | | OH | 21 | 1970-01-01T00:00:00.000091Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 81601 | | PA | 15 | 1970-01-01T00:00:00.000090Z |",
|
||||
"+-----------+------+------+------+--------------------------------+",
|
||||
],
|
||||
&batches
|
||||
|
|
@ -1027,14 +1043,14 @@ mod tests {
|
|||
|
||||
compact(compactor, true).await;
|
||||
|
||||
// Should have 3 non-soft-deleted files:
|
||||
// Should have 4 non-soft-deleted files:
|
||||
//
|
||||
// - pf4, the level 1 file untouched because it didn't fit in the memory budget
|
||||
// - pf6, the level 2 file untouched because it doesn't overlap anything
|
||||
// - the level 2 file created after combining all 3 level 1 files created by the first step
|
||||
// - two level-2 files created after combining all 3 level 1 files created by the first step
|
||||
// of compaction to compact remaining level 0 files
|
||||
let mut files = catalog.list_by_table_not_to_delete(table.table.id).await;
|
||||
assert_eq!(files.len(), 3, "{files:?}");
|
||||
assert_eq!(files.len(), 4, "{files:?}");
|
||||
let files_and_levels: Vec<_> = files
|
||||
.iter()
|
||||
.map(|f| (f.id.get(), f.compaction_level))
|
||||
|
|
@ -1042,20 +1058,35 @@ mod tests {
|
|||
|
||||
// File 4 was L1 but didn't fit in the memory budget, so was untouched.
|
||||
// File 6 was already L2 and did not overlap with anything, so was untouched.
|
||||
// Cold compaction took files 1, 2, 3, 5 and compacted them into file 7.
|
||||
// Cold compaction took files 1, 2, 3, 5 and compacted them into 2 files 7 and 8.
|
||||
assert_eq!(
|
||||
files_and_levels,
|
||||
vec![
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(6, CompactionLevel::Final),
|
||||
(7, CompactionLevel::Final),
|
||||
(8, CompactionLevel::Final),
|
||||
]
|
||||
);
|
||||
|
||||
// ------------------------------------------------
|
||||
// Verify the parquet file content
|
||||
let file1 = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file1).await;
|
||||
// newly created L-2 with largest timestamp
|
||||
let file = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
// newly created L-2 with smallest timestamp
|
||||
let file = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+--------------------------------+",
|
||||
|
|
@ -1068,15 +1099,14 @@ mod tests {
|
|||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000000009Z |",
|
||||
"| 21 | | OH | 21 | 1970-01-01T00:00:00.000000025Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"+-----------+------+------+------+--------------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
let file0 = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file0).await;
|
||||
// available L2 that does not overlap
|
||||
let file = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
|
|
@ -1088,6 +1118,20 @@ mod tests {
|
|||
],
|
||||
&batches
|
||||
);
|
||||
// available L1 that did not fit in the memory budget
|
||||
let file = files.pop().unwrap();
|
||||
let batches = table.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
"| field_int | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
"| 1600 | WA | 10 | 1970-01-01T00:00:00.000028Z |",
|
||||
"| 20 | VT | 20 | 1970-01-01T00:00:00.000026Z |",
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
struct TestDb {
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ use data_types::{
|
|||
ChunkId, ChunkOrder, CompactionLevel, DeletePredicate, PartitionId, SequenceNumber,
|
||||
TableSummary, Timestamp, TimestampMinMax, Tombstone,
|
||||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
QueryChunk, QueryChunkError, QueryChunkMeta,
|
||||
QueryChunk, QueryChunkMeta,
|
||||
};
|
||||
use observability_deps::tracing::trace;
|
||||
use parquet_file::chunk::ParquetChunk;
|
||||
|
|
@ -194,7 +194,7 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
_columns: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
|
|
@ -208,7 +208,7 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
|
|
@ -230,7 +230,7 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
mut ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, QueryChunkError> {
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
ctx.set_metadata("storage", "compactor");
|
||||
ctx.set_metadata("projection", format!("{}", selection));
|
||||
trace!(?selection, "selection");
|
||||
|
|
@ -238,7 +238,7 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
self.data
|
||||
.read_filter(predicate, selection)
|
||||
.context(ReadParquetSnafu)
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))
|
||||
}
|
||||
|
||||
/// Returns chunk type
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ use datafusion::execution::context::TaskContext;
|
|||
use datafusion::physical_expr::PhysicalExpr;
|
||||
use datafusion::physical_plan::common::SizedRecordBatchStream;
|
||||
use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MemTrackingMetrics};
|
||||
use datafusion::physical_plan::{collect, ExecutionPlan};
|
||||
use datafusion::physical_plan::{collect, EmptyRecordBatchStream, ExecutionPlan};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion::{
|
||||
arrow::{
|
||||
|
|
@ -236,12 +236,19 @@ where
|
|||
}
|
||||
|
||||
/// Create a SendableRecordBatchStream from a RecordBatch
|
||||
pub fn stream_from_batch(batch: RecordBatch) -> SendableRecordBatchStream {
|
||||
stream_from_batches(vec![Arc::new(batch)])
|
||||
pub fn stream_from_batch(schema: Arc<Schema>, batch: RecordBatch) -> SendableRecordBatchStream {
|
||||
stream_from_batches(schema, vec![Arc::new(batch)])
|
||||
}
|
||||
|
||||
/// Create a SendableRecordBatchStream from Vec of RecordBatches with the same schema
|
||||
pub fn stream_from_batches(batches: Vec<Arc<RecordBatch>>) -> SendableRecordBatchStream {
|
||||
pub fn stream_from_batches(
|
||||
schema: Arc<Schema>,
|
||||
batches: Vec<Arc<RecordBatch>>,
|
||||
) -> SendableRecordBatchStream {
|
||||
if batches.is_empty() {
|
||||
return Box::pin(EmptyRecordBatchStream::new(schema));
|
||||
}
|
||||
|
||||
let dummy_metrics = ExecutionPlanMetricsSet::new();
|
||||
let mem_metrics = MemTrackingMetrics::new(&dummy_metrics, 0);
|
||||
let stream = SizedRecordBatchStream::new(batches[0].schema(), batches, mem_metrics);
|
||||
|
|
|
|||
|
|
@ -15,17 +15,25 @@ developers.
|
|||
Build IOx for release with pprof:
|
||||
|
||||
```shell
|
||||
cd influxdb_iox
|
||||
cargo build --release --features=pprof
|
||||
```
|
||||
|
||||
## Step 2: Start redpanda and postgres
|
||||
You can also install the `influxdb_iox` command locally via
|
||||
|
||||
Now, start up redpanda and postgres locally in docker containers:
|
||||
```shell
|
||||
cd influxdb_iox
|
||||
cargo install --path influxdb_iox
|
||||
```
|
||||
|
||||
## Step 2: Start kafka and postgres
|
||||
|
||||
Now, start up kafka and postgres locally in docker containers:
|
||||
```shell
|
||||
# get rskafka from https://github.com/influxdata/rskafka
|
||||
cd rskafka
|
||||
# Run redpanda on localhost:9010
|
||||
docker-compose -f docker-compose-redpanda.yml up &
|
||||
# Run kafka on localhost:9010
|
||||
docker-compose -f docker-compose-kafka.yml up &
|
||||
# now run postgres
|
||||
docker run -p 5432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust postgres &
|
||||
```
|
||||
|
|
@ -136,8 +144,8 @@ INFLUXDB_IOX_GRPC_BIND_ADDR=localhost:8084 \
|
|||
INFLUXDB_IOX_WRITE_BUFFER_TYPE=kafka \
|
||||
INFLUXDB_IOX_WRITE_BUFFER_ADDR=localhost:9010 \
|
||||
xINFLUXDB_IOX_WRITE_BUFFER_AUTO_CREATE_TOPICS=10 \
|
||||
INFLUXDB_IOX_WRITE_BUFFER_PARTITION_RANGE_START=0 \
|
||||
INFLUXDB_IOX_WRITE_BUFFER_PARTITION_RANGE_END=0 \
|
||||
INFLUXDB_IOX_SHARD_INDEX_RANGE_START=0 \
|
||||
INFLUXDB_IOX_SHARD_INDEX_RANGE_END=0 \
|
||||
INFLUXDB_IOX_PAUSE_INGEST_SIZE_BYTES=5000000000 \
|
||||
INFLUXDB_IOX_PERSIST_MEMORY_THRESHOLD_BYTES=4000000000 \
|
||||
INFLUXDB_IOX_CATALOG_DSN=postgres://postgres@localhost:5432/postgres \
|
||||
|
|
@ -151,6 +159,11 @@ LOG_FILTER=info \
|
|||
|
||||
# Step 5: Ingest data
|
||||
|
||||
You can load data using the influxdb_iox client:
|
||||
```shell
|
||||
influxdb_iox --host=http://localhost:8080 -v write test_db test_fixtures/lineproto/*.lp
|
||||
```
|
||||
|
||||
Now you can post data to `http://localhost:8080` with your favorite load generating tool
|
||||
|
||||
My favorite is https://github.com/alamb/low_card
|
||||
|
|
@ -171,3 +184,17 @@ posting fairly large requests (necessitating the
|
|||
# Step 6: Profile
|
||||
|
||||
See [`profiling.md`](./profiling.md).
|
||||
|
||||
|
||||
# Step 7: Clean up local state
|
||||
|
||||
If you find yourself needing to clean up postgres / kafka state, use these commands:
|
||||
```shell
|
||||
docker ps -a -q | xargs docker stop
|
||||
docker rm rskafka_proxy_1
|
||||
docker rm rskafka_kafka-0_1
|
||||
docker rm rskafka_kafka-1_1
|
||||
docker rm rskafka_kafka-2_1
|
||||
docker rm rskafka_zookeeper_1
|
||||
docker volume rm rskafka_kafka_0_data rskafka_kafka_1_data rskafka_kafka_2_data rskafka_zookeeper_data
|
||||
```
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ data_types = { path = "../data_types" }
|
|||
futures = "0.3"
|
||||
humantime = "2.1.0"
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
object_store = { version = "0.5.0" }
|
||||
object_store = { version = "0.5.1" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1", features = ["macros", "rt", "sync"] }
|
||||
|
|
|
|||
|
|
@ -82,8 +82,9 @@ message PartitionStatus {
|
|||
// Max sequence number persisted
|
||||
optional int64 parquet_max_sequence_number = 1;
|
||||
|
||||
// Max sequence number for a tombstone associated
|
||||
optional int64 tombstone_max_sequence_number = 2;
|
||||
// Deprecated tombstone support in ingester (#5825).
|
||||
reserved "tombstone_max_sequence_number";
|
||||
reserved 2;
|
||||
}
|
||||
|
||||
// Serialization of `predicate::predicate::Predicate` that contains DataFusion `Expr`s
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@ futures = "0.3"
|
|||
generated_types = { path = "../generated_types" }
|
||||
influxdb_iox_client = { path = "../influxdb_iox_client" }
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
object_store = { version = "0.5.0", features = ["aws"] }
|
||||
object_store = { version = "0.5.1", features = ["aws"] }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
schema = { path = "../schema" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.82"
|
||||
serde_json = "1.0.86"
|
||||
thiserror = "1.0.37"
|
||||
tokio = { version = "1.21" }
|
||||
tonic = { version = "0.8" }
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ bytes = "1.2"
|
|||
futures = { version = "0.3", default-features = false }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.83"
|
||||
serde_json = "1.0.86"
|
||||
snafu = "0.7"
|
||||
url = "2.3.1"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use crate::expression::conditional::{conditional_expression, ConditionalExpressi
|
|||
use crate::identifier::{identifier, Identifier};
|
||||
use crate::internal::{expect, ParseResult};
|
||||
use crate::literal::unsigned_integer;
|
||||
use crate::string::{regex, Regex};
|
||||
use core::fmt;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, tag_no_case};
|
||||
|
|
@ -11,73 +12,82 @@ use nom::multi::separated_list1;
|
|||
use nom::sequence::{pair, preceded, terminated};
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
/// Represents a fully-qualified measurement name.
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
pub struct MeasurementNameExpression {
|
||||
/// Represents a measurement name as either an identifier or a regular expression.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum MeasurementName {
|
||||
/// A measurement name expressed as an [`Identifier`].
|
||||
Name(Identifier),
|
||||
|
||||
/// A measurement name expressed as a [`Regex`].
|
||||
Regex(Regex),
|
||||
}
|
||||
|
||||
impl Parser for MeasurementName {
|
||||
/// Parse a measurement name, which may be an identifier or a regular expression.
|
||||
fn parse(i: &str) -> ParseResult<&str, Self> {
|
||||
alt((
|
||||
map(identifier, MeasurementName::Name),
|
||||
map(regex, MeasurementName::Regex),
|
||||
))(i)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for MeasurementName {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Name(ident) => fmt::Display::fmt(ident, f),
|
||||
Self::Regex(regex) => fmt::Display::fmt(regex, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a fully-qualified, 3-part measurement name.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct QualifiedMeasurementName {
|
||||
pub database: Option<Identifier>,
|
||||
pub retention_policy: Option<Identifier>,
|
||||
pub name: Identifier,
|
||||
pub name: MeasurementName,
|
||||
}
|
||||
|
||||
impl MeasurementNameExpression {
|
||||
/// Constructs a new `MeasurementNameExpression` with the specified `name`.
|
||||
pub fn new(name: Identifier) -> Self {
|
||||
Self {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new `MeasurementNameExpression` with the specified `name` and `database`.
|
||||
pub fn new_db(name: Identifier, database: Identifier) -> Self {
|
||||
Self {
|
||||
database: Some(database),
|
||||
retention_policy: None,
|
||||
name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new `MeasurementNameExpression` with the specified `name`, `database` and `retention_policy`.
|
||||
pub fn new_db_rp(name: Identifier, database: Identifier, retention_policy: Identifier) -> Self {
|
||||
Self {
|
||||
database: Some(database),
|
||||
retention_policy: Some(retention_policy),
|
||||
name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MeasurementNameExpression {
|
||||
impl Display for QualifiedMeasurementName {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name,
|
||||
} => write!(f, "{}", name)?,
|
||||
} => write!(f, "{}", name),
|
||||
Self {
|
||||
database: Some(db),
|
||||
retention_policy: None,
|
||||
name,
|
||||
} => write!(f, "{}..{}", db, name)?,
|
||||
} => write!(f, "{}..{}", db, name),
|
||||
Self {
|
||||
database: None,
|
||||
retention_policy: Some(rp),
|
||||
name,
|
||||
} => write!(f, "{}.{}", rp, name)?,
|
||||
} => write!(f, "{}.{}", rp, name),
|
||||
Self {
|
||||
database: Some(db),
|
||||
retention_policy: Some(rp),
|
||||
name,
|
||||
} => write!(f, "{}.{}.{}", db, rp, name)?,
|
||||
};
|
||||
Ok(())
|
||||
} => write!(f, "{}.{}.{}", db, rp, name),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Match a 3-part measurement name expression.
|
||||
pub fn measurement_name_expression(i: &str) -> ParseResult<&str, MeasurementNameExpression> {
|
||||
/// Match a fully-qualified, 3-part measurement name.
|
||||
///
|
||||
/// ```text
|
||||
/// qualified_measurement_name ::= measurement_name |
|
||||
/// ( policy_name "." measurement_name ) |
|
||||
/// ( db_name "." policy_name? "." measurement_name )
|
||||
///
|
||||
/// db_name ::= identifier
|
||||
/// policy_name ::= identifier
|
||||
/// measurement_name ::= identifier | regex_lit
|
||||
/// ```
|
||||
pub fn qualified_measurement_name(i: &str) -> ParseResult<&str, QualifiedMeasurementName> {
|
||||
let (remaining_input, (opt_db_rp, name)) = pair(
|
||||
opt(alt((
|
||||
// database "." retention_policy "."
|
||||
|
|
@ -93,7 +103,7 @@ pub fn measurement_name_expression(i: &str) -> ParseResult<&str, MeasurementName
|
|||
// retention_policy "."
|
||||
map(terminated(identifier, tag(".")), |rp| (None, Some(rp))),
|
||||
))),
|
||||
identifier,
|
||||
MeasurementName::parse,
|
||||
)(i)?;
|
||||
|
||||
// Extract possible `database` and / or `retention_policy`
|
||||
|
|
@ -104,7 +114,7 @@ pub fn measurement_name_expression(i: &str) -> ParseResult<&str, MeasurementName
|
|||
|
||||
Ok((
|
||||
remaining_input,
|
||||
MeasurementNameExpression {
|
||||
QualifiedMeasurementName {
|
||||
database,
|
||||
retention_policy,
|
||||
name,
|
||||
|
|
@ -290,35 +300,107 @@ mod tests {
|
|||
use crate::assert_expect_error;
|
||||
use nom::character::complete::alphanumeric1;
|
||||
|
||||
#[test]
|
||||
fn test_measurement_name_expression() {
|
||||
let (_, got) = measurement_name_expression("diskio").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementNameExpression {
|
||||
impl From<&str> for MeasurementName {
|
||||
/// Convert a `str` to [`MeasurementName::Name`].
|
||||
fn from(s: &str) -> Self {
|
||||
Self::Name(Identifier(s.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl QualifiedMeasurementName {
|
||||
/// Constructs a new `QualifiedMeasurementName` with the specified `name`.
|
||||
pub fn new(name: MeasurementName) -> Self {
|
||||
Self {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name: "diskio".into(),
|
||||
name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new `QualifiedMeasurementName` with the specified `name` and `database`.
|
||||
pub fn new_db(name: MeasurementName, database: Identifier) -> Self {
|
||||
Self {
|
||||
database: Some(database),
|
||||
retention_policy: None,
|
||||
name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new `QualifiedMeasurementName` with the specified `name`, `database` and `retention_policy`.
|
||||
pub fn new_db_rp(
|
||||
name: MeasurementName,
|
||||
database: Identifier,
|
||||
retention_policy: Identifier,
|
||||
) -> Self {
|
||||
Self {
|
||||
database: Some(database),
|
||||
retention_policy: Some(retention_policy),
|
||||
name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualified_measurement_name() {
|
||||
use MeasurementName::*;
|
||||
|
||||
let (_, got) = qualified_measurement_name("diskio").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
QualifiedMeasurementName {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name: Name("diskio".into()),
|
||||
}
|
||||
);
|
||||
|
||||
let (_, got) = measurement_name_expression("telegraf.autogen.diskio").unwrap();
|
||||
let (_, got) = qualified_measurement_name("/diskio/").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementNameExpression {
|
||||
QualifiedMeasurementName {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name: Regex("diskio".into()),
|
||||
}
|
||||
);
|
||||
|
||||
let (_, got) = qualified_measurement_name("telegraf.autogen.diskio").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
QualifiedMeasurementName {
|
||||
database: Some("telegraf".into()),
|
||||
retention_policy: Some("autogen".into()),
|
||||
name: "diskio".into(),
|
||||
name: Name("diskio".into()),
|
||||
}
|
||||
);
|
||||
|
||||
let (_, got) = measurement_name_expression("telegraf..diskio").unwrap();
|
||||
let (_, got) = qualified_measurement_name("telegraf.autogen./diskio/").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementNameExpression {
|
||||
QualifiedMeasurementName {
|
||||
database: Some("telegraf".into()),
|
||||
retention_policy: Some("autogen".into()),
|
||||
name: Regex("diskio".into()),
|
||||
}
|
||||
);
|
||||
|
||||
let (_, got) = qualified_measurement_name("telegraf..diskio").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
QualifiedMeasurementName {
|
||||
database: Some("telegraf".into()),
|
||||
retention_policy: None,
|
||||
name: "diskio".into(),
|
||||
name: Name("diskio".into()),
|
||||
}
|
||||
);
|
||||
|
||||
let (_, got) = qualified_measurement_name("telegraf../diskio/").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
QualifiedMeasurementName {
|
||||
database: Some("telegraf".into()),
|
||||
retention_policy: None,
|
||||
name: Regex("diskio".into()),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,9 +73,14 @@ mod test {
|
|||
// Validate via the Display trait, as we don't need to validate the contents of the
|
||||
// FROM and / or WHERE clauses, given they are tested in their own modules.
|
||||
|
||||
// Measurement name expressed as an identifier
|
||||
let (_, got) = delete_statement("DELETE FROM foo").unwrap();
|
||||
assert_eq!(format!("{}", got), "DELETE FROM foo");
|
||||
|
||||
// Measurement name expressed as a regular expression
|
||||
let (_, got) = delete_statement("DELETE FROM /foo/").unwrap();
|
||||
assert_eq!(format!("{}", got), "DELETE FROM /foo/");
|
||||
|
||||
let (_, got) = delete_statement("DELETE FROM foo WHERE time > 10").unwrap();
|
||||
assert_eq!(format!("{}", got), "DELETE FROM foo WHERE time > 10");
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,140 @@
|
|||
#![allow(dead_code)] // Temporary
|
||||
|
||||
use crate::internal::{expect, ParseResult};
|
||||
use crate::select::{select_statement, SelectStatement};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::character::complete::multispace1;
|
||||
use nom::combinator::{map, opt, value};
|
||||
use nom::sequence::{preceded, tuple};
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
/// Represents various options for an `EXPLAIN` statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ExplainOption {
|
||||
/// `EXPLAIN VERBOSE statement`
|
||||
Verbose,
|
||||
/// `EXPLAIN ANALYZE statement`
|
||||
Analyze,
|
||||
/// `EXPLAIN ANALYZE VERBOSE statement`
|
||||
AnalyzeVerbose,
|
||||
}
|
||||
|
||||
impl Display for ExplainOption {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Verbose => f.write_str("VERBOSE"),
|
||||
Self::Analyze => f.write_str("ANALYZE"),
|
||||
Self::AnalyzeVerbose => f.write_str("ANALYZE VERBOSE"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents an `EXPLAIN` statement.
|
||||
///
|
||||
/// ```text
|
||||
/// explain ::= "EXPLAIN" explain_options? select_statement
|
||||
/// explain_options ::= "VERBOSE" | ( "ANALYZE" "VERBOSE"? )
|
||||
/// ```
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ExplainStatement {
|
||||
options: Option<ExplainOption>,
|
||||
select: Box<SelectStatement>,
|
||||
}
|
||||
|
||||
impl Display for ExplainStatement {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("EXPLAIN ")?;
|
||||
if let Some(options) = &self.options {
|
||||
write!(f, "{} ", options)?;
|
||||
}
|
||||
Display::fmt(&self.select, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse an `EXPLAIN` statement.
|
||||
pub fn explain_statement(i: &str) -> ParseResult<&str, ExplainStatement> {
|
||||
map(
|
||||
tuple((
|
||||
tag_no_case("EXPLAIN"),
|
||||
opt(preceded(
|
||||
multispace1,
|
||||
alt((
|
||||
map(
|
||||
preceded(
|
||||
tag_no_case("ANALYZE"),
|
||||
opt(preceded(multispace1, tag_no_case("VERBOSE"))),
|
||||
),
|
||||
|v| match v {
|
||||
// If the optional combinator is Some, then it matched VERBOSE
|
||||
Some(_) => ExplainOption::AnalyzeVerbose,
|
||||
_ => ExplainOption::Analyze,
|
||||
},
|
||||
),
|
||||
value(ExplainOption::Verbose, tag_no_case("VERBOSE")),
|
||||
)),
|
||||
)),
|
||||
multispace1,
|
||||
expect(
|
||||
"invalid EXPLAIN statement, expected SELECT statement",
|
||||
select_statement,
|
||||
),
|
||||
)),
|
||||
|(_, options, _, select)| ExplainStatement {
|
||||
options,
|
||||
select: Box::new(select),
|
||||
},
|
||||
)(i)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::assert_expect_error;
|
||||
use crate::explain::{explain_statement, ExplainOption};
|
||||
use assert_matches::assert_matches;
|
||||
|
||||
#[test]
|
||||
fn test_explain_statement() {
|
||||
let (remain, got) = explain_statement("EXPLAIN SELECT val from temp").unwrap();
|
||||
assert_eq!(remain, ""); // assert that all input was consumed
|
||||
assert_matches!(got.options, None);
|
||||
assert_eq!(format!("{}", got), "EXPLAIN SELECT val FROM temp");
|
||||
|
||||
let (remain, got) = explain_statement("EXPLAIN VERBOSE SELECT val from temp").unwrap();
|
||||
assert_eq!(remain, "");
|
||||
assert_matches!(&got.options, Some(o) if *o == ExplainOption::Verbose);
|
||||
assert_eq!(format!("{}", got), "EXPLAIN VERBOSE SELECT val FROM temp");
|
||||
|
||||
let (remain, got) = explain_statement("EXPLAIN ANALYZE SELECT val from temp").unwrap();
|
||||
assert_eq!(remain, "");
|
||||
assert_matches!(&got.options, Some(o) if *o == ExplainOption::Analyze);
|
||||
assert_eq!(format!("{}", got), "EXPLAIN ANALYZE SELECT val FROM temp");
|
||||
|
||||
let (remain, got) =
|
||||
explain_statement("EXPLAIN ANALYZE VERBOSE SELECT val from temp").unwrap();
|
||||
assert_eq!(remain, "");
|
||||
assert_matches!(&got.options, Some(o) if *o == ExplainOption::AnalyzeVerbose);
|
||||
assert_eq!(
|
||||
format!("{}", got),
|
||||
"EXPLAIN ANALYZE VERBOSE SELECT val FROM temp"
|
||||
);
|
||||
|
||||
// Fallible cases
|
||||
|
||||
assert_expect_error!(
|
||||
explain_statement("EXPLAIN ANALYZE SHOW DATABASES"),
|
||||
"invalid EXPLAIN statement, expected SELECT statement"
|
||||
);
|
||||
|
||||
assert_expect_error!(
|
||||
explain_statement("EXPLAIN ANALYZE EXPLAIN SELECT val from temp"),
|
||||
"invalid EXPLAIN statement, expected SELECT statement"
|
||||
);
|
||||
|
||||
// surfaces statement-specific errors
|
||||
assert_expect_error!(
|
||||
explain_statement("EXPLAIN ANALYZE SELECT cpu FROM 'foo'"),
|
||||
"invalid FROM clause, expected identifier, regular expression or subquery"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -22,12 +22,10 @@ impl<I: Display> Display for Error<I> {
|
|||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Syntax { input: _, message } => {
|
||||
write!(f, "Syntax error: {}", message)?;
|
||||
write!(f, "Syntax error: {}", message)
|
||||
}
|
||||
Self::Nom(_, kind) => write!(f, "nom error: {:?}", kind)?,
|
||||
Self::Nom(_, kind) => write!(f, "nom error: {:?}", kind),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ mod test_util;
|
|||
mod common;
|
||||
mod delete;
|
||||
mod drop;
|
||||
mod explain;
|
||||
mod expression;
|
||||
mod identifier;
|
||||
mod internal;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::common::{
|
||||
limit_clause, measurement_name_expression, offset_clause, order_by_clause, where_clause,
|
||||
MeasurementNameExpression, OneOrMore, OrderByClause, Parser,
|
||||
limit_clause, offset_clause, order_by_clause, qualified_measurement_name, where_clause,
|
||||
OneOrMore, OrderByClause, Parser, QualifiedMeasurementName,
|
||||
};
|
||||
use crate::expression::arithmetic::Expr::Wildcard;
|
||||
use crate::expression::arithmetic::{
|
||||
|
|
@ -164,8 +164,7 @@ pub fn select_statement(i: &str) -> ParseResult<&str, SelectStatement> {
|
|||
/// Represents a single measurement selection found in a `FROM` clause.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum MeasurementSelection {
|
||||
Name(MeasurementNameExpression),
|
||||
Regex(Regex),
|
||||
Name(QualifiedMeasurementName),
|
||||
Subquery(Box<SelectStatement>),
|
||||
}
|
||||
|
||||
|
|
@ -173,7 +172,6 @@ impl Display for MeasurementSelection {
|
|||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Name(ref name) => fmt::Display::fmt(name, f),
|
||||
Self::Regex(ref re) => fmt::Display::fmt(re, f),
|
||||
Self::Subquery(ref subquery) => write!(f, "({})", subquery),
|
||||
}
|
||||
}
|
||||
|
|
@ -182,8 +180,7 @@ impl Display for MeasurementSelection {
|
|||
impl Parser for MeasurementSelection {
|
||||
fn parse(i: &str) -> ParseResult<&str, Self> {
|
||||
alt((
|
||||
map(measurement_name_expression, MeasurementSelection::Name),
|
||||
map(regex, MeasurementSelection::Regex),
|
||||
map(qualified_measurement_name, MeasurementSelection::Name),
|
||||
map(
|
||||
delimited(
|
||||
preceded(multispace0, char('(')),
|
||||
|
|
@ -812,7 +809,7 @@ mod test {
|
|||
assert_matches!(got, MeasurementSelection::Name(_));
|
||||
|
||||
let (_, got) = MeasurementSelection::parse("/regex/").unwrap();
|
||||
assert_matches!(got, MeasurementSelection::Regex(_));
|
||||
assert_matches!(got, MeasurementSelection::Name(_));
|
||||
|
||||
let (_, got) = MeasurementSelection::parse("(SELECT foo FROM bar)").unwrap();
|
||||
assert_matches!(got, MeasurementSelection::Subquery(_));
|
||||
|
|
|
|||
|
|
@ -2,24 +2,21 @@
|
|||
//!
|
||||
//! [sql]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-schema/#show-measurements
|
||||
|
||||
use crate::common::{
|
||||
limit_clause, offset_clause, qualified_measurement_name, where_clause, QualifiedMeasurementName,
|
||||
};
|
||||
use crate::expression::conditional::ConditionalExpression;
|
||||
use crate::identifier::{identifier, Identifier};
|
||||
use crate::internal::{expect, ParseResult};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, tag_no_case};
|
||||
use nom::character::complete::{char, multispace0, multispace1};
|
||||
use nom::character::complete::{multispace0, multispace1};
|
||||
use nom::combinator::{map, opt, value};
|
||||
use nom::sequence::tuple;
|
||||
use nom::sequence::{pair, preceded, terminated};
|
||||
use std::fmt;
|
||||
use std::fmt::Formatter;
|
||||
|
||||
use crate::common::{
|
||||
limit_clause, measurement_name_expression, offset_clause, where_clause,
|
||||
MeasurementNameExpression,
|
||||
};
|
||||
use crate::expression::conditional::ConditionalExpression;
|
||||
use crate::identifier::{identifier, Identifier};
|
||||
use crate::string::{regex, Regex};
|
||||
|
||||
/// OnExpression represents an InfluxQL database or retention policy name
|
||||
/// or a wildcard.
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
|
|
@ -110,18 +107,16 @@ impl fmt::Display for ShowMeasurementsStatement {
|
|||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum MeasurementExpression {
|
||||
Equals(MeasurementNameExpression),
|
||||
Regex(Regex),
|
||||
Equals(QualifiedMeasurementName),
|
||||
Regex(QualifiedMeasurementName),
|
||||
}
|
||||
|
||||
impl fmt::Display for MeasurementExpression {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Equals(ref name) => write!(f, "= {}", name)?,
|
||||
Self::Regex(ref re) => write!(f, "=~ {}", re)?,
|
||||
};
|
||||
|
||||
Ok(())
|
||||
Self::Equals(ref name) => write!(f, "= {}", name),
|
||||
Self::Regex(ref re) => write!(f, "=~ {}", re),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -140,23 +135,15 @@ fn with_measurement_clause(i: &str) -> ParseResult<&str, MeasurementExpression>
|
|||
"expected = or =~",
|
||||
alt((
|
||||
map(
|
||||
tuple((
|
||||
tag("=~"),
|
||||
multispace0,
|
||||
expect("expected regular expression literal", regex),
|
||||
)),
|
||||
|(_, _, regex)| MeasurementExpression::Regex(regex),
|
||||
preceded(pair(tag("=~"), multispace0), qualified_measurement_name),
|
||||
MeasurementExpression::Regex,
|
||||
),
|
||||
map(
|
||||
tuple((
|
||||
char('='),
|
||||
multispace0,
|
||||
expect(
|
||||
"expected measurement name or wildcard",
|
||||
measurement_name_expression,
|
||||
),
|
||||
)),
|
||||
|(_, _, name)| MeasurementExpression::Equals(name),
|
||||
preceded(
|
||||
pair(tag("="), multispace0),
|
||||
expect("expected measurement name", qualified_measurement_name),
|
||||
),
|
||||
MeasurementExpression::Equals,
|
||||
),
|
||||
)),
|
||||
),
|
||||
|
|
@ -200,6 +187,7 @@ pub fn show_measurements(i: &str) -> ParseResult<&str, ShowMeasurementsStatement
|
|||
mod test {
|
||||
use super::*;
|
||||
use crate::assert_expect_error;
|
||||
use crate::common::MeasurementName;
|
||||
use crate::expression::arithmetic::Expr;
|
||||
use assert_matches::assert_matches;
|
||||
|
||||
|
|
@ -232,7 +220,7 @@ mod test {
|
|||
ShowMeasurementsStatement {
|
||||
on_expression: Some(OnExpression::Database("foo".into())),
|
||||
measurement_expression: Some(MeasurementExpression::Equals(
|
||||
MeasurementNameExpression {
|
||||
QualifiedMeasurementName {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name: "bar".into(),
|
||||
|
|
@ -255,7 +243,9 @@ mod test {
|
|||
got,
|
||||
ShowMeasurementsStatement {
|
||||
on_expression: Some(OnExpression::Database("foo".into())),
|
||||
measurement_expression: Some(MeasurementExpression::Regex(Regex("bar".into()))),
|
||||
measurement_expression: Some(MeasurementExpression::Regex(
|
||||
QualifiedMeasurementName::new(MeasurementName::Regex("bar".into()))
|
||||
)),
|
||||
condition: Some(Expr::Literal(true.into()).into()),
|
||||
limit: None,
|
||||
offset: None
|
||||
|
|
@ -343,33 +333,50 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn test_with_measurement_clause() {
|
||||
use crate::common::MeasurementName::*;
|
||||
|
||||
let (_, got) = with_measurement_clause("WITH measurement = foo").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementExpression::Equals(MeasurementNameExpression {
|
||||
database: None,
|
||||
retention_policy: None,
|
||||
name: "foo".into()
|
||||
})
|
||||
MeasurementExpression::Equals(QualifiedMeasurementName::new(Name("foo".into())))
|
||||
);
|
||||
|
||||
let (_, got) = with_measurement_clause("WITH measurement =~ /foo/").unwrap();
|
||||
assert_eq!(got, MeasurementExpression::Regex(Regex("foo".into())));
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementExpression::Regex(QualifiedMeasurementName::new(Regex("foo".into())))
|
||||
);
|
||||
|
||||
// Expressions are still valid when whitespace is omitted
|
||||
|
||||
let (_, got) = with_measurement_clause("WITH measurement=foo..bar").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementExpression::Equals(MeasurementNameExpression {
|
||||
database: Some("foo".into()),
|
||||
retention_policy: None,
|
||||
name: "bar".into()
|
||||
})
|
||||
MeasurementExpression::Equals(QualifiedMeasurementName::new_db(
|
||||
Name("bar".into()),
|
||||
"foo".into()
|
||||
))
|
||||
);
|
||||
|
||||
let (_, got) = with_measurement_clause("WITH measurement=~/foo/").unwrap();
|
||||
assert_eq!(got, MeasurementExpression::Regex(Regex("foo".into())));
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementExpression::Regex(QualifiedMeasurementName::new(Regex("foo".into())))
|
||||
);
|
||||
|
||||
// Quirks of InfluxQL per https://github.com/influxdata/influxdb_iox/issues/5662
|
||||
|
||||
let (_, got) = with_measurement_clause("WITH measurement =~ foo").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementExpression::Regex(QualifiedMeasurementName::new(Name("foo".into())))
|
||||
);
|
||||
|
||||
let (_, got) = with_measurement_clause("WITH measurement = /foo/").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
MeasurementExpression::Equals(QualifiedMeasurementName::new(Regex("foo".into())))
|
||||
);
|
||||
|
||||
// Fallible cases
|
||||
|
||||
|
|
@ -379,28 +386,16 @@ mod test {
|
|||
"invalid WITH clause, expected MEASUREMENT"
|
||||
);
|
||||
|
||||
// Must have a regex for equal regex operator
|
||||
assert_expect_error!(
|
||||
with_measurement_clause("WITH measurement =~ foo"),
|
||||
"expected regular expression literal"
|
||||
);
|
||||
|
||||
// Unsupported regex not equal operator
|
||||
assert_expect_error!(
|
||||
with_measurement_clause("WITH measurement !~ foo"),
|
||||
"expected = or =~"
|
||||
);
|
||||
|
||||
// Must have an identifier for equal operator
|
||||
assert_expect_error!(
|
||||
with_measurement_clause("WITH measurement = /foo/"),
|
||||
"expected measurement name or wildcard"
|
||||
);
|
||||
|
||||
// Must have an identifier
|
||||
assert_expect_error!(
|
||||
with_measurement_clause("WITH measurement = 1"),
|
||||
"expected measurement name or wildcard"
|
||||
"expected measurement name"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,41 +1,12 @@
|
|||
use crate::common::{measurement_name_expression, MeasurementNameExpression, OneOrMore, Parser};
|
||||
use crate::common::{
|
||||
qualified_measurement_name, MeasurementName, OneOrMore, Parser, QualifiedMeasurementName,
|
||||
};
|
||||
use crate::identifier::{identifier, Identifier};
|
||||
use crate::internal::ParseResult;
|
||||
use crate::string::{regex, Regex};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::character::complete::multispace1;
|
||||
use nom::combinator::map;
|
||||
use nom::sequence::{pair, preceded};
|
||||
use std::fmt;
|
||||
use std::fmt::Formatter;
|
||||
|
||||
/// Represents a single measurement selection found in a `FROM` measurement clause.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum MeasurementSelection<T: Parser> {
|
||||
Name(T),
|
||||
Regex(Regex),
|
||||
}
|
||||
|
||||
impl<T: Parser> Parser for MeasurementSelection<T> {
|
||||
fn parse(i: &str) -> ParseResult<&str, Self> {
|
||||
alt((
|
||||
map(T::parse, MeasurementSelection::Name),
|
||||
map(regex, MeasurementSelection::Regex),
|
||||
))(i)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Display + Parser> fmt::Display for MeasurementSelection<T> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Name(ref name) => fmt::Display::fmt(name, f)?,
|
||||
Self::Regex(ref re) => fmt::Display::fmt(re, f)?,
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a `FROM` clause of a `DELETE` or `SHOW` statement.
|
||||
///
|
||||
|
|
@ -43,7 +14,7 @@ impl<T: fmt::Display + Parser> fmt::Display for MeasurementSelection<T> {
|
|||
/// for measurements names.
|
||||
///
|
||||
/// A `FROM` clause for a number of `SHOW` statements can accept a 3-part measurement name or
|
||||
pub type FromMeasurementClause<U> = OneOrMore<MeasurementSelection<U>>;
|
||||
pub type FromMeasurementClause<U> = OneOrMore<U>;
|
||||
|
||||
fn from_clause<T: Parser + fmt::Display>(i: &str) -> ParseResult<&str, FromMeasurementClause<T>> {
|
||||
preceded(
|
||||
|
|
@ -54,9 +25,9 @@ fn from_clause<T: Parser + fmt::Display>(i: &str) -> ParseResult<&str, FromMeasu
|
|||
)(i)
|
||||
}
|
||||
|
||||
impl Parser for MeasurementNameExpression {
|
||||
impl Parser for QualifiedMeasurementName {
|
||||
fn parse(i: &str) -> ParseResult<&str, Self> {
|
||||
measurement_name_expression(i)
|
||||
qualified_measurement_name(i)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -68,10 +39,9 @@ impl Parser for MeasurementNameExpression {
|
|||
/// It is defined by the following EBNF notation:
|
||||
///
|
||||
/// ```text
|
||||
/// from_clause ::= "FROM" measurement_selection ("," measurement_selection)*
|
||||
/// measurement_selection ::= measurement
|
||||
/// from_clause ::= "FROM" qualified_measurement_name ("," qualified_measurement_name)*
|
||||
///
|
||||
/// measurement ::= measurement_name |
|
||||
/// qualified_measurement_name ::= measurement_name |
|
||||
/// ( policy_name "." measurement_name ) |
|
||||
/// ( db_name "." policy_name? "." measurement_name )
|
||||
///
|
||||
|
|
@ -92,7 +62,7 @@ impl Parser for MeasurementNameExpression {
|
|||
/// ```text
|
||||
/// FROM foo, /bar/, some_database..foo, some_retention_policy.foobar
|
||||
/// ```
|
||||
pub type ShowFromClause = FromMeasurementClause<MeasurementNameExpression>;
|
||||
pub type ShowFromClause = FromMeasurementClause<QualifiedMeasurementName>;
|
||||
|
||||
/// Parse a `FROM` clause for various `SHOW` statements.
|
||||
pub fn show_from_clause(i: &str) -> ParseResult<&str, ShowFromClause> {
|
||||
|
|
@ -106,7 +76,7 @@ impl Parser for Identifier {
|
|||
}
|
||||
|
||||
/// Represents a `FROM` clause for a `DELETE` statement.
|
||||
pub type DeleteFromClause = FromMeasurementClause<Identifier>;
|
||||
pub type DeleteFromClause = FromMeasurementClause<MeasurementName>;
|
||||
|
||||
/// Parse a `FROM` clause for a `DELETE` statement.
|
||||
pub fn delete_from_clause(i: &str) -> ParseResult<&str, DeleteFromClause> {
|
||||
|
|
@ -119,49 +89,52 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn test_show_from_clause() {
|
||||
use crate::simple_from_clause::MeasurementSelection::*;
|
||||
use crate::common::MeasurementName::*;
|
||||
|
||||
let (_, from) = show_from_clause("FROM c").unwrap();
|
||||
assert_eq!(
|
||||
from,
|
||||
ShowFromClause::new(vec![Name(MeasurementNameExpression::new("c".into()))])
|
||||
ShowFromClause::new(vec![QualifiedMeasurementName::new(Name("c".into()))])
|
||||
);
|
||||
|
||||
let (_, from) = show_from_clause("FROM a..c").unwrap();
|
||||
assert_eq!(
|
||||
from,
|
||||
ShowFromClause::new(vec![Name(MeasurementNameExpression::new_db(
|
||||
"c".into(),
|
||||
ShowFromClause::new(vec![QualifiedMeasurementName::new_db(
|
||||
Name("c".into()),
|
||||
"a".into()
|
||||
))])
|
||||
)])
|
||||
);
|
||||
|
||||
let (_, from) = show_from_clause("FROM a.b.c").unwrap();
|
||||
assert_eq!(
|
||||
from,
|
||||
ShowFromClause::new(vec![Name(MeasurementNameExpression::new_db_rp(
|
||||
"c".into(),
|
||||
ShowFromClause::new(vec![QualifiedMeasurementName::new_db_rp(
|
||||
Name("c".into()),
|
||||
"a".into(),
|
||||
"b".into()
|
||||
))])
|
||||
)])
|
||||
);
|
||||
|
||||
let (_, from) = show_from_clause("FROM /reg/").unwrap();
|
||||
assert_eq!(from, ShowFromClause::new(vec![Regex("reg".into())]));
|
||||
assert_eq!(
|
||||
from,
|
||||
ShowFromClause::new(vec![QualifiedMeasurementName::new(Regex("reg".into()))])
|
||||
);
|
||||
|
||||
let (_, from) = show_from_clause("FROM c, /reg/").unwrap();
|
||||
assert_eq!(
|
||||
from,
|
||||
ShowFromClause::new(vec![
|
||||
Name(MeasurementNameExpression::new("c".into())),
|
||||
Regex("reg".into())
|
||||
QualifiedMeasurementName::new(Name("c".into())),
|
||||
QualifiedMeasurementName::new(Regex("reg".into()))
|
||||
])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_from_clause() {
|
||||
use crate::simple_from_clause::MeasurementSelection::*;
|
||||
use crate::common::MeasurementName::*;
|
||||
|
||||
let (_, from) = delete_from_clause("FROM c").unwrap();
|
||||
assert_eq!(from, DeleteFromClause::new(vec![Name("c".into())]));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use crate::delete::{delete_statement, DeleteStatement};
|
||||
use crate::drop::{drop_statement, DropMeasurementStatement};
|
||||
use crate::explain::{explain_statement, ExplainStatement};
|
||||
use crate::internal::ParseResult;
|
||||
use crate::select::{select_statement, SelectStatement};
|
||||
use crate::show::{show_statement, ShowDatabasesStatement};
|
||||
|
|
@ -19,6 +20,8 @@ pub enum Statement {
|
|||
Delete(Box<DeleteStatement>),
|
||||
/// Represents a `DROP MEASUREMENT` statement.
|
||||
DropMeasurement(Box<DropMeasurementStatement>),
|
||||
/// Represents an `EXPLAIN` statement.
|
||||
Explain(Box<ExplainStatement>),
|
||||
/// Represents a `SELECT` statement.
|
||||
Select(Box<SelectStatement>),
|
||||
/// Represents a `SHOW DATABASES` statement.
|
||||
|
|
@ -40,6 +43,7 @@ impl Display for Statement {
|
|||
match self {
|
||||
Self::Delete(s) => Display::fmt(s, f),
|
||||
Self::DropMeasurement(s) => Display::fmt(s, f),
|
||||
Self::Explain(s) => Display::fmt(s, f),
|
||||
Self::Select(s) => Display::fmt(s, f),
|
||||
Self::ShowDatabases(s) => Display::fmt(s, f),
|
||||
Self::ShowMeasurements(s) => Display::fmt(s, f),
|
||||
|
|
@ -56,6 +60,7 @@ pub fn statement(i: &str) -> ParseResult<&str, Statement> {
|
|||
alt((
|
||||
map(delete_statement, |s| Statement::Delete(Box::new(s))),
|
||||
map(drop_statement, |s| Statement::DropMeasurement(Box::new(s))),
|
||||
map(explain_statement, |s| Statement::Explain(Box::new(s))),
|
||||
map(select_statement, |s| Statement::Select(Box::new(s))),
|
||||
show_statement,
|
||||
))(i)
|
||||
|
|
@ -77,6 +82,10 @@ mod test {
|
|||
let (got, _) = statement("DROP MEASUREMENT foo").unwrap();
|
||||
assert_eq!(got, "");
|
||||
|
||||
// explain_statement combinator
|
||||
let (got, _) = statement("EXPLAIN SELECT * FROM cpu").unwrap();
|
||||
assert_eq!(got, "");
|
||||
|
||||
let (got, _) = statement("SELECT * FROM foo WHERE time > now() - 5m AND host = 'bar' GROUP BY TIME(5m) FILL(previous) ORDER BY time DESC").unwrap();
|
||||
assert_eq!(got, "");
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ ioxd_querier = { path = "../ioxd_querier"}
|
|||
ioxd_router = { path = "../ioxd_router"}
|
||||
ioxd_test = { path = "../ioxd_test"}
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
object_store_metrics = { path = "../object_store_metrics" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
panic_logging = { path = "../panic_logging" }
|
||||
|
|
@ -47,6 +47,8 @@ clap = { version = "4", features = ["derive", "env"] }
|
|||
console-subscriber = { version = "0.1.8", optional = true, features = ["parking_lot"] }
|
||||
dotenvy = "0.15.5"
|
||||
futures = "0.3"
|
||||
futures-util = { version = "0.3" }
|
||||
flate2 = "1.0"
|
||||
hashbrown = "0.12"
|
||||
http = "0.2.8"
|
||||
humantime = "2.1.0"
|
||||
|
|
@ -55,7 +57,7 @@ libc = { version = "0.2" }
|
|||
num_cpus = "1.13.0"
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
rustyline = { version = "10.0", default-features = false }
|
||||
serde_json = "1.0.83"
|
||||
serde_json = "1.0.86"
|
||||
snafu = "0.7"
|
||||
thiserror = "1.0.37"
|
||||
tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ pub enum Error {
|
|||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
enum QueryEngine {
|
||||
/// Run queries against the named database on the remote server
|
||||
/// Run queries against the namespace on the remote server
|
||||
Remote(String),
|
||||
|
||||
/// Run queries against a local `Observer` instance
|
||||
|
|
@ -177,7 +177,7 @@ pub struct Repl {
|
|||
/// Client for running sql
|
||||
flight_client: influxdb_iox_client::flight::Client,
|
||||
|
||||
/// database name against which SQL commands are run
|
||||
/// namespace name against which SQL commands are run
|
||||
query_engine: Option<QueryEngine>,
|
||||
|
||||
/// Formatter to use to format query results
|
||||
|
|
@ -239,8 +239,8 @@ impl Repl {
|
|||
.map_err(|e| println!("{}", e))
|
||||
.ok();
|
||||
}
|
||||
ReplCommand::UseDatabase { db_name } => {
|
||||
self.use_database(db_name);
|
||||
ReplCommand::UseNamespace { db_name } => {
|
||||
self.use_namespace(db_name);
|
||||
}
|
||||
ReplCommand::SqlCommand { sql } => {
|
||||
self.run_sql(sql).await.map_err(|e| println!("{}", e)).ok();
|
||||
|
|
@ -302,18 +302,18 @@ impl Repl {
|
|||
self.print_results(&[record_batch])
|
||||
}
|
||||
|
||||
// Run a command against the currently selected remote database
|
||||
// Run a command against the currently selected remote namespace
|
||||
async fn run_sql(&mut self, sql: String) -> Result<()> {
|
||||
let start = Instant::now();
|
||||
|
||||
let batches = match &mut self.query_engine {
|
||||
None => {
|
||||
println!("Error: no database selected.");
|
||||
println!("Hint: Run USE DATABASE <dbname> to select database");
|
||||
println!("Error: no namespace selected.");
|
||||
println!("Hint: Run USE NAMESPACE <dbname> to select namespace");
|
||||
return Ok(());
|
||||
}
|
||||
Some(QueryEngine::Remote(db_name)) => {
|
||||
info!(%db_name, %sql, "Running sql on remote database");
|
||||
info!(%db_name, %sql, "Running sql on remote namespace");
|
||||
|
||||
scrape_query(&mut self.flight_client, db_name, &sql).await?
|
||||
}
|
||||
|
|
@ -349,9 +349,9 @@ impl Repl {
|
|||
}
|
||||
}
|
||||
|
||||
fn use_database(&mut self, db_name: String) {
|
||||
info!(%db_name, "setting current database");
|
||||
println!("You are now in remote mode, querying database {}", db_name);
|
||||
fn use_namespace(&mut self, db_name: String) {
|
||||
info!(%db_name, "setting current namespace");
|
||||
println!("You are now in remote mode, querying namespace {}", db_name);
|
||||
self.set_query_engine(QueryEngine::Remote(db_name));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ pub enum ReplCommand {
|
|||
ShowNamespaces,
|
||||
Observer,
|
||||
SetFormat { format: String },
|
||||
UseDatabase { db_name: String },
|
||||
UseNamespace { db_name: String },
|
||||
SqlCommand { sql: String },
|
||||
Exit,
|
||||
}
|
||||
|
|
@ -64,18 +64,18 @@ impl TryFrom<&str> for ReplCommand {
|
|||
["observer"] => Ok(Self::Observer),
|
||||
["exit"] => Ok(Self::Exit),
|
||||
["quit"] => Ok(Self::Exit),
|
||||
["use", "database"] => {
|
||||
Err("name not specified. Usage: USE DATABASE <name>".to_string())
|
||||
} // USE DATABASE
|
||||
["use", "database", _name] => {
|
||||
// USE DATABASE <name>
|
||||
Ok(Self::UseDatabase {
|
||||
["use", "namespace"] => {
|
||||
Err("name not specified. Usage: USE NAMESPACE <name>".to_string())
|
||||
} // USE NAMESPACE
|
||||
["use", "namespace", _name] => {
|
||||
// USE namespace <name>
|
||||
Ok(Self::UseNamespace {
|
||||
db_name: raw_commands[2].to_string(),
|
||||
})
|
||||
}
|
||||
["use", _command] => {
|
||||
// USE <name>
|
||||
Ok(Self::UseDatabase {
|
||||
Ok(Self::UseNamespace {
|
||||
db_name: raw_commands[1].to_string(),
|
||||
})
|
||||
}
|
||||
|
|
@ -98,9 +98,9 @@ impl ReplCommand {
|
|||
Available commands (not case sensitive):
|
||||
HELP (this one)
|
||||
|
||||
SHOW NAMESPACES: List databases available on the server
|
||||
SHOW NAMESPACES: List namespaces available on the server
|
||||
|
||||
USE [DATABASE|NAMESPACE] <name>: Set the current remote database to name
|
||||
USE NAMESPACE <name>: Set the current remote namespace to name
|
||||
|
||||
SET FORMAT <format>: Set the output format to Pretty, csv or json
|
||||
|
||||
|
|
@ -108,9 +108,9 @@ OBSERVER: Locally query unified queryable views of remote system tables
|
|||
|
||||
[EXIT | QUIT]: Quit this session and exit the program
|
||||
|
||||
# Examples: use remote database foo
|
||||
SHOW DATABASES;
|
||||
USE DATABASE foo;
|
||||
# Examples: use remote namespace foo
|
||||
SHOW NAMESPACES;
|
||||
USE foo;
|
||||
|
||||
# Basic IOx SQL Primer
|
||||
|
||||
|
|
@ -199,35 +199,35 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn use_database() {
|
||||
let expected = Ok(ReplCommand::UseDatabase {
|
||||
fn use_namespace() {
|
||||
let expected = Ok(ReplCommand::UseNamespace {
|
||||
db_name: "Foo".to_string(),
|
||||
});
|
||||
assert_eq!("use Foo".try_into(), expected);
|
||||
assert_eq!("use Database Foo;".try_into(), expected);
|
||||
assert_eq!("use Database Foo ;".try_into(), expected);
|
||||
assert_eq!(" use Database Foo; ".try_into(), expected);
|
||||
assert_eq!(" use Database Foo; ".try_into(), expected);
|
||||
assert_eq!("use Namespace Foo;".try_into(), expected);
|
||||
assert_eq!("use Namespace Foo ;".try_into(), expected);
|
||||
assert_eq!(" use Namespace Foo; ".try_into(), expected);
|
||||
assert_eq!(" use Namespace Foo; ".try_into(), expected);
|
||||
|
||||
// ensure that database name is case sensitive
|
||||
let expected = Ok(ReplCommand::UseDatabase {
|
||||
// ensure that namespace name is case sensitive
|
||||
let expected = Ok(ReplCommand::UseNamespace {
|
||||
db_name: "FOO".to_string(),
|
||||
});
|
||||
assert_eq!("use FOO".try_into(), expected);
|
||||
assert_eq!("use DATABASE FOO;".try_into(), expected);
|
||||
assert_eq!("USE DATABASE FOO;".try_into(), expected);
|
||||
assert_eq!("use NAMESPACE FOO;".try_into(), expected);
|
||||
assert_eq!("USE NAMESPACE FOO;".try_into(), expected);
|
||||
|
||||
let expected: Result<ReplCommand, String> =
|
||||
Err("name not specified. Usage: USE DATABASE <name>".to_string());
|
||||
assert_eq!("use Database;".try_into(), expected);
|
||||
assert_eq!("use DATABASE".try_into(), expected);
|
||||
assert_eq!("use database".try_into(), expected);
|
||||
Err("name not specified. Usage: USE NAMESPACE <name>".to_string());
|
||||
assert_eq!("use Namespace;".try_into(), expected);
|
||||
assert_eq!("use NAMESPACE".try_into(), expected);
|
||||
assert_eq!("use namespace".try_into(), expected);
|
||||
|
||||
let expected = sql_cmd("use database foo bar");
|
||||
assert_eq!("use database foo bar".try_into(), expected);
|
||||
let expected = sql_cmd("use namespace foo bar");
|
||||
assert_eq!("use namespace foo bar".try_into(), expected);
|
||||
|
||||
let expected = sql_cmd("use database foo BAR");
|
||||
assert_eq!("use database foo BAR".try_into(), expected);
|
||||
let expected = sql_cmd("use namespace foo BAR");
|
||||
assert_eq!("use namespace foo BAR".try_into(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,14 @@
|
|||
use futures::StreamExt;
|
||||
use influxdb_iox_client::{connection::Connection, write};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{fs::File, io::Read, path::PathBuf};
|
||||
use observability_deps::tracing::info;
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{BufReader, Read},
|
||||
num::NonZeroUsize,
|
||||
path::PathBuf,
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Debug, Snafu)]
|
||||
|
|
@ -11,10 +19,30 @@ pub enum Error {
|
|||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error reading files: {:#?}", sources))]
|
||||
ReadingFiles { sources: Vec<Error> },
|
||||
|
||||
#[snafu(display("Client error: {source}"))]
|
||||
ClientError {
|
||||
source: influxdb_iox_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error converting parquet: {}", source))]
|
||||
Conversion {
|
||||
source: parquet_to_line_protocol::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Line protocol was not valid utf8: {}", source))]
|
||||
InvalidUtf8 { source: std::string::FromUtf8Error },
|
||||
|
||||
#[snafu(display("Error decoding gzip {:?}: {}", file_name, source))]
|
||||
Gz {
|
||||
file_name: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Max concurrent uploads must be greater than zero"))]
|
||||
MaxConcurrentUploadsVerfication,
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
|
@ -22,36 +50,176 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
|||
/// Write data into the specified database
|
||||
#[derive(Debug, clap::Parser)]
|
||||
pub struct Config {
|
||||
/// If specified, restricts the maximum amount of line protocol
|
||||
/// sent per request to this many bytes. Defaults to 1MB
|
||||
#[clap(action, long, short = 'b', default_value = "1048576")]
|
||||
max_request_payload_size_bytes: usize,
|
||||
|
||||
/// Uploads up to this many http requests at a time. Defaults to 10
|
||||
#[clap(action, long, short = 'c', default_value = "10")]
|
||||
max_concurrent_uploads: usize,
|
||||
|
||||
/// The namespace into which to write
|
||||
#[clap(action)]
|
||||
namespace: String,
|
||||
|
||||
/// File with data to load. Currently supported formats are .lp
|
||||
/// File(s) with data to load. Currently supported formats are .lp (line protocol),
|
||||
/// .parquet (IOx created parquet files), and .gz (gzipped line protocol)
|
||||
#[clap(action)]
|
||||
file_name: PathBuf,
|
||||
file_names: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
||||
let start = Instant::now();
|
||||
|
||||
let Config {
|
||||
namespace,
|
||||
file_name,
|
||||
file_names,
|
||||
max_request_payload_size_bytes,
|
||||
max_concurrent_uploads,
|
||||
} = config;
|
||||
let file_name = &file_name;
|
||||
|
||||
let mut file = File::open(file_name).context(ReadingFileSnafu { file_name })?;
|
||||
let max_concurrent_uploads =
|
||||
NonZeroUsize::new(max_concurrent_uploads).context(MaxConcurrentUploadsVerficationSnafu)?;
|
||||
|
||||
let mut lp_data = String::new();
|
||||
file.read_to_string(&mut lp_data)
|
||||
.context(ReadingFileSnafu { file_name })?;
|
||||
info!(
|
||||
num_files = file_names.len(),
|
||||
max_request_payload_size_bytes, max_concurrent_uploads, "Beginning upload"
|
||||
);
|
||||
|
||||
let mut client = write::Client::new(connection);
|
||||
// first pass is to check that all the files exist and can be
|
||||
// opened and if not fail fast.
|
||||
let file_open_errors: Vec<_> = file_names
|
||||
.iter()
|
||||
.filter_map(|file_name| {
|
||||
File::open(file_name)
|
||||
.context(ReadingFileSnafu { file_name })
|
||||
.err()
|
||||
})
|
||||
.collect();
|
||||
|
||||
ensure!(
|
||||
file_open_errors.is_empty(),
|
||||
ReadingFilesSnafu {
|
||||
sources: file_open_errors
|
||||
}
|
||||
);
|
||||
|
||||
// if everything looked good, go through and read the contents of
|
||||
// the files, potentially in parallel.
|
||||
let lp_stream = futures_util::stream::iter(file_names)
|
||||
.map(|file_name| tokio::task::spawn(slurp_file(file_name)))
|
||||
// Since the contents of each file are buffered into a string,
|
||||
// limit the number that are open at once to the maximum
|
||||
// possible uploads
|
||||
.buffered(max_concurrent_uploads.into())
|
||||
// warn and skip any errors
|
||||
.filter_map(|res| async move {
|
||||
match res {
|
||||
Ok(Ok(lp_data)) => Some(lp_data),
|
||||
Ok(Err(e)) => {
|
||||
eprintln!("WARNING: ignoring error : {}", e);
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("WARNING: ignoring task fail: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut client = write::Client::new(connection)
|
||||
.with_max_concurrent_uploads(max_concurrent_uploads)
|
||||
.with_max_request_payload_size_bytes(Some(max_request_payload_size_bytes));
|
||||
|
||||
let total_bytes = client
|
||||
.write_lp(namespace, lp_data)
|
||||
.write_lp_stream(namespace, lp_stream)
|
||||
.await
|
||||
.context(ClientSnafu)?;
|
||||
|
||||
println!("{} Bytes OK", total_bytes);
|
||||
let elapsed = Instant::now() - start;
|
||||
let mb = (total_bytes as f64) / (1024.0 * 1024.0);
|
||||
let mb_per_sec = (mb / (elapsed.as_millis() as f64)) * (1000.0);
|
||||
println!("{total_bytes} Bytes OK in {elapsed:?}. {mb_per_sec:.2} MB/sec");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reads the contents of `file_name into a string
|
||||
///
|
||||
/// .parquet files --> iox parquet files (convert to parquet)
|
||||
/// .gz --> treated as gzipped line protocol
|
||||
/// .lp (or anything else) --> treated as raw line protocol
|
||||
///
|
||||
async fn slurp_file(file_name: PathBuf) -> Result<String> {
|
||||
let file_name = &file_name;
|
||||
|
||||
let extension = file_name
|
||||
.extension()
|
||||
.map(|extension| extension.to_ascii_lowercase());
|
||||
|
||||
match extension {
|
||||
// Transform parquet to line protocol prior to upload
|
||||
// Not the most efficient process, but it is expedient
|
||||
Some(extension) if extension.to_string_lossy() == "parquet" => {
|
||||
let mut lp_data = vec![];
|
||||
parquet_to_line_protocol::convert_file(file_name, &mut lp_data)
|
||||
.await
|
||||
.context(ConversionSnafu)?;
|
||||
|
||||
let lp_data = String::from_utf8(lp_data).context(InvalidUtf8Snafu)?;
|
||||
info!(
|
||||
?file_name,
|
||||
file_size_bytes = lp_data.len(),
|
||||
"Buffered line protocol from parquet file"
|
||||
);
|
||||
Ok(lp_data)
|
||||
}
|
||||
// decompress as gz
|
||||
Some(extension) if extension.to_string_lossy() == "gz" => {
|
||||
let mut lp_data = String::new();
|
||||
let reader =
|
||||
BufReader::new(File::open(&file_name).context(ReadingFileSnafu { file_name })?);
|
||||
|
||||
flate2::read::GzDecoder::new(reader)
|
||||
.read_to_string(&mut lp_data)
|
||||
.context(GzSnafu { file_name })?;
|
||||
|
||||
info!(
|
||||
?file_name,
|
||||
file_size_bytes = lp_data.len(),
|
||||
"Buffered line protocol from gzipped line protocol file"
|
||||
);
|
||||
Ok(lp_data)
|
||||
}
|
||||
// anything else, treat as line protocol
|
||||
Some(_) | None => {
|
||||
let lp_data =
|
||||
std::fs::read_to_string(file_name).context(ReadingFileSnafu { file_name })?;
|
||||
|
||||
info!(
|
||||
?file_name,
|
||||
file_size_bytes = lp_data.len(),
|
||||
"Buffered line protocol file"
|
||||
);
|
||||
Ok(lp_data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use clap::Parser;
    use influxdb_iox_client::write::DEFAULT_MAX_REQUEST_PAYLOAD_SIZE_BYTES;

    /// The payload size limit this command uses when none is given on
    /// the command line must equal the write client's own default.
    #[test]
    fn command_default_is_same_as_client_default() {
        // No payload size argument is passed, so the clap default applies.
        let parsed = Config::try_parse_from(vec!["my_db", "file1"]).unwrap();
        let cli_default = Some(parsed.max_request_payload_size_bytes);

        assert_eq!(cli_default, DEFAULT_MAX_REQUEST_PAYLOAD_SIZE_BYTES);
    }
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ use predicates::prelude::*;
|
|||
use serde_json::Value;
|
||||
use std::time::{Duration, Instant};
|
||||
use tempfile::tempdir;
|
||||
use test_helpers::make_temp_file;
|
||||
use test_helpers_end_to_end::{
|
||||
maybe_skip_integration, AddAddrEnv, BindAddresses, MiniCluster, ServerType, Step, StepTest,
|
||||
StepTestState,
|
||||
|
|
@ -526,9 +525,6 @@ async fn write_and_query() {
|
|||
vec![
|
||||
Step::Custom(Box::new(|state: &mut StepTestState| {
|
||||
async {
|
||||
// write line protocol to a temp file
|
||||
let lp_file = make_temp_file("m,tag=1 v=2 12345");
|
||||
let lp_file_path = lp_file.path().to_string_lossy().to_string();
|
||||
let router_addr = state.cluster().router().router_http_base().to_string();
|
||||
|
||||
let namespace = state.cluster().namespace();
|
||||
|
|
@ -537,53 +533,48 @@ async fn write_and_query() {
|
|||
// Validate the output of the schema CLI command
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("-v")
|
||||
.arg("-h")
|
||||
.arg(&router_addr)
|
||||
.arg("write")
|
||||
.arg(&namespace)
|
||||
.arg(&lp_file_path)
|
||||
// raw line protocol ('h2o_temperature' measurement)
|
||||
.arg("../test_fixtures/lineproto/air_and_water.lp")
|
||||
// gzipped line protocol ('m0')
|
||||
.arg("../test_fixtures/lineproto/read_filter.lp.gz")
|
||||
// iox formatted parquet ('cpu' measurement)
|
||||
.arg("../test_fixtures/cpu.parquet")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(predicate::str::contains("17 Bytes OK"));
|
||||
// this number is the total size of
|
||||
// uncompressed line protocol stored in all
|
||||
// three files
|
||||
.stdout(predicate::str::contains("1137058 Bytes OK"));
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
Step::Custom(Box::new(|state: &mut StepTestState| {
|
||||
async {
|
||||
let querier_addr = state.cluster().querier().querier_grpc_base().to_string();
|
||||
let namespace = state.cluster().namespace();
|
||||
// data from 'air_and_water.lp'
|
||||
wait_for_query_result(
|
||||
state,
|
||||
"SELECT * from h2o_temperature order by time desc limit 10",
|
||||
"| 51.3 | coyote_creek | CA | 55.1 | 1970-01-01T00:00:01.568756160Z |"
|
||||
).await;
|
||||
|
||||
let max_wait_time = Duration::from_secs(10);
|
||||
let expected = "| 1 | 1970-01-01T00:00:00.000012345Z | 2 |";
|
||||
println!("Waiting for {expected}");
|
||||
// data from 'read_filter.lp.gz'
|
||||
wait_for_query_result(
|
||||
state,
|
||||
"SELECT * from m0 order by time desc limit 10;",
|
||||
"| value1 | value9 | value9 | value49 | value0 | 2021-04-26T13:47:39.727574Z | 1 |"
|
||||
).await;
|
||||
|
||||
// Validate the output of running the query CLI command appears after at most max_wait_time
|
||||
let end = Instant::now() + max_wait_time;
|
||||
while Instant::now() < end {
|
||||
let maybe_result = Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("-h")
|
||||
.arg(&querier_addr)
|
||||
.arg("query")
|
||||
.arg(&namespace)
|
||||
.arg("SELECT * from m")
|
||||
.assert()
|
||||
.success()
|
||||
.try_stdout(predicate::str::contains(expected));
|
||||
|
||||
match maybe_result {
|
||||
Err(e) => {
|
||||
println!("Got err: {}, retrying", e);
|
||||
}
|
||||
Ok(r) => {
|
||||
println!("Success: {:?}", r);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// sleep and try again
|
||||
tokio::time::sleep(Duration::from_millis(500)).await
|
||||
}
|
||||
panic!("Did not find expected output in allotted time");
|
||||
// data from 'cpu.parquet'
|
||||
wait_for_query_result(
|
||||
state,
|
||||
"SELECT * from cpu where cpu = 'cpu2' order by time desc limit 10",
|
||||
"cpu2 | MacBook-Pro-8.hsd1.ma.comcast.net | 2022-09-30T12:55:00Z"
|
||||
).await;
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
|
|
@ -593,6 +584,53 @@ async fn write_and_query() {
|
|||
.await
|
||||
}
|
||||
|
||||
/// Runs the specified query in a loop for up to 10 seconds, waiting
|
||||
/// for the specified output to appear
|
||||
async fn wait_for_query_result(state: &mut StepTestState<'_>, query_sql: &str, expected: &str) {
|
||||
let querier_addr = state.cluster().querier().querier_grpc_base().to_string();
|
||||
let namespace = state.cluster().namespace();
|
||||
|
||||
let max_wait_time = Duration::from_secs(10);
|
||||
println!("Waiting for {expected}");
|
||||
|
||||
// Validate the output of running the query CLI command appears after at most max_wait_time
|
||||
let end = Instant::now() + max_wait_time;
|
||||
while Instant::now() < end {
|
||||
let assert = Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.arg("-h")
|
||||
.arg(&querier_addr)
|
||||
.arg("query")
|
||||
.arg(&namespace)
|
||||
.arg(query_sql)
|
||||
.assert();
|
||||
|
||||
let assert = match assert.try_success() {
|
||||
Err(e) => {
|
||||
println!("Got err running command: {}, retrying", e);
|
||||
continue;
|
||||
}
|
||||
Ok(a) => a,
|
||||
};
|
||||
|
||||
match assert.try_stdout(predicate::str::contains(expected)) {
|
||||
Err(e) => {
|
||||
println!("No match: {}, retrying", e);
|
||||
}
|
||||
Ok(r) => {
|
||||
println!("Success: {:?}", r);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// sleep and try again
|
||||
tokio::time::sleep(Duration::from_secs(1)).await
|
||||
}
|
||||
panic!(
|
||||
"Did not find expected output {} within {:?}",
|
||||
expected, max_wait_time
|
||||
);
|
||||
}
|
||||
|
||||
/// Test the schema cli command
|
||||
#[tokio::test]
|
||||
async fn namespaces_cli() {
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ async fn ingester_flight_api() {
|
|||
partition_id,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None
|
||||
})
|
||||
},
|
||||
);
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ use futures::FutureExt;
|
|||
use predicates::prelude::*;
|
||||
use test_helpers::assert_contains;
|
||||
use test_helpers_end_to_end::{
|
||||
maybe_skip_integration, run_query, MiniCluster, Step, StepTest, StepTestState, TestConfig,
|
||||
maybe_skip_integration, run_query, try_run_query, GrpcRequestBuilder, MiniCluster, Step,
|
||||
StepTest, StepTestState, TestConfig,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -454,6 +455,87 @@ async fn issue_4631_b() {
|
|||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn oom_protection() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let table_name = "the_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let router_config = TestConfig::new_router(&database_url);
|
||||
let ingester_config = TestConfig::new_ingester(&router_config);
|
||||
let querier_config =
|
||||
TestConfig::new_querier(&ingester_config).with_querier_max_table_query_bytes(1);
|
||||
let mut cluster = MiniCluster::new()
|
||||
.with_router(router_config)
|
||||
.await
|
||||
.with_ingester(ingester_config)
|
||||
.await
|
||||
.with_querier(querier_config)
|
||||
.await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!("{},tag1=A,tag2=B val=42i 123457", table_name)),
|
||||
Step::WaitForReadable,
|
||||
Step::AssertNotPersisted,
|
||||
// SQL query
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let sql = format!("select * from {}", table_name);
|
||||
let err = try_run_query(
|
||||
sql,
|
||||
state.cluster().namespace(),
|
||||
state.cluster().querier().querier_grpc_connection(),
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
if let influxdb_iox_client::flight::Error::GrpcError(status) = err {
|
||||
assert_eq!(
|
||||
status.code(),
|
||||
tonic::Code::ResourceExhausted,
|
||||
"Wrong status code: {}\n\nStatus:\n{}",
|
||||
status.code(),
|
||||
status,
|
||||
);
|
||||
} else {
|
||||
panic!("Not a gRPC error: {err}");
|
||||
}
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
// InfluxRPC/storage query
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let mut storage_client = state.cluster().querier_storage_client();
|
||||
|
||||
let read_filter_request = GrpcRequestBuilder::new()
|
||||
.source(state.cluster())
|
||||
.build_read_filter();
|
||||
|
||||
let status = storage_client
|
||||
.read_filter(read_filter_request)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert_eq!(
|
||||
status.code(),
|
||||
tonic::Code::ResourceExhausted,
|
||||
"Wrong status code: {}\n\nStatus:\n{}",
|
||||
status.code(),
|
||||
status,
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
/// This structure holds information for tests that need to force a parquet file to be persisted
|
||||
struct ForcePersistenceSetup {
|
||||
// Set up a cluster that will persist quickly
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ format = ["arrow", "arrow_util"]
|
|||
# Workspace dependencies, in alphabetical order
|
||||
arrow_util = { path = "../arrow_util", optional = true }
|
||||
client_util = { path = "../client_util" }
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol"}
|
||||
generated_types = { path = "../generated_types", default-features = false, features = ["data_types_conversions"] }
|
||||
|
||||
# Crates.io dependencies, in alphabetical order
|
||||
|
|
@ -23,9 +24,7 @@ futures-util = { version = "0.3", optional = true }
|
|||
prost = "0.11"
|
||||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
|
||||
tokio = { version = "1.21", features = ["macros", "parking_lot", "rt-multi-thread"] }
|
||||
tokio-stream = "0.1.11"
|
||||
thiserror = "1.0.37"
|
||||
tonic = { version = "0.8" }
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
tokio = { version = "1.21", features = ["macros", "parking_lot", "rt-multi-thread"] }
|
||||
mockito = "0.31"
|
||||
|
|
@ -1,15 +1,16 @@
|
|||
/// Re-export generated_types
|
||||
pub mod generated_types {
|
||||
pub use generated_types::influxdata::pbdata::v1::*;
|
||||
}
|
||||
use std::{fmt::Debug, num::NonZeroUsize, sync::Arc};
|
||||
|
||||
use client_util::{connection::HttpConnection, namespace_translation::split_namespace};
|
||||
use futures_util::{future::BoxFuture, FutureExt, Stream, StreamExt, TryStreamExt};
|
||||
|
||||
use crate::{
|
||||
connection::Connection,
|
||||
error::{translate_response, Error},
|
||||
};
|
||||
use reqwest::Method;
|
||||
use reqwest::{Body, Method};
|
||||
|
||||
/// The default value for the maximum size of each request, in bytes
|
||||
pub const DEFAULT_MAX_REQUEST_PAYLOAD_SIZE_BYTES: Option<usize> = Some(1024 * 1024);
|
||||
|
||||
/// An IOx Write API client.
|
||||
///
|
||||
|
|
@ -37,18 +38,67 @@ use reqwest::Method;
|
|||
/// ```
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Client {
|
||||
inner: HttpConnection,
|
||||
/// The inner client used to actually make requests.
|
||||
///
|
||||
/// Uses a trait for test mocking.
|
||||
///
|
||||
/// Does not expose the trait in the `Client` type to avoid
|
||||
/// exposing an internal implementation detail (the trait) in the
|
||||
/// public interface.
|
||||
inner: Arc<dyn RequestMaker>,
|
||||
|
||||
/// If `Some`, restricts the maximum amount of line protocol
|
||||
/// sent per request to this many bytes. If `None`, does not restrict
|
||||
/// the amount sent per request. Defaults to `Some(1MB)`
|
||||
///
|
||||
/// Splitting the upload size consumes a non trivial amount of CPU
|
||||
/// to find line protocol boundaries. This can be disabled by
|
||||
/// setting `max_request_payload_size_bytes` to `None`.
|
||||
max_request_payload_size_bytes: Option<usize>,
|
||||
|
||||
/// Makes this many concurrent requests at a time. Defaults to 1
|
||||
max_concurrent_uploads: NonZeroUsize,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
/// Creates a new client with the provided connection
|
||||
pub fn new(connection: Connection) -> Self {
|
||||
Self::new_with_maker(Arc::new(connection.into_http_connection()))
|
||||
}
|
||||
|
||||
/// Creates a new client with the provided request maker
|
||||
fn new_with_maker(inner: Arc<dyn RequestMaker>) -> Self {
|
||||
Self {
|
||||
inner: connection.into_http_connection(),
|
||||
inner,
|
||||
max_request_payload_size_bytes: DEFAULT_MAX_REQUEST_PAYLOAD_SIZE_BYTES,
|
||||
max_concurrent_uploads: NonZeroUsize::new(1).unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the [LineProtocol] formatted data in `lp_data` to
|
||||
/// Override the default of sending 1MB of line protocol per request.
|
||||
/// If `Some` is specified, restricts the maximum amount of line protocol
|
||||
/// sent per request to this many bytes. If `None`, does not restrict the amount of
|
||||
/// line protocol sent per request.
|
||||
pub fn with_max_request_payload_size_bytes(
|
||||
self,
|
||||
max_request_payload_size_bytes: Option<usize>,
|
||||
) -> Self {
|
||||
Self {
|
||||
max_request_payload_size_bytes,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// The client makes this many concurrent uploads at a
|
||||
/// time. Defaults to 1.
|
||||
pub fn with_max_concurrent_uploads(self, max_concurrent_uploads: NonZeroUsize) -> Self {
|
||||
Self {
|
||||
max_concurrent_uploads,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the [LineProtocol] formatted string in `lp_data` to
|
||||
/// namespace `namespace`.
|
||||
///
|
||||
/// Returns the number of bytes which were written to the database
|
||||
|
|
@ -59,11 +109,24 @@ impl Client {
|
|||
namespace: impl AsRef<str> + Send,
|
||||
lp_data: impl Into<String> + Send,
|
||||
) -> Result<usize, Error> {
|
||||
let lp_data = lp_data.into();
|
||||
let data_len = lp_data.len();
|
||||
let sources = futures_util::stream::iter([lp_data.into()]);
|
||||
|
||||
let write_url = format!("{}api/v2/write", self.inner.uri());
|
||||
self.write_lp_stream(namespace, sources).await
|
||||
}
|
||||
|
||||
/// Write the stream of [LineProtocol] formatted strings in
|
||||
/// `sources` to namespace `namespace`. It is assumed that
|
||||
/// individual lines (points) do not cross these strings
|
||||
///
|
||||
/// Returns the number of bytes, in total, which were written to
|
||||
/// the database
|
||||
///
|
||||
/// [LineProtocol]: https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/#data-types-and-format
|
||||
pub async fn write_lp_stream(
|
||||
&mut self,
|
||||
namespace: impl AsRef<str> + Send,
|
||||
sources: impl Stream<Item = String> + Send,
|
||||
) -> Result<usize, Error> {
|
||||
let (org_id, bucket_id) = split_namespace(namespace.as_ref()).map_err(|e| {
|
||||
Error::invalid_argument(
|
||||
"namespace",
|
||||
|
|
@ -71,47 +134,302 @@ impl Client {
|
|||
)
|
||||
})?;
|
||||
|
||||
let response = self
|
||||
.inner
|
||||
.client()
|
||||
.request(Method::POST, &write_url)
|
||||
.query(&[("bucket", bucket_id), ("org", org_id)])
|
||||
.body(lp_data)
|
||||
.send()
|
||||
let max_concurrent_uploads: usize = self.max_concurrent_uploads.into();
|
||||
let max_request_payload_size_bytes = self.max_request_payload_size_bytes;
|
||||
|
||||
// make a stream and process in parallel
|
||||
let results = sources
|
||||
// split each input source in parallel, if possible
|
||||
.flat_map(|source| {
|
||||
split_lp(
|
||||
source,
|
||||
max_request_payload_size_bytes,
|
||||
max_concurrent_uploads,
|
||||
)
|
||||
})
|
||||
// do the actual write
|
||||
.map(|source| {
|
||||
let org_id = org_id.to_string();
|
||||
let bucket_id = bucket_id.to_string();
|
||||
let inner = Arc::clone(&self.inner);
|
||||
|
||||
tokio::task::spawn(
|
||||
async move { inner.write_source(org_id, bucket_id, source).await },
|
||||
)
|
||||
})
|
||||
// Do the uploads in parallel
|
||||
.buffered(max_concurrent_uploads)
|
||||
.try_collect::<Vec<_>>()
|
||||
// handle panics in tasks
|
||||
.await
|
||||
.map_err(Error::client)?;
|
||||
.map_err(Error::client)?
|
||||
// find / return any errors
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
|
||||
translate_response(response).await?;
|
||||
Ok(results.into_iter().sum())
|
||||
}
|
||||
}
|
||||
|
||||
Ok(data_len)
|
||||
/// Something that knows how to send http data. Exists so it can be
|
||||
/// mocked out for testing
|
||||
trait RequestMaker: Debug + Send + Sync {
|
||||
/// Write the body data to the specified org, bucket, and
|
||||
/// returning the number of bytes written
|
||||
///
|
||||
/// (this is implemented manually to avoid `async_trait`)
|
||||
fn write_source(
|
||||
&self,
|
||||
org_id: String,
|
||||
bucket_id: String,
|
||||
body: String,
|
||||
) -> BoxFuture<'_, Result<usize, Error>>;
|
||||
}
|
||||
|
||||
impl RequestMaker for HttpConnection {
|
||||
fn write_source(
|
||||
&self,
|
||||
org_id: String,
|
||||
bucket_id: String,
|
||||
body: String,
|
||||
) -> BoxFuture<'_, Result<usize, Error>> {
|
||||
let write_url = format!("{}api/v2/write", self.uri());
|
||||
|
||||
async move {
|
||||
let body: Body = body.into();
|
||||
|
||||
let data_len = body.as_bytes().map(|b| b.len()).unwrap_or(0);
|
||||
|
||||
let response = self
|
||||
.client()
|
||||
.request(Method::POST, &write_url)
|
||||
.query(&[("bucket", bucket_id), ("org", org_id)])
|
||||
.body(body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(Error::client)?;
|
||||
|
||||
translate_response(response).await?;
|
||||
|
||||
Ok(data_len)
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
/// splits input line protocol into one or more sizes of at most
|
||||
/// `max_chunk` on line breaks in a separte tokio task
|
||||
fn split_lp(
|
||||
input: String,
|
||||
max_chunk_size: Option<usize>,
|
||||
max_concurrent_uploads: usize,
|
||||
) -> impl Stream<Item = String> {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(max_concurrent_uploads);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
match max_chunk_size {
|
||||
None => {
|
||||
// ignore errors (means the receiver hung up but nothing to communicate
|
||||
tx.send(input).await.ok();
|
||||
}
|
||||
Some(max_chunk_size) => {
|
||||
// use the actual line protocol parser to split on valid boundaries
|
||||
let mut acc = LineAccumulator::new(max_chunk_size);
|
||||
for l in influxdb_line_protocol::split_lines(&input) {
|
||||
if let Some(chunk) = acc.push(l) {
|
||||
// abort if receiver has hungup
|
||||
if tx.send(chunk).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(chunk) = acc.flush() {
|
||||
tx.send(chunk).await.ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
tokio_stream::wrappers::ReceiverStream::new(rx)
|
||||
}
|
||||
/// Groups individual lines into newline-separated chunks of at most
/// `max_chunk_size` bytes.
///
/// A single line that is longer than `max_chunk_size` is never split;
/// it ends up in a chunk of its own that exceeds the limit.
#[derive(Debug)]
struct LineAccumulator {
    /// The chunk currently being assembled
    current_chunk: String,
    /// Size threshold, in bytes, that triggers starting a new chunk
    max_chunk_size: usize,
}

impl LineAccumulator {
    /// Creates an empty accumulator whose buffer is pre-allocated to
    /// `max_chunk_size` bytes.
    fn new(max_chunk_size: usize) -> Self {
        let current_chunk = String::with_capacity(max_chunk_size);
        Self {
            current_chunk,
            max_chunk_size,
        }
    }

    /// Appends line `l` to the chunk being built.
    ///
    /// If adding `l` (plus one byte for a separator) would push the
    /// current chunk past `max_chunk_size`, the current chunk is
    /// completed and returned, and `l` becomes the start of the next
    /// chunk. Otherwise returns `None`.
    fn push(&mut self, l: &str) -> Option<String> {
        let size_with_line = self.current_chunk.len() + l.len() + 1;

        let mut completed = None;
        if size_with_line > self.max_chunk_size {
            completed = self.flush();
        }

        // Lines within a chunk are separated by (not terminated with) '\n'
        if !self.current_chunk.is_empty() {
            self.current_chunk.push('\n');
        }
        self.current_chunk.push_str(l);

        completed
    }

    /// Returns the chunk built so far and starts a new, empty one with
    /// capacity `max_chunk_size`.
    ///
    /// Returns `None`, leaving the buffer untouched, if the current
    /// chunk is empty.
    fn flush(&mut self) -> Option<String> {
        if self.current_chunk.is_empty() {
            return None;
        }

        let fresh = String::with_capacity(self.max_chunk_size);
        Some(std::mem::replace(&mut self.current_chunk, fresh))
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Mutex;
|
||||
|
||||
use super::*;
|
||||
use crate::connection::Builder;
|
||||
|
||||
#[tokio::test]
|
||||
/// Ensure the basic plumbing is hooked up correctly
|
||||
async fn basic() {
|
||||
let url = mockito::server_url();
|
||||
|
||||
let connection = Builder::new().build(&url).await.unwrap();
|
||||
async fn test() {
|
||||
let mock = Arc::new(MockRequestMaker::new());
|
||||
|
||||
let namespace = "orgname_bucketname";
|
||||
let data = "m,t=foo f=4";
|
||||
|
||||
let m = mockito::mock("POST", "/api/v2/write?bucket=bucketname&org=orgname")
|
||||
.with_status(201)
|
||||
.match_body(data)
|
||||
.create();
|
||||
let expected = vec![MockRequest {
|
||||
org_id: "orgname".into(),
|
||||
bucket_id: "bucketname".into(),
|
||||
body: data.into(),
|
||||
}];
|
||||
|
||||
let res = Client::new(connection).write_lp(namespace, data).await;
|
||||
|
||||
m.assert();
|
||||
|
||||
let num_bytes = res.expect("Error making write request");
|
||||
let num_bytes = Client::new_with_maker(Arc::clone(&mock) as _)
|
||||
.write_lp(namespace, data)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected, mock.requests());
|
||||
assert_eq!(num_bytes, 11);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_max_request_payload_size() {
|
||||
let mock = Arc::new(MockRequestMaker::new());
|
||||
|
||||
let namespace = "orgname_bucketname";
|
||||
let data = "m,t=foo f=4\n\
|
||||
m,t=bar f=3\n\
|
||||
m,t=fooddddddd f=4";
|
||||
|
||||
// expect the data to be broken up into two chunks:
|
||||
let expected = vec![
|
||||
MockRequest {
|
||||
org_id: "orgname".into(),
|
||||
bucket_id: "bucketname".into(),
|
||||
body: "m,t=foo f=4\nm,t=bar f=3".into(),
|
||||
},
|
||||
MockRequest {
|
||||
org_id: "orgname".into(),
|
||||
bucket_id: "bucketname".into(),
|
||||
body: "m,t=fooddddddd f=4".into(),
|
||||
},
|
||||
];
|
||||
|
||||
let num_bytes = Client::new_with_maker(Arc::clone(&mock) as _)
|
||||
// enough to get first two lines, but not last
|
||||
.with_max_request_payload_size_bytes(Some(30))
|
||||
.write_lp(namespace, data)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected, mock.requests());
|
||||
assert_eq!(num_bytes, 41);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_lp_stream() {
|
||||
let mock = Arc::new(MockRequestMaker::new());
|
||||
|
||||
let namespace = "orgname_bucketname";
|
||||
let data = futures_util::stream::iter(
|
||||
vec!["m,t=foo f=4", "m,t=bar f=3"]
|
||||
.into_iter()
|
||||
.map(|s| s.to_string()),
|
||||
);
|
||||
|
||||
// expect the data to come in two chunks
|
||||
let expected = vec![
|
||||
MockRequest {
|
||||
org_id: "orgname".into(),
|
||||
bucket_id: "bucketname".into(),
|
||||
body: "m,t=foo f=4".into(),
|
||||
},
|
||||
MockRequest {
|
||||
org_id: "orgname".into(),
|
||||
bucket_id: "bucketname".into(),
|
||||
body: "m,t=bar f=3".into(),
|
||||
},
|
||||
];
|
||||
|
||||
let num_bytes = Client::new_with_maker(Arc::clone(&mock) as _)
|
||||
.write_lp_stream(namespace, data)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected, mock.requests());
|
||||
assert_eq!(num_bytes, 22);
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
struct MockRequest {
|
||||
org_id: String,
|
||||
bucket_id: String,
|
||||
body: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MockRequestMaker {
|
||||
requests: Mutex<Vec<MockRequest>>,
|
||||
}
|
||||
|
||||
impl MockRequestMaker {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
requests: Mutex::new(vec![]),
|
||||
}
|
||||
}
|
||||
|
||||
/// get a copy of the requests that were made using this mock
|
||||
fn requests(&self) -> Vec<MockRequest> {
|
||||
self.requests.lock().unwrap().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl RequestMaker for MockRequestMaker {
|
||||
fn write_source(
|
||||
&self,
|
||||
org_id: String,
|
||||
bucket_id: String,
|
||||
body: String,
|
||||
) -> BoxFuture<'_, Result<usize, Error>> {
|
||||
let sz = body.len();
|
||||
|
||||
self.requests.lock().unwrap().push(MockRequest {
|
||||
org_id,
|
||||
bucket_id,
|
||||
body,
|
||||
});
|
||||
|
||||
async move { Ok(sz) }.boxed()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ ffi = ["libc"]
|
|||
bytes = "1.2"
|
||||
libc = { version = "0.2", optional = true }
|
||||
nom = { version = "7", default-features = false, features = ["std"] }
|
||||
smallvec = { version = "1.9.0", features = ["union"] }
|
||||
smallvec = { version = "1.10.0", features = ["union"] }
|
||||
snafu = "0.7"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
|
|
|||
|
|
@ -529,7 +529,7 @@ pub fn parse_lines(input: &str) -> impl Iterator<Item = Result<ParsedLine<'_>>>
|
|||
/// logic duplication for scanning fields, duplicating it also means
|
||||
/// we can be more sure of the compatibility of the rust parser and
|
||||
/// the canonical Go parser.
|
||||
fn split_lines(input: &str) -> impl Iterator<Item = &str> {
|
||||
pub fn split_lines(input: &str) -> impl Iterator<Item = &str> {
|
||||
// NB: This is ported as closely as possible from the original Go code:
|
||||
let mut quoted = false;
|
||||
let mut fields = false;
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ version = "0.1.0"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
sqlparser = "0.24.0"
|
||||
snafu = "0.7.1"
|
||||
sqlparser = "0.25.0"
|
||||
snafu = "0.7.2"
|
||||
|
||||
generated_types = { path = "../generated_types" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
|
@ -24,7 +24,7 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
metric = { path = "../metric" }
|
||||
mutable_batch = { path = "../mutable_batch"}
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parking_lot = "0.12"
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
|
|
@ -45,6 +45,7 @@ write_buffer = { path = "../write_buffer" }
|
|||
write_summary = { path = "../write_summary" }
|
||||
tokio-util = { version = "0.7.4" }
|
||||
trace = { path = "../trace" }
|
||||
rand = "0.8.5"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.5.0"
|
||||
|
|
@ -52,4 +53,4 @@ bitflags = {version = "1.3.2"}
|
|||
once_cell = "1"
|
||||
paste = "1.0.9"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
tokio-stream = {version = "0.1.10", default_features = false }
|
||||
tokio-stream = {version = "0.1.11", default_features = false }
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use crate::{data::partition::PersistingBatch, query::QueryableBatch};
|
|||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_copy_implementations, missing_docs)]
|
||||
pub enum Error {
|
||||
pub(crate) enum Error {
|
||||
#[snafu(display("Error while building logical plan for Ingester's compaction"))]
|
||||
LogicalPlan {
|
||||
source: iox_query::frontend::reorg::Error,
|
||||
|
|
@ -86,11 +86,8 @@ pub(crate) async fn compact_persisting_batch(
|
|||
namespace_id: i64,
|
||||
partition_info: &PartitionInfo,
|
||||
batch: Arc<PersistingBatch>,
|
||||
) -> Result<Option<CompactedStream>> {
|
||||
// Nothing to compact
|
||||
if batch.data.data.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
) -> Result<CompactedStream> {
|
||||
assert!(!batch.data.data.is_empty());
|
||||
|
||||
let namespace_name = &partition_info.namespace_name;
|
||||
let table_name = &partition_info.table_name;
|
||||
|
|
@ -141,11 +138,11 @@ pub(crate) async fn compact_persisting_batch(
|
|||
sort_key: Some(metadata_sort_key),
|
||||
};
|
||||
|
||||
Ok(Some(CompactedStream {
|
||||
Ok(CompactedStream {
|
||||
stream,
|
||||
iox_metadata,
|
||||
sort_key_update,
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
||||
/// Compact a given Queryable Batch
|
||||
|
|
@ -192,8 +189,8 @@ mod tests {
|
|||
create_batches_with_influxtype_same_columns_different_type,
|
||||
create_one_record_batch_with_influxtype_duplicates,
|
||||
create_one_record_batch_with_influxtype_no_duplicates,
|
||||
create_one_row_record_batch_with_influxtype, create_tombstone, make_meta,
|
||||
make_persisting_batch, make_queryable_batch, make_queryable_batch_with_deletes,
|
||||
create_one_row_record_batch_with_influxtype, make_meta, make_persisting_batch,
|
||||
make_queryable_batch,
|
||||
};
|
||||
|
||||
// this test was added to guard against https://github.com/influxdata/influxdb_iox/issues/3782
|
||||
|
|
@ -226,7 +223,6 @@ mod tests {
|
|||
partition_id,
|
||||
uuid,
|
||||
batches,
|
||||
vec![],
|
||||
);
|
||||
|
||||
// verify PK
|
||||
|
|
@ -254,7 +250,6 @@ mod tests {
|
|||
let CompactedStream { stream, .. } =
|
||||
compact_persisting_batch(time_provider, &exc, 1, &partition_info, persisting_batch)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
|
|
@ -297,7 +292,6 @@ mod tests {
|
|||
partition_id,
|
||||
uuid,
|
||||
batches,
|
||||
vec![],
|
||||
);
|
||||
|
||||
// verify PK
|
||||
|
|
@ -328,7 +322,6 @@ mod tests {
|
|||
sort_key_update,
|
||||
} = compact_persisting_batch(time_provider, &exc, 1, &partition_info, persisting_batch)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
|
|
@ -394,7 +387,6 @@ mod tests {
|
|||
partition_id,
|
||||
uuid,
|
||||
batches,
|
||||
vec![],
|
||||
);
|
||||
|
||||
// verify PK
|
||||
|
|
@ -426,7 +418,6 @@ mod tests {
|
|||
sort_key_update,
|
||||
} = compact_persisting_batch(time_provider, &exc, 1, &partition_info, persisting_batch)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
|
|
@ -494,7 +485,6 @@ mod tests {
|
|||
partition_id,
|
||||
uuid,
|
||||
batches,
|
||||
vec![],
|
||||
);
|
||||
|
||||
// verify PK
|
||||
|
|
@ -527,7 +517,6 @@ mod tests {
|
|||
sort_key_update,
|
||||
} = compact_persisting_batch(time_provider, &exc, 1, &partition_info, persisting_batch)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
|
|
@ -595,7 +584,6 @@ mod tests {
|
|||
partition_id,
|
||||
uuid,
|
||||
batches,
|
||||
vec![],
|
||||
);
|
||||
|
||||
// verify PK
|
||||
|
|
@ -629,7 +617,6 @@ mod tests {
|
|||
sort_key_update,
|
||||
} = compact_persisting_batch(time_provider, &exc, 1, &partition_info, persisting_batch)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
|
|
@ -700,7 +687,6 @@ mod tests {
|
|||
partition_id,
|
||||
uuid,
|
||||
batches,
|
||||
vec![],
|
||||
);
|
||||
|
||||
// verify PK
|
||||
|
|
@ -739,7 +725,6 @@ mod tests {
|
|||
sort_key_update,
|
||||
} = compact_persisting_batch(time_provider, &exc, 1, &partition_info, persisting_batch)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
|
|
@ -825,54 +810,6 @@ mod tests {
|
|||
assert_batches_eq!(&expected, &output_batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_one_batch_no_dupilcates_with_deletes() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// create input data
|
||||
let batches = create_one_record_batch_with_influxtype_no_duplicates().await;
|
||||
let tombstones = vec![create_tombstone(1, 1, 1, 1, 0, 200000, "tag1=UT")];
|
||||
|
||||
// build queryable batch from the input batches
|
||||
let compact_batch =
|
||||
make_queryable_batch_with_deletes("test_table", 0, 1, batches, tombstones);
|
||||
|
||||
// verify PK
|
||||
let schema = compact_batch.schema();
|
||||
let pk = schema.primary_key();
|
||||
let expected_pk = vec!["tag1", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key = compute_sort_key(
|
||||
&schema,
|
||||
compact_batch.data.iter().map(|sb| sb.data.as_ref()),
|
||||
);
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
|
||||
|
||||
// compact
|
||||
let exc = Executor::new(1);
|
||||
let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
.await
|
||||
.unwrap();
|
||||
// verify no empty record batches - bug #3782
|
||||
assert_eq!(output_batches.len(), 2);
|
||||
assert_eq!(output_batches[0].num_rows(), 1);
|
||||
assert_eq!(output_batches[1].num_rows(), 1);
|
||||
|
||||
// verify compacted data
|
||||
// row with "tag1=UT" no longer available
|
||||
let expected = vec![
|
||||
"+-----------+------+-----------------------------+",
|
||||
"| field_int | tag1 | time |",
|
||||
"+-----------+------+-----------------------------+",
|
||||
"| 10 | VT | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 1000 | WA | 1970-01-01T00:00:00.000008Z |",
|
||||
"+-----------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &output_batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_one_batch_with_duplicates() {
|
||||
// create input data
|
||||
|
|
@ -1019,23 +956,12 @@ mod tests {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_many_batches_different_columns_different_order_with_duplicates_with_deletes(
|
||||
) {
|
||||
async fn test_compact_many_batches_different_columns_different_order_with_duplicates() {
|
||||
// create many-batches input data
|
||||
let batches = create_batches_with_influxtype_different_columns_different_order().await;
|
||||
let tombstones = vec![create_tombstone(
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
100, // delete's seq_number
|
||||
0, // min time of data to get deleted
|
||||
200000, // max time of data to get deleted
|
||||
"tag2=CT and field_int=1000", // delete predicate
|
||||
)];
|
||||
|
||||
// build queryable batch from the input batches
|
||||
let compact_batch =
|
||||
make_queryable_batch_with_deletes("test_table", 0, 1, batches, tombstones);
|
||||
let compact_batch = make_queryable_batch("test_table", 0, 1, batches);
|
||||
|
||||
// verify PK
|
||||
let schema = compact_batch.schema();
|
||||
|
|
@ -1058,7 +984,6 @@ mod tests {
|
|||
|
||||
// verify compacted data
|
||||
// data is sorted and all duplicates are removed
|
||||
// all rows with ("tag2=CT and field_int=1000") are also removed
|
||||
// CORRECT RESULT
|
||||
let expected = vec![
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
|
|
@ -1067,73 +992,15 @@ mod tests {
|
|||
"| 5 | | AL | 1970-01-01T00:00:00.000005Z |",
|
||||
"| 10 | | AL | 1970-01-01T00:00:00.000007Z |",
|
||||
"| 70 | | CT | 1970-01-01T00:00:00.000000100Z |",
|
||||
"| 1000 | | CT | 1970-01-01T00:00:00.000001Z |",
|
||||
"| 100 | | MA | 1970-01-01T00:00:00.000000050Z |",
|
||||
"| 10 | AL | MA | 1970-01-01T00:00:00.000000050Z |",
|
||||
"| 70 | CT | CT | 1970-01-01T00:00:00.000000100Z |",
|
||||
"| 70 | CT | CT | 1970-01-01T00:00:00.000000500Z |",
|
||||
"| 30 | MT | AL | 1970-01-01T00:00:00.000000005Z |",
|
||||
"| 20 | MT | AL | 1970-01-01T00:00:00.000007Z |",
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(&expected, &output_batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_many_batches_different_columns_different_order_with_duplicates_with_many_deletes(
|
||||
) {
|
||||
// create many-batches input data
|
||||
let batches = create_batches_with_influxtype_different_columns_different_order().await;
|
||||
let tombstones = vec![
|
||||
create_tombstone(
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
100, // delete's seq_number
|
||||
0, // min time of data to get deleted
|
||||
200000, // max time of data to get deleted
|
||||
"tag2=CT and field_int=1000", // delete predicate
|
||||
),
|
||||
create_tombstone(
|
||||
1, 1, 1, 101, // delete's seq_number
|
||||
0, // min time of data to get deleted
|
||||
200000, // max time of data to get deleted
|
||||
"tag1!=MT", // delete predicate
|
||||
),
|
||||
];
|
||||
|
||||
// build queryable batch from the input batches
|
||||
let compact_batch =
|
||||
make_queryable_batch_with_deletes("test_table", 0, 1, batches, tombstones);
|
||||
|
||||
// verify PK
|
||||
let schema = compact_batch.schema();
|
||||
let pk = schema.primary_key();
|
||||
let expected_pk = vec!["tag1", "tag2", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key = compute_sort_key(
|
||||
&schema,
|
||||
compact_batch.data.iter().map(|sb| sb.data.as_ref()),
|
||||
);
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "tag2", "time"]));
|
||||
|
||||
// compact
|
||||
let exc = Executor::new(1);
|
||||
let stream = compact(&exc, compact_batch, sort_key).await.unwrap();
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// verify compacted data
|
||||
// data is sorted and all duplicates are removed
|
||||
// all rows with ("tag2=CT and field_int=1000") and ("tag1!=MT") are also removed
|
||||
let expected = vec![
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
"| field_int | tag1 | tag2 | time |",
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
"| 30 | MT | AL | 1970-01-01T00:00:00.000000005Z |",
|
||||
"| 20 | MT | AL | 1970-01-01T00:00:00.000007Z |",
|
||||
"| 1000 | MT | CT | 1970-01-01T00:00:00.000001Z |",
|
||||
"| 1000 | MT | CT | 1970-01-01T00:00:00.000002Z |",
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
];
|
||||
|
||||
|
|
@ -1142,31 +1009,12 @@ mod tests {
|
|||
|
||||
// BUG
|
||||
#[tokio::test]
|
||||
async fn test_compact_many_batches_different_columns_different_order_with_duplicates_with_many_deletes_2(
|
||||
) {
|
||||
async fn test_compact_many_batches_different_columns_different_order_with_duplicates2() {
|
||||
// create many-batches input data
|
||||
let batches = create_batches_with_influxtype_different_columns_different_order().await;
|
||||
let tombstones = vec![
|
||||
create_tombstone(
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
100, // delete's seq_number
|
||||
0, // min time of data to get deleted
|
||||
200000, // max time of data to get deleted
|
||||
"tag2=CT and field_int=1000", // delete predicate
|
||||
),
|
||||
create_tombstone(
|
||||
1, 1, 1, 101, // delete's seq_number
|
||||
0, // min time of data to get deleted
|
||||
200000, // max time of data to get deleted
|
||||
"tag1=MT", // delete predicate
|
||||
),
|
||||
];
|
||||
|
||||
// build queryable batch from the input batches
|
||||
let compact_batch =
|
||||
make_queryable_batch_with_deletes("test_table", 0, 1, batches, tombstones);
|
||||
let compact_batch = make_queryable_batch("test_table", 0, 1, batches);
|
||||
|
||||
// verify PK
|
||||
let schema = compact_batch.schema();
|
||||
|
|
@ -1189,29 +1037,22 @@ mod tests {
|
|||
|
||||
// verify compacted data
|
||||
// data is sorted and all duplicates are removed
|
||||
// all rows with ("tag2=CT and field_int=1000") and ("tag1=MT") are also removed
|
||||
// CORRECT RESULT
|
||||
// let expected = vec![
|
||||
// "+-----------+------+------+--------------------------------+",
|
||||
// "| field_int | tag1 | tag2 | time |",
|
||||
// "+-----------+------+------+--------------------------------+",
|
||||
// "| 5 | | AL | 1970-01-01T00:00:00.000005Z |",
|
||||
// "| 10 | | AL | 1970-01-01T00:00:00.000007Z |",
|
||||
// "| 70 | | CT | 1970-01-01T00:00:00.000000100Z |",
|
||||
// "| 100 | | MA | 1970-01-01T00:00:00.000000050Z |",
|
||||
// "| 10 | AL | MA | 1970-01-01T00:00:00.000000050Z |",
|
||||
// "| 70 | CT | CT | 1970-01-01T00:00:00.000000100Z |",
|
||||
// "| 70 | CT | CT | 1970-01-01T00:00:00.000000500Z |",
|
||||
// "+-----------+------+------+--------------------------------+",
|
||||
// ];
|
||||
// current WRONG result: "tag1 is null" is also eliminated
|
||||
let expected = vec![
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
"| field_int | tag1 | tag2 | time |",
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
"| 5 | | AL | 1970-01-01T00:00:00.000005Z |",
|
||||
"| 10 | | AL | 1970-01-01T00:00:00.000007Z |",
|
||||
"| 70 | | CT | 1970-01-01T00:00:00.000000100Z |",
|
||||
"| 1000 | | CT | 1970-01-01T00:00:00.000001Z |",
|
||||
"| 100 | | MA | 1970-01-01T00:00:00.000000050Z |",
|
||||
"| 10 | AL | MA | 1970-01-01T00:00:00.000000050Z |",
|
||||
"| 70 | CT | CT | 1970-01-01T00:00:00.000000100Z |",
|
||||
"| 70 | CT | CT | 1970-01-01T00:00:00.000000500Z |",
|
||||
"| 30 | MT | AL | 1970-01-01T00:00:00.000000005Z |",
|
||||
"| 20 | MT | AL | 1970-01-01T00:00:00.000007Z |",
|
||||
"| 1000 | MT | CT | 1970-01-01T00:00:00.000001Z |",
|
||||
"| 1000 | MT | CT | 1970-01-01T00:00:00.000002Z |",
|
||||
"+-----------+------+------+--------------------------------+",
|
||||
];
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,12 @@
|
|||
//! Data for the lifecycle of the Ingester
|
||||
|
||||
use std::{collections::BTreeMap, pin::Pin, sync::Arc};
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
use arrow::{error::ArrowError, record_batch::RecordBatch};
|
||||
use arrow_util::optimize::{optimize_record_batch, optimize_schema};
|
||||
use async_trait::async_trait;
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::{PartitionId, SequenceNumber, ShardId, ShardIndex};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use data_types::{NamespaceId, PartitionId, SequenceNumber, ShardId, ShardIndex, TableId};
|
||||
|
||||
use dml::DmlOperation;
|
||||
use futures::{Stream, StreamExt};
|
||||
use iox_catalog::interface::{get_table_schema_by_id, Catalog};
|
||||
use iox_query::exec::Executor;
|
||||
use iox_time::SystemProvider;
|
||||
|
|
@ -25,16 +22,12 @@ use crate::{
|
|||
lifecycle::LifecycleHandle,
|
||||
};
|
||||
|
||||
pub mod namespace;
|
||||
pub(crate) mod namespace;
|
||||
pub mod partition;
|
||||
mod query_dedup;
|
||||
pub mod shard;
|
||||
pub mod table;
|
||||
pub(crate) mod shard;
|
||||
pub(crate) mod table;
|
||||
|
||||
use self::{
|
||||
partition::{resolver::PartitionProvider, PartitionStatus},
|
||||
shard::ShardData,
|
||||
};
|
||||
use self::{partition::resolver::PartitionProvider, shard::ShardData, table::TableName};
|
||||
|
||||
#[cfg(test)]
|
||||
mod triggers;
|
||||
|
|
@ -51,9 +44,6 @@ pub enum Error {
|
|||
#[snafu(display("Table {} not found in buffer", table_name))]
|
||||
TableNotFound { table_name: String },
|
||||
|
||||
#[snafu(display("Table must be specified in delete"))]
|
||||
TableNotPresent,
|
||||
|
||||
#[snafu(display("Error accessing catalog: {}", source))]
|
||||
Catalog {
|
||||
source: iox_catalog::interface::Error,
|
||||
|
|
@ -186,7 +176,7 @@ impl IngesterData {
|
|||
.get(&shard_id)
|
||||
.context(ShardNotFoundSnafu { shard_id })?;
|
||||
shard_data
|
||||
.buffer_operation(dml_operation, &self.catalog, lifecycle_handle, &self.exec)
|
||||
.buffer_operation(dml_operation, &self.catalog, lifecycle_handle)
|
||||
.await
|
||||
}
|
||||
|
||||
|
|
@ -220,7 +210,13 @@ impl IngesterData {
|
|||
#[async_trait]
|
||||
pub trait Persister: Send + Sync + 'static {
|
||||
/// Persists the partition with the given ID. Will retry forever until it succeeds.
|
||||
async fn persist(&self, partition_id: PartitionId);
|
||||
async fn persist(
|
||||
&self,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
);
|
||||
|
||||
/// Updates the shard's `min_unpersisted_sequence_number` in the catalog.
|
||||
/// This number represents the minimum that might be unpersisted, which is the
|
||||
|
|
@ -235,7 +231,69 @@ pub trait Persister: Send + Sync + 'static {
|
|||
|
||||
#[async_trait]
|
||||
impl Persister for IngesterData {
|
||||
async fn persist(&self, partition_id: PartitionId) {
|
||||
async fn persist(
|
||||
&self,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
) {
|
||||
// lookup the state from the ingester data. If something isn't found,
|
||||
// it's unexpected. Crash so someone can take a look.
|
||||
let shard_data = self
|
||||
.shards
|
||||
.get(&shard_id)
|
||||
.unwrap_or_else(|| panic!("shard state for {shard_id} not in ingester data"));
|
||||
let namespace = shard_data
|
||||
.namespace_by_id(namespace_id)
|
||||
.unwrap_or_else(|| panic!("namespace {namespace_id} not in shard {shard_id} state"));
|
||||
|
||||
let partition_key;
|
||||
let batch;
|
||||
{
|
||||
let table_data = namespace.table_id(table_id).unwrap_or_else(|| {
|
||||
panic!("table {table_id} in namespace {namespace_id} not in shard {shard_id} state")
|
||||
});
|
||||
|
||||
let mut guard = table_data.write().await;
|
||||
let partition = guard.get_partition(partition_id).unwrap_or_else(|| {
|
||||
panic!(
|
||||
"partition {partition_id} in table {table_id} in namespace {namespace_id} not in shard {shard_id} state"
|
||||
)
|
||||
});
|
||||
|
||||
partition_key = partition.partition_key().clone();
|
||||
batch = partition.snapshot_to_persisting_batch();
|
||||
};
|
||||
|
||||
debug!(%shard_id, %namespace_id, %table_id, %partition_id, %partition_key, "persisting partition");
|
||||
|
||||
// Check if there is any data to persist.
|
||||
let batch = match batch {
|
||||
Some(v) if !v.data.data.is_empty() => v,
|
||||
_ => {
|
||||
warn!(
|
||||
%shard_id,
|
||||
%namespace_id,
|
||||
%table_id,
|
||||
%partition_id,
|
||||
%partition_key,
|
||||
"partition marked for persistence contains no data"
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// lookup column IDs from catalog
|
||||
// TODO: this can be removed once the ingester uses column IDs internally as well
|
||||
let table_schema = Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("get table schema", || async {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
get_table_schema_by_id(table_id, repos.as_mut()).await
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
|
||||
// lookup the partition_info from the catalog
|
||||
let partition_info = Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("get partition_info_by_id", || async {
|
||||
|
|
@ -243,217 +301,159 @@ impl Persister for IngesterData {
|
|||
repos.partitions().partition_info_by_id(partition_id).await
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
.expect("retry forever").unwrap_or_else(|| panic!("partition {partition_id} in table {table_id} in namespace {namespace_id} in shard {shard_id} has no partition info in catalog"));
|
||||
|
||||
// lookup the state from the ingester data. If something isn't found, it's unexpected. Crash
|
||||
// so someone can take a look.
|
||||
let partition_info = partition_info
|
||||
.unwrap_or_else(|| panic!("partition {} not found in catalog", partition_id));
|
||||
let shard_data = self
|
||||
.shards
|
||||
.get(&partition_info.partition.shard_id)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"shard state for {} not in ingester data",
|
||||
partition_info.partition.shard_id
|
||||
)
|
||||
}); //{
|
||||
let namespace = shard_data
|
||||
.namespace(&partition_info.namespace_name)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"namespace {} not in shard {} state",
|
||||
partition_info.namespace_name, partition_info.partition.shard_id
|
||||
)
|
||||
});
|
||||
debug!(?partition_id, ?partition_info, "persisting partition");
|
||||
// do the CPU intensive work of compaction, de-duplication and sorting
|
||||
let CompactedStream {
|
||||
stream: record_stream,
|
||||
iox_metadata,
|
||||
sort_key_update,
|
||||
} = compact_persisting_batch(
|
||||
Arc::new(SystemProvider::new()),
|
||||
&self.exec,
|
||||
namespace.namespace_id().get(),
|
||||
&partition_info,
|
||||
Arc::clone(&batch),
|
||||
)
|
||||
.await
|
||||
.expect("unable to compact persisting batch");
|
||||
|
||||
// lookup column IDs from catalog
|
||||
// TODO: this can be removed once the ingester uses column IDs internally as well
|
||||
let table_schema = Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("get table schema", || async {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
let table = repos
|
||||
.tables()
|
||||
.get_by_namespace_and_name(namespace.namespace_id(), &partition_info.table_name)
|
||||
.await?
|
||||
.expect("table not found in catalog");
|
||||
get_table_schema_by_id(table.id, repos.as_mut()).await
|
||||
})
|
||||
// Save the compacted data to a parquet file in object storage.
|
||||
//
|
||||
// This call retries until it completes.
|
||||
let (md, file_size) = self
|
||||
.store
|
||||
.upload(record_stream, &iox_metadata)
|
||||
.await
|
||||
.expect("retry forever");
|
||||
.expect("unexpected fatal persist error");
|
||||
|
||||
let persisting_batch = namespace
|
||||
.snapshot_to_persisting(
|
||||
&partition_info.table_name,
|
||||
&partition_info.partition.partition_key,
|
||||
)
|
||||
.await;
|
||||
|
||||
if let Some(persisting_batch) = persisting_batch {
|
||||
// do the CPU intensive work of compaction, de-duplication and sorting
|
||||
let compacted_stream = match compact_persisting_batch(
|
||||
Arc::new(SystemProvider::new()),
|
||||
&self.exec,
|
||||
namespace.namespace_id().get(),
|
||||
&partition_info,
|
||||
Arc::clone(&persisting_batch),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(e) => {
|
||||
// this should never error out. if it does, we need to crash hard so
|
||||
// someone can take a look.
|
||||
panic!("unable to compact persisting batch with error: {:?}", e);
|
||||
}
|
||||
Ok(Some(r)) => r,
|
||||
Ok(None) => {
|
||||
warn!("persist called with no data");
|
||||
return;
|
||||
}
|
||||
};
|
||||
let CompactedStream {
|
||||
stream: record_stream,
|
||||
iox_metadata,
|
||||
sort_key_update,
|
||||
} = compacted_stream;
|
||||
|
||||
// Save the compacted data to a parquet file in object storage.
|
||||
//
|
||||
// This call retries until it completes.
|
||||
let (md, file_size) = self
|
||||
.store
|
||||
.upload(record_stream, &iox_metadata)
|
||||
.await
|
||||
.expect("unexpected fatal persist error");
|
||||
|
||||
// Update the sort key in the catalog if there are
|
||||
// additional columns BEFORE adding parquet file to the
|
||||
// catalog. If the order is reversed, the querier or
|
||||
// compactor may see a parquet file with an inconsistent
|
||||
// sort key. https://github.com/influxdata/influxdb_iox/issues/5090
|
||||
if let Some(new_sort_key) = sort_key_update {
|
||||
let sort_key = new_sort_key.to_columns().collect::<Vec<_>>();
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("update_sort_key", || async {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
let _partition = repos
|
||||
.partitions()
|
||||
.update_sort_key(partition_id, &sort_key)
|
||||
.await?;
|
||||
// compiler insisted on getting told the type of the error :shrug:
|
||||
Ok(()) as Result<(), iox_catalog::interface::Error>
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
debug!(
|
||||
?partition_id,
|
||||
table = partition_info.table_name,
|
||||
?new_sort_key,
|
||||
"adjusted sort key during batch compact & persist"
|
||||
);
|
||||
}
|
||||
|
||||
// Add the parquet file to the catalog, retrying until it succeeds
|
||||
let parquet_file = iox_metadata.to_parquet_file(partition_id, file_size, &md, |name| {
|
||||
table_schema.columns.get(name).expect("Unknown column").id
|
||||
});
|
||||
|
||||
// Assert partitions are persisted in-order.
|
||||
//
|
||||
// It is an invariant that partitions are persisted in order so that
|
||||
// both the per-shard, and per-partition watermarks are correctly
|
||||
// advanced and accurate.
|
||||
if let Some(last_persist) = partition_info.partition.persisted_sequence_number {
|
||||
assert!(
|
||||
parquet_file.max_sequence_number > last_persist,
|
||||
"out of order partition persistence, persisting {}, previously persisted {}",
|
||||
parquet_file.max_sequence_number.get(),
|
||||
last_persist.get(),
|
||||
);
|
||||
}
|
||||
|
||||
// Add the parquet file to the catalog.
|
||||
//
|
||||
// This has the effect of allowing the queriers to "discover" the
|
||||
// parquet file by polling / querying the catalog.
|
||||
// Update the sort key in the catalog if there are
|
||||
// additional columns BEFORE adding parquet file to the
|
||||
// catalog. If the order is reversed, the querier or
|
||||
// compactor may see a parquet file with an inconsistent
|
||||
// sort key. https://github.com/influxdata/influxdb_iox/issues/5090
|
||||
if let Some(new_sort_key) = sort_key_update {
|
||||
let sort_key = new_sort_key.to_columns().collect::<Vec<_>>();
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("add parquet file to catalog", || async {
|
||||
.retry_all_errors("update_sort_key", || async {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
let parquet_file = repos.parquet_files().create(parquet_file.clone()).await?;
|
||||
debug!(
|
||||
?partition_id,
|
||||
table_id=?parquet_file.table_id,
|
||||
parquet_file_id=?parquet_file.id,
|
||||
table_name=%iox_metadata.table_name,
|
||||
"parquet file written to catalog"
|
||||
);
|
||||
let _partition = repos
|
||||
.partitions()
|
||||
.update_sort_key(partition_id, &sort_key)
|
||||
.await?;
|
||||
// compiler insisted on getting told the type of the error :shrug:
|
||||
Ok(()) as Result<(), iox_catalog::interface::Error>
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
|
||||
// Update the per-partition persistence watermark, so that new
|
||||
// ingester instances skip the just-persisted ops during replay.
|
||||
//
|
||||
// This could be transactional with the above parquet insert to
|
||||
// maintain catalog consistency, though in practice it is an
|
||||
// unnecessary overhead - the system can tolerate replaying the ops
|
||||
// that lead to this parquet file being generated, and tolerate
|
||||
// creating a parquet file containing duplicate data (remedied by
|
||||
// compaction).
|
||||
//
|
||||
// This means it is possible to observe a parquet file with a
|
||||
// max_persisted_sequence_number >
|
||||
// partition.persisted_sequence_number, either in-between these
|
||||
// catalog updates, or for however long it takes a crashed ingester
|
||||
// to restart and replay the ops, and re-persist a file containing
|
||||
// the same (or subset of) data.
|
||||
//
|
||||
// The above is also true of the per-shard persist marker that
|
||||
// governs the ingester's replay start point, which is
|
||||
// non-transactionally updated after all partitions have persisted.
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("set partition persist marker", || async {
|
||||
self.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.update_persisted_sequence_number(
|
||||
parquet_file.partition_id,
|
||||
parquet_file.max_sequence_number,
|
||||
)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
|
||||
// Record metrics
|
||||
let attributes = Attributes::from([(
|
||||
"shard_id",
|
||||
format!("{}", partition_info.partition.shard_id).into(),
|
||||
)]);
|
||||
self.persisted_file_size_bytes
|
||||
.recorder(attributes)
|
||||
.record(file_size as u64);
|
||||
|
||||
// and remove the persisted data from memory
|
||||
namespace
|
||||
.mark_persisted(
|
||||
&partition_info.table_name,
|
||||
&partition_info.partition.partition_key,
|
||||
iox_metadata.max_sequence_number,
|
||||
)
|
||||
.await;
|
||||
debug!(
|
||||
?partition_id,
|
||||
table_name=%partition_info.table_name,
|
||||
partition_key=%partition_info.partition.partition_key,
|
||||
max_sequence_number=%iox_metadata.max_sequence_number.get(),
|
||||
"marked partition as persisted"
|
||||
table = partition_info.table_name,
|
||||
?new_sort_key,
|
||||
"adjusted sort key during batch compact & persist"
|
||||
);
|
||||
}
|
||||
|
||||
// Add the parquet file to the catalog until succeed
|
||||
let parquet_file = iox_metadata.to_parquet_file(partition_id, file_size, &md, |name| {
|
||||
table_schema.columns.get(name).expect("Unknown column").id
|
||||
});
|
||||
|
||||
// Assert partitions are persisted in-order.
|
||||
//
|
||||
// It is an invariant that partitions are persisted in order so that
|
||||
// both the per-shard, and per-partition watermarks are correctly
|
||||
// advanced and accurate.
|
||||
if let Some(last_persist) = partition_info.partition.persisted_sequence_number {
|
||||
assert!(
|
||||
parquet_file.max_sequence_number > last_persist,
|
||||
"out of order partition persistence, persisting {}, previously persisted {}",
|
||||
parquet_file.max_sequence_number.get(),
|
||||
last_persist.get(),
|
||||
);
|
||||
}
|
||||
|
||||
// Add the parquet file to the catalog.
|
||||
//
|
||||
// This has the effect of allowing the queriers to "discover" the
|
||||
// parquet file by polling / querying the catalog.
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("add parquet file to catalog", || async {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
let parquet_file = repos.parquet_files().create(parquet_file.clone()).await?;
|
||||
debug!(
|
||||
?partition_id,
|
||||
table_id=?parquet_file.table_id,
|
||||
parquet_file_id=?parquet_file.id,
|
||||
table_name=%iox_metadata.table_name,
|
||||
"parquet file written to catalog"
|
||||
);
|
||||
// compiler insisted on getting told the type of the error :shrug:
|
||||
Ok(()) as Result<(), iox_catalog::interface::Error>
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
|
||||
// Update the per-partition persistence watermark, so that new
|
||||
// ingester instances skip the just-persisted ops during replay.
|
||||
//
|
||||
// This could be transactional with the above parquet insert to
|
||||
// maintain catalog consistency, though in practice it is an
|
||||
// unnecessary overhead - the system can tolerate replaying the ops
|
||||
// that lead to this parquet file being generated, and tolerate
|
||||
// creating a parquet file containing duplicate data (remedied by
|
||||
// compaction).
|
||||
//
|
||||
// This means it is possible to observe a parquet file with a
|
||||
// max_persisted_sequence_number >
|
||||
// partition.persisted_sequence_number, either in-between these
|
||||
// catalog updates, or for however long it takes a crashed ingester
|
||||
// to restart and replay the ops, and re-persist a file containing
|
||||
// the same (or subset of) data.
|
||||
//
|
||||
// The above is also true of the per-shard persist marker that
|
||||
// governs the ingester's replay start point, which is
|
||||
// non-transactionally updated after all partitions have persisted.
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("set partition persist marker", || async {
|
||||
self.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.update_persisted_sequence_number(
|
||||
parquet_file.partition_id,
|
||||
parquet_file.max_sequence_number,
|
||||
)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
|
||||
// Record metrics
|
||||
let attributes = Attributes::from([(
|
||||
"shard_id",
|
||||
format!("{}", partition_info.partition.shard_id).into(),
|
||||
)]);
|
||||
self.persisted_file_size_bytes
|
||||
.recorder(attributes)
|
||||
.record(file_size as u64);
|
||||
|
||||
// and remove the persisted data from memory
|
||||
let table_name = TableName::from(&partition_info.table_name);
|
||||
namespace
|
||||
.mark_persisted(
|
||||
&table_name,
|
||||
&partition_info.partition.partition_key,
|
||||
iox_metadata.max_sequence_number,
|
||||
)
|
||||
.await;
|
||||
debug!(
|
||||
?partition_id,
|
||||
%table_name,
|
||||
partition_key=%partition_info.partition.partition_key,
|
||||
max_sequence_number=%iox_metadata.max_sequence_number.get(),
|
||||
"marked partition as persisted"
|
||||
);
|
||||
}
|
||||
|
||||
async fn update_min_unpersisted_sequence_number(
|
||||
|
|
@ -475,172 +475,24 @@ impl Persister for IngesterData {
|
|||
}
|
||||
}
|
||||
|
||||
/// Stream of snapshots.
|
||||
///
|
||||
/// Every snapshot is a dedicated [`SendableRecordBatchStream`].
|
||||
pub(crate) type SnapshotStream =
|
||||
Pin<Box<dyn Stream<Item = Result<SendableRecordBatchStream, ArrowError>> + Send>>;
|
||||
|
||||
/// Response data for a single partition.
|
||||
pub(crate) struct IngesterQueryPartition {
|
||||
/// Stream of snapshots.
|
||||
snapshots: SnapshotStream,
|
||||
|
||||
/// Partition ID.
|
||||
id: PartitionId,
|
||||
|
||||
/// Partition persistence status.
|
||||
status: PartitionStatus,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for IngesterQueryPartition {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("IngesterQueryPartition")
|
||||
.field("snapshots", &"<SNAPSHOT STREAM>")
|
||||
.field("id", &self.id)
|
||||
.field("status", &self.status)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl IngesterQueryPartition {
|
||||
pub(crate) fn new(snapshots: SnapshotStream, id: PartitionId, status: PartitionStatus) -> Self {
|
||||
Self {
|
||||
snapshots,
|
||||
id,
|
||||
status,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream of partitions in this response.
|
||||
pub(crate) type IngesterQueryPartitionStream =
|
||||
Pin<Box<dyn Stream<Item = Result<IngesterQueryPartition, ArrowError>> + Send>>;
|
||||
|
||||
/// Response streams for querier<>ingester requests.
|
||||
///
|
||||
/// The data structure is constructed to allow lazy/streaming data generation. For easier
|
||||
/// consumption according to the wire protocol, use the [`flatten`](Self::flatten) method.
|
||||
pub struct IngesterQueryResponse {
|
||||
/// Stream of partitions.
|
||||
partitions: IngesterQueryPartitionStream,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for IngesterQueryResponse {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("IngesterQueryResponse")
|
||||
.field("partitions", &"<PARTITION STREAM>")
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl IngesterQueryResponse {
|
||||
/// Make a response
|
||||
pub(crate) fn new(partitions: IngesterQueryPartitionStream) -> Self {
|
||||
Self { partitions }
|
||||
}
|
||||
|
||||
/// Flattens the data according to the wire protocol.
|
||||
pub fn flatten(self) -> FlatIngesterQueryResponseStream {
|
||||
self.partitions
|
||||
.flat_map(|partition_res| match partition_res {
|
||||
Ok(partition) => {
|
||||
let head = futures::stream::once(async move {
|
||||
Ok(FlatIngesterQueryResponse::StartPartition {
|
||||
partition_id: partition.id,
|
||||
status: partition.status,
|
||||
})
|
||||
});
|
||||
let tail = partition
|
||||
.snapshots
|
||||
.flat_map(|snapshot_res| match snapshot_res {
|
||||
Ok(snapshot) => {
|
||||
let schema = Arc::new(optimize_schema(&snapshot.schema()));
|
||||
|
||||
let schema_captured = Arc::clone(&schema);
|
||||
let head = futures::stream::once(async {
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot {
|
||||
schema: schema_captured,
|
||||
})
|
||||
});
|
||||
|
||||
let tail = snapshot.map(move |batch_res| match batch_res {
|
||||
Ok(batch) => Ok(FlatIngesterQueryResponse::RecordBatch {
|
||||
batch: optimize_record_batch(&batch, Arc::clone(&schema))?,
|
||||
}),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
head.chain(tail).boxed()
|
||||
}
|
||||
Err(e) => futures::stream::once(async { Err(e) }).boxed(),
|
||||
});
|
||||
|
||||
head.chain(tail).boxed()
|
||||
}
|
||||
Err(e) => futures::stream::once(async { Err(e) }).boxed(),
|
||||
})
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
/// Flattened version of [`IngesterQueryResponse`].
|
||||
pub(crate) type FlatIngesterQueryResponseStream =
|
||||
Pin<Box<dyn Stream<Item = Result<FlatIngesterQueryResponse, ArrowError>> + Send>>;
|
||||
|
||||
/// Element within the flat wire protocol.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum FlatIngesterQueryResponse {
|
||||
/// Start a new partition.
|
||||
StartPartition {
|
||||
/// Partition ID.
|
||||
partition_id: PartitionId,
|
||||
|
||||
/// Partition persistence status.
|
||||
status: PartitionStatus,
|
||||
},
|
||||
|
||||
/// Start a new snapshot.
|
||||
///
|
||||
/// The snapshot belongs to the partition of the last [`StartPartition`](Self::StartPartition)
|
||||
/// message.
|
||||
StartSnapshot {
|
||||
/// Snapshot schema.
|
||||
schema: Arc<arrow::datatypes::Schema>,
|
||||
},
|
||||
|
||||
/// Add a record batch to the snapshot that was announced by the last
|
||||
/// [`StartSnapshot`](Self::StartSnapshot) message.
|
||||
RecordBatch {
|
||||
/// Record batch.
|
||||
batch: RecordBatch,
|
||||
},
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{
|
||||
ops::DerefMut,
|
||||
sync::Arc,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
use std::{ops::DerefMut, sync::Arc, time::Duration};
|
||||
|
||||
use arrow::datatypes::SchemaRef;
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{
|
||||
ColumnId, ColumnSet, CompactionLevel, DeletePredicate, NamespaceSchema, NonEmptyString,
|
||||
ParquetFileParams, Sequence, Timestamp, TimestampRange,
|
||||
};
|
||||
use datafusion::physical_plan::RecordBatchStream;
|
||||
|
||||
use dml::{DmlDelete, DmlMeta, DmlWrite};
|
||||
use futures::TryStreamExt;
|
||||
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
|
||||
use iox_time::Time;
|
||||
use metric::{MetricObserver, Observation};
|
||||
use mutable_batch_lp::{lines_to_batches, test_helpers::lp_to_mutable_batch};
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use object_store::memory::InMemory;
|
||||
use schema::selection::Selection;
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
|
|
@ -804,17 +656,20 @@ mod tests {
|
|||
// limits)
|
||||
assert!(!should_pause);
|
||||
|
||||
let partition_id = {
|
||||
let (table_id, partition_id) = {
|
||||
let sd = data.shards.get(&shard1.id).unwrap();
|
||||
let n = sd.namespace("foo").unwrap();
|
||||
let mem_table = n.table_data("mem").unwrap();
|
||||
assert!(n.table_data("mem").is_some());
|
||||
let n = sd.namespace(&"foo".into()).unwrap();
|
||||
let mem_table = n.table_data(&"mem".into()).unwrap();
|
||||
assert!(n.table_data(&"mem".into()).is_some());
|
||||
let mem_table = mem_table.write().await;
|
||||
let p = mem_table.partition_data.get(&"1970-01-01".into()).unwrap();
|
||||
p.id()
|
||||
let p = mem_table
|
||||
.get_partition_by_key(&"1970-01-01".into())
|
||||
.unwrap();
|
||||
(mem_table.table_id(), p.partition_id())
|
||||
};
|
||||
|
||||
data.persist(partition_id).await;
|
||||
data.persist(shard1.id, namespace.id, table_id, partition_id)
|
||||
.await;
|
||||
|
||||
// verify that a file got put into object store
|
||||
let file_paths: Vec<_> = object_store
|
||||
|
|
@ -945,17 +800,20 @@ mod tests {
|
|||
assert_progress(&data, shard_index, expected_progress).await;
|
||||
|
||||
let sd = data.shards.get(&shard1.id).unwrap();
|
||||
let n = sd.namespace("foo").unwrap();
|
||||
let n = sd.namespace(&"foo".into()).unwrap();
|
||||
let partition_id;
|
||||
let table_id;
|
||||
{
|
||||
let mem_table = n.table_data("mem").unwrap();
|
||||
assert!(n.table_data("cpu").is_some());
|
||||
let mem_table = mem_table.write().await;
|
||||
let p = mem_table.partition_data.get(&"1970-01-01".into()).unwrap();
|
||||
let mem_table = n.table_data(&"mem".into()).unwrap();
|
||||
assert!(n.table_data(&"cpu".into()).is_some());
|
||||
|
||||
let mem_table = mem_table.write().await;
|
||||
table_id = mem_table.table_id();
|
||||
partition_id = p.id();
|
||||
|
||||
let p = mem_table
|
||||
.get_partition_by_key(&"1970-01-01".into())
|
||||
.unwrap();
|
||||
partition_id = p.partition_id();
|
||||
}
|
||||
{
|
||||
// verify the partition doesn't have a sort key before any data has been persisted
|
||||
|
|
@ -969,7 +827,8 @@ mod tests {
|
|||
assert!(partition_info.partition.sort_key.is_empty());
|
||||
}
|
||||
|
||||
data.persist(partition_id).await;
|
||||
data.persist(shard1.id, namespace.id, table_id, partition_id)
|
||||
.await;
|
||||
|
||||
// verify that a file got put into object store
|
||||
let file_paths: Vec<_> = object_store
|
||||
|
|
@ -1061,7 +920,7 @@ mod tests {
|
|||
.unwrap();
|
||||
assert_eq!(partition_info.partition.sort_key, vec!["time"]);
|
||||
|
||||
let mem_table = n.table_data("mem").unwrap();
|
||||
let mem_table = n.table_data(&"mem".into()).unwrap();
|
||||
let mem_table = mem_table.read().await;
|
||||
|
||||
// verify that the parquet_max_sequence_number got updated
|
||||
|
|
@ -1177,7 +1036,7 @@ mod tests {
|
|||
|
||||
// Get the namespace
|
||||
let sd = data.shards.get(&shard1.id).unwrap();
|
||||
let n = sd.namespace("foo").unwrap();
|
||||
let n = sd.namespace(&"foo".into()).unwrap();
|
||||
|
||||
let expected_progress = ShardProgress::new().with_buffered(SequenceNumber::new(1));
|
||||
assert_progress(&data, shard_index, expected_progress).await;
|
||||
|
|
@ -1336,23 +1195,28 @@ mod tests {
|
|||
Arc::clone(&metrics),
|
||||
Arc::new(SystemProvider::new()),
|
||||
);
|
||||
let exec = Executor::new(1);
|
||||
|
||||
let partition_provider = Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog)));
|
||||
|
||||
let data = NamespaceData::new(namespace.id, shard.id, partition_provider, &*metrics);
|
||||
let data = NamespaceData::new(
|
||||
namespace.id,
|
||||
"foo".into(),
|
||||
shard.id,
|
||||
partition_provider,
|
||||
&*metrics,
|
||||
);
|
||||
|
||||
// w1 should be ignored because the per-partition replay offset is set
|
||||
// to 1 already, so it shouldn't be buffered and the buffer should
|
||||
// remain empty.
|
||||
let should_pause = data
|
||||
.buffer_operation(DmlOperation::Write(w1), &catalog, &manager.handle(), &exec)
|
||||
.buffer_operation(DmlOperation::Write(w1), &catalog, &manager.handle())
|
||||
.await
|
||||
.unwrap();
|
||||
{
|
||||
let table_data = data.table_data("mem").unwrap();
|
||||
let table_data = data.table_data(&"mem".into()).unwrap();
|
||||
let table = table_data.read().await;
|
||||
let p = table.partition_data.get(&"1970-01-01".into()).unwrap();
|
||||
let p = table.get_partition_by_key(&"1970-01-01".into()).unwrap();
|
||||
assert_eq!(
|
||||
p.max_persisted_sequence_number(),
|
||||
Some(SequenceNumber::new(1))
|
||||
|
|
@ -1362,13 +1226,13 @@ mod tests {
|
|||
assert!(!should_pause);
|
||||
|
||||
// w2 should be in the buffer
|
||||
data.buffer_operation(DmlOperation::Write(w2), &catalog, &manager.handle(), &exec)
|
||||
data.buffer_operation(DmlOperation::Write(w2), &catalog, &manager.handle())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table_data = data.table_data("mem").unwrap();
|
||||
let table_data = data.table_data(&"mem".into()).unwrap();
|
||||
let table = table_data.read().await;
|
||||
let partition = table.partition_data.get(&"1970-01-01".into()).unwrap();
|
||||
let partition = table.get_partition_by_key(&"1970-01-01".into()).unwrap();
|
||||
assert_eq!(
|
||||
partition.data.buffer.as_ref().unwrap().min_sequence_number,
|
||||
SequenceNumber::new(2)
|
||||
|
|
@ -1454,19 +1318,6 @@ mod tests {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
data.shard(shard1.id)
|
||||
.unwrap()
|
||||
.namespace(&namespace.name)
|
||||
.unwrap()
|
||||
.table_data("mem")
|
||||
.unwrap()
|
||||
.read()
|
||||
.await
|
||||
.tombstone_max_sequence_number(),
|
||||
None,
|
||||
);
|
||||
|
||||
let predicate = DeletePredicate {
|
||||
range: TimestampRange::new(1, 2),
|
||||
exprs: vec![],
|
||||
|
|
@ -1485,19 +1336,6 @@ mod tests {
|
|||
data.buffer_operation(shard1.id, DmlOperation::Delete(d1), &manager.handle())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
data.shard(shard1.id)
|
||||
.unwrap()
|
||||
.namespace(&namespace.name)
|
||||
.unwrap()
|
||||
.table_data("mem")
|
||||
.unwrap()
|
||||
.read()
|
||||
.await
|
||||
.tombstone_max_sequence_number(),
|
||||
Some(SequenceNumber::new(2)),
|
||||
);
|
||||
}
|
||||
|
||||
/// Verifies that the progress in data is the same as expected_progress
|
||||
|
|
@ -1513,132 +1351,4 @@ mod tests {
|
|||
|
||||
assert_eq!(progresses, expected_progresses);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ingester_query_response_flatten() {
|
||||
let batch_1_1 = lp_to_batch("table x=1 0");
|
||||
let batch_1_2 = lp_to_batch("table x=2 1");
|
||||
let batch_2 = lp_to_batch("table y=1 10");
|
||||
let batch_3 = lp_to_batch("table z=1 10");
|
||||
|
||||
let schema_1 = batch_1_1.schema();
|
||||
let schema_2 = batch_2.schema();
|
||||
let schema_3 = batch_3.schema();
|
||||
|
||||
let response = IngesterQueryResponse::new(Box::pin(futures::stream::iter([
|
||||
Ok(IngesterQueryPartition::new(
|
||||
Box::pin(futures::stream::iter([
|
||||
Ok(Box::pin(TestRecordBatchStream::new(
|
||||
vec![
|
||||
Ok(batch_1_1.clone()),
|
||||
Err(ArrowError::NotYetImplemented("not yet implemeneted".into())),
|
||||
Ok(batch_1_2.clone()),
|
||||
],
|
||||
Arc::clone(&schema_1),
|
||||
)) as _),
|
||||
Err(ArrowError::InvalidArgumentError("invalid arg".into())),
|
||||
Ok(Box::pin(TestRecordBatchStream::new(
|
||||
vec![Ok(batch_2.clone())],
|
||||
Arc::clone(&schema_2),
|
||||
)) as _),
|
||||
Ok(Box::pin(TestRecordBatchStream::new(vec![], Arc::clone(&schema_3))) as _),
|
||||
])),
|
||||
PartitionId::new(2),
|
||||
PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: Some(SequenceNumber::new(1)),
|
||||
},
|
||||
)),
|
||||
Err(ArrowError::IoError("some io error".into())),
|
||||
Ok(IngesterQueryPartition::new(
|
||||
Box::pin(futures::stream::iter([])),
|
||||
PartitionId::new(1),
|
||||
PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
},
|
||||
)),
|
||||
])));
|
||||
|
||||
let actual: Vec<_> = response.flatten().collect().await;
|
||||
let expected = vec![
|
||||
Ok(FlatIngesterQueryResponse::StartPartition {
|
||||
partition_id: PartitionId::new(2),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: Some(SequenceNumber::new(1)),
|
||||
},
|
||||
}),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema: schema_1 }),
|
||||
Ok(FlatIngesterQueryResponse::RecordBatch { batch: batch_1_1 }),
|
||||
Err(ArrowError::NotYetImplemented("not yet implemeneted".into())),
|
||||
Ok(FlatIngesterQueryResponse::RecordBatch { batch: batch_1_2 }),
|
||||
Err(ArrowError::InvalidArgumentError("invalid arg".into())),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema: schema_2 }),
|
||||
Ok(FlatIngesterQueryResponse::RecordBatch { batch: batch_2 }),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema: schema_3 }),
|
||||
Err(ArrowError::IoError("some io error".into())),
|
||||
Ok(FlatIngesterQueryResponse::StartPartition {
|
||||
partition_id: PartitionId::new(1),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
assert_eq!(actual.len(), expected.len());
|
||||
for (actual, expected) in actual.into_iter().zip(expected) {
|
||||
match (actual, expected) {
|
||||
(Ok(actual), Ok(expected)) => {
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
(Err(_), Err(_)) => {
|
||||
// cannot compare `ArrowError`, but it's unlikely that someone changed the error
|
||||
}
|
||||
(Ok(_), Err(_)) => panic!("Actual is Ok but expected is Err"),
|
||||
(Err(_), Ok(_)) => panic!("Actual is Err but expected is Ok"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn lp_to_batch(lp: &str) -> RecordBatch {
|
||||
lp_to_mutable_batch(lp).1.to_arrow(Selection::All).unwrap()
|
||||
}
|
||||
|
||||
pub struct TestRecordBatchStream {
|
||||
schema: SchemaRef,
|
||||
batches: Vec<Result<RecordBatch, ArrowError>>,
|
||||
}
|
||||
|
||||
impl TestRecordBatchStream {
|
||||
pub fn new(batches: Vec<Result<RecordBatch, ArrowError>>, schema: SchemaRef) -> Self {
|
||||
Self { schema, batches }
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordBatchStream for TestRecordBatchStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
Arc::clone(&self.schema)
|
||||
}
|
||||
}
|
||||
|
||||
impl futures::Stream for TestRecordBatchStream {
|
||||
type Item = Result<RecordBatch, ArrowError>;
|
||||
|
||||
fn poll_next(
|
||||
mut self: std::pin::Pin<&mut Self>,
|
||||
_: &mut Context<'_>,
|
||||
) -> Poll<Option<Self::Item>> {
|
||||
if self.batches.is_empty() {
|
||||
Poll::Ready(None)
|
||||
} else {
|
||||
Poll::Ready(Some(self.batches.remove(0)))
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(self.batches.len(), Some(self.batches.len()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,36 +1,91 @@
|
|||
//! Namespace level data buffer structures.
|
||||
|
||||
use std::{
|
||||
collections::{btree_map::Entry, BTreeMap},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use data_types::{NamespaceId, PartitionKey, SequenceNumber, ShardId};
|
||||
use data_types::{NamespaceId, PartitionKey, SequenceNumber, ShardId, TableId};
|
||||
use dml::DmlOperation;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::exec::Executor;
|
||||
use metric::U64Counter;
|
||||
use observability_deps::tracing::warn;
|
||||
use parking_lot::RwLock;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::ResultExt;
|
||||
use write_summary::ShardProgress;
|
||||
|
||||
#[cfg(test)]
|
||||
use super::triggers::TestTriggers;
|
||||
use super::{
|
||||
partition::{resolver::PartitionProvider, PersistingBatch},
|
||||
table::TableData,
|
||||
partition::resolver::PartitionProvider,
|
||||
table::{TableData, TableName},
|
||||
};
|
||||
use crate::lifecycle::LifecycleHandle;
|
||||
|
||||
/// A double-referenced map where [`TableData`] can be looked up by name, or ID.
|
||||
#[derive(Debug, Default)]
|
||||
struct DoubleRef {
|
||||
// TODO(4880): this can be removed when IDs are sent over the wire.
|
||||
by_name: HashMap<TableName, Arc<tokio::sync::RwLock<TableData>>>,
|
||||
by_id: HashMap<TableId, Arc<tokio::sync::RwLock<TableData>>>,
|
||||
}
|
||||
|
||||
impl DoubleRef {
|
||||
fn insert(&mut self, t: TableData) -> Arc<tokio::sync::RwLock<TableData>> {
|
||||
let name = t.table_name().clone();
|
||||
let id = t.table_id();
|
||||
|
||||
let t = Arc::new(tokio::sync::RwLock::new(t));
|
||||
self.by_name.insert(name, Arc::clone(&t));
|
||||
self.by_id.insert(id, Arc::clone(&t));
|
||||
t
|
||||
}
|
||||
|
||||
fn by_name(&self, name: &TableName) -> Option<Arc<tokio::sync::RwLock<TableData>>> {
|
||||
self.by_name.get(name).map(Arc::clone)
|
||||
}
|
||||
|
||||
fn by_id(&self, id: TableId) -> Option<Arc<tokio::sync::RwLock<TableData>>> {
|
||||
self.by_id.get(&id).map(Arc::clone)
|
||||
}
|
||||
}
|
||||
|
||||
/// The name of a Namespace, used as its string identifier.
///
/// Backed by a reference-counted string, so cloning does not copy the text.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct NamespaceName(Arc<str>);

impl<T> From<T> for NamespaceName
where
    T: AsRef<str>,
{
    /// Copy the string content of `v` into a new shared allocation.
    fn from(v: T) -> Self {
        let text: &str = v.as_ref();
        Self(text.into())
    }
}

impl std::ops::Deref for NamespaceName {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &*self.0
    }
}

impl std::fmt::Display for NamespaceName {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the string's own `Display` so that formatter options
        // such as width and alignment are honoured.
        std::fmt::Display::fmt(&*self.0, f)
    }
}
|
||||
|
||||
/// Data of a Namespace that belongs to a given Shard
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NamespaceData {
|
||||
namespace_id: NamespaceId,
|
||||
namespace_name: NamespaceName,
|
||||
|
||||
/// The catalog ID of the shard this namespace is being populated from.
|
||||
shard_id: ShardId,
|
||||
|
||||
tables: RwLock<BTreeMap<String, Arc<tokio::sync::RwLock<TableData>>>>,
|
||||
tables: RwLock<DoubleRef>,
|
||||
table_count: U64Counter,
|
||||
|
||||
/// The resolver of `(shard_id, table_id, partition_key)` to
|
||||
|
|
@ -87,8 +142,9 @@ pub(crate) struct NamespaceData {
|
|||
|
||||
impl NamespaceData {
|
||||
/// Initialize new tables with default partition template of daily
|
||||
pub fn new(
|
||||
pub(super) fn new(
|
||||
namespace_id: NamespaceId,
|
||||
namespace_name: NamespaceName,
|
||||
shard_id: ShardId,
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
metrics: &metric::Registry,
|
||||
|
|
@ -102,6 +158,7 @@ impl NamespaceData {
|
|||
|
||||
Self {
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
shard_id,
|
||||
tables: Default::default(),
|
||||
table_count,
|
||||
|
|
@ -120,7 +177,6 @@ impl NamespaceData {
|
|||
dml_operation: DmlOperation,
|
||||
catalog: &Arc<dyn Catalog>,
|
||||
lifecycle_handle: &dyn LifecycleHandle,
|
||||
executor: &Executor,
|
||||
) -> Result<bool, super::Error> {
|
||||
let sequence_number = dml_operation
|
||||
.meta()
|
||||
|
|
@ -146,6 +202,7 @@ impl NamespaceData {
|
|||
.clone();
|
||||
|
||||
for (t, b) in write.into_tables() {
|
||||
let t = TableName::from(t);
|
||||
let table_data = match self.table_data(&t) {
|
||||
Some(t) => t,
|
||||
None => self.insert_table(&t, catalog).await?,
|
||||
|
|
@ -171,19 +228,17 @@ impl NamespaceData {
|
|||
Ok(pause_writes)
|
||||
}
|
||||
DmlOperation::Delete(delete) => {
|
||||
let table_name = delete.table_name().context(super::TableNotPresentSnafu)?;
|
||||
let table_data = match self.table_data(table_name) {
|
||||
Some(t) => t,
|
||||
None => self.insert_table(table_name, catalog).await?,
|
||||
};
|
||||
// Deprecated delete support:
|
||||
// https://github.com/influxdata/influxdb_iox/issues/5825
|
||||
warn!(
|
||||
shard_id=%self.shard_id,
|
||||
namespace_name=%self.namespace_name,
|
||||
namespace_id=%self.namespace_id,
|
||||
table_name=?delete.table_name(),
|
||||
sequence_number=?delete.meta().sequence(),
|
||||
"discarding unsupported delete op"
|
||||
);
|
||||
|
||||
let mut table_data = table_data.write().await;
|
||||
|
||||
table_data
|
||||
.buffer_delete(delete.predicate(), sequence_number, &**catalog, executor)
|
||||
.await?;
|
||||
|
||||
// don't pause writes since deletes don't count towards memory limits
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
|
@ -194,16 +249,16 @@ impl NamespaceData {
|
|||
#[cfg(test)] // Only used in tests
|
||||
pub(crate) async fn snapshot(
|
||||
&self,
|
||||
table_name: &str,
|
||||
table_name: &TableName,
|
||||
partition_key: &PartitionKey,
|
||||
) -> Option<(
|
||||
Vec<Arc<super::partition::SnapshotBatch>>,
|
||||
Option<Arc<PersistingBatch>>,
|
||||
Option<Arc<super::partition::PersistingBatch>>,
|
||||
)> {
|
||||
if let Some(t) = self.table_data(table_name) {
|
||||
let mut t = t.write().await;
|
||||
|
||||
return t.partition_data.get_mut(partition_key).map(|p| {
|
||||
return t.get_partition_by_key_mut(partition_key).map(|p| {
|
||||
p.data
|
||||
.generate_snapshot()
|
||||
.expect("snapshot on mutable batch should never fail");
|
||||
|
|
@ -217,17 +272,17 @@ impl NamespaceData {
|
|||
/// Snapshots the mutable buffer for the partition, which clears it out and then moves all
|
||||
/// snapshots over to a persisting batch, which is returned. If there is no data to snapshot
|
||||
/// or persist, None will be returned.
|
||||
#[cfg(test)] // Only used in tests
|
||||
pub(crate) async fn snapshot_to_persisting(
|
||||
&self,
|
||||
table_name: &str,
|
||||
table_name: &TableName,
|
||||
partition_key: &PartitionKey,
|
||||
) -> Option<Arc<PersistingBatch>> {
|
||||
) -> Option<Arc<super::partition::PersistingBatch>> {
|
||||
if let Some(table_data) = self.table_data(table_name) {
|
||||
let mut table_data = table_data.write().await;
|
||||
|
||||
return table_data
|
||||
.partition_data
|
||||
.get_mut(partition_key)
|
||||
.get_partition_by_key_mut(partition_key)
|
||||
.and_then(|partition_data| partition_data.snapshot_to_persisting_batch());
|
||||
}
|
||||
|
||||
|
|
@ -237,45 +292,55 @@ impl NamespaceData {
|
|||
/// Gets the buffered table data
|
||||
pub(crate) fn table_data(
|
||||
&self,
|
||||
table_name: &str,
|
||||
table_name: &TableName,
|
||||
) -> Option<Arc<tokio::sync::RwLock<TableData>>> {
|
||||
let t = self.tables.read();
|
||||
t.get(table_name).cloned()
|
||||
t.by_name(table_name)
|
||||
}
|
||||
|
||||
/// Return the table data by ID.
|
||||
pub(crate) fn table_id(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Option<Arc<tokio::sync::RwLock<TableData>>> {
|
||||
let t = self.tables.read();
|
||||
t.by_id(table_id)
|
||||
}
|
||||
|
||||
/// Inserts the table or returns it if it happens to be inserted by some other thread
|
||||
async fn insert_table(
|
||||
&self,
|
||||
table_name: &str,
|
||||
table_name: &TableName,
|
||||
catalog: &Arc<dyn Catalog>,
|
||||
) -> Result<Arc<tokio::sync::RwLock<TableData>>, super::Error> {
|
||||
let mut repos = catalog.repositories().await;
|
||||
|
||||
let info = repos
|
||||
.tables()
|
||||
.get_table_persist_info(self.shard_id, self.namespace_id, table_name)
|
||||
.await
|
||||
.context(super::CatalogSnafu)?
|
||||
.context(super::TableNotFoundSnafu { table_name })?;
|
||||
.ok_or_else(|| super::Error::TableNotFound {
|
||||
table_name: table_name.to_string(),
|
||||
})?;
|
||||
|
||||
let mut t = self.tables.write();
|
||||
|
||||
let data = match t.entry(table_name.to_string()) {
|
||||
Entry::Vacant(v) => {
|
||||
let v = v.insert(Arc::new(tokio::sync::RwLock::new(TableData::new(
|
||||
Ok(match t.by_name(table_name) {
|
||||
Some(v) => v,
|
||||
None => {
|
||||
self.table_count.inc(1);
|
||||
|
||||
// Insert the table and then return a ref to it.
|
||||
t.insert(TableData::new(
|
||||
info.table_id,
|
||||
table_name,
|
||||
table_name.clone(),
|
||||
self.shard_id,
|
||||
self.namespace_id,
|
||||
info.tombstone_max_sequence_number,
|
||||
Arc::clone(&self.partition_provider),
|
||||
))));
|
||||
self.table_count.inc(1);
|
||||
Arc::clone(v)
|
||||
))
|
||||
}
|
||||
Entry::Occupied(v) => Arc::clone(v.get()),
|
||||
};
|
||||
|
||||
Ok(data)
|
||||
})
|
||||
}
|
||||
|
||||
/// Walks down the table and partition and clears the persisting batch. The sequence number is
|
||||
|
|
@ -283,13 +348,13 @@ impl NamespaceData {
|
|||
/// data buffer.
|
||||
pub(super) async fn mark_persisted(
|
||||
&self,
|
||||
table_name: &str,
|
||||
table_name: &TableName,
|
||||
partition_key: &PartitionKey,
|
||||
sequence_number: SequenceNumber,
|
||||
) {
|
||||
if let Some(t) = self.table_data(table_name) {
|
||||
let mut t = t.write().await;
|
||||
let partition = t.partition_data.get_mut(partition_key);
|
||||
let partition = t.get_partition_by_key_mut(partition_key);
|
||||
|
||||
if let Some(p) = partition {
|
||||
p.mark_persisted(sequence_number);
|
||||
|
|
@ -299,7 +364,7 @@ impl NamespaceData {
|
|||
|
||||
/// Return progress from this Namespace
|
||||
pub(super) async fn progress(&self) -> ShardProgress {
|
||||
let tables: Vec<_> = self.tables.read().values().map(Arc::clone).collect();
|
||||
let tables: Vec<_> = self.tables.read().by_id.values().map(Arc::clone).collect();
|
||||
|
||||
// Consolidate progress across partitions.
|
||||
let mut progress = ShardProgress::new()
|
||||
|
|
@ -323,6 +388,12 @@ impl NamespaceData {
|
|||
pub(super) fn table_count(&self) -> &U64Counter {
|
||||
&self.table_count
|
||||
}
|
||||
|
||||
/// Returns the [`NamespaceName`] for this namespace.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn namespace_name(&self) -> &NamespaceName {
|
||||
&self.namespace_name
|
||||
}
|
||||
}
|
||||
|
||||
/// RAII struct that sets buffering sequence number on creation and clears it on free
|
||||
|
|
@ -357,3 +428,92 @@ impl<'a> Drop for ScopedSequenceNumber<'a> {
|
|||
*buffering_sequence_number = None;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::{PartitionId, ShardIndex};
|
||||
use metric::{Attributes, Metric};
|
||||
|
||||
use crate::{
|
||||
data::partition::{resolver::MockPartitionProvider, PartitionData, SortKeyState},
|
||||
lifecycle::mock_handle::MockLifecycleHandle,
|
||||
test_util::{make_write_op, populate_catalog},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
|
||||
const TABLE_NAME: &str = "bananas";
|
||||
const NAMESPACE_NAME: &str = "platanos";
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_double_ref() {
|
||||
let metrics = Arc::new(metric::Registry::default());
|
||||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (shard_id, ns_id, table_id) =
|
||||
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
// Configure the mock partition provider to return a partition for this
|
||||
// table ID.
|
||||
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
|
||||
PartitionData::new(
|
||||
PartitionId::new(0),
|
||||
PartitionKey::from("banana-split"),
|
||||
shard_id,
|
||||
ns_id,
|
||||
table_id,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
),
|
||||
));
|
||||
|
||||
let ns = NamespaceData::new(
|
||||
ns_id,
|
||||
NAMESPACE_NAME.into(),
|
||||
shard_id,
|
||||
partition_provider,
|
||||
&*metrics,
|
||||
);
|
||||
|
||||
// Assert the namespace name was stored
|
||||
assert_eq!(&**ns.namespace_name(), NAMESPACE_NAME);
|
||||
|
||||
// Assert the namespace does not contain the test data
|
||||
assert!(ns.table_data(&TABLE_NAME.into()).is_none());
|
||||
assert!(ns.table_id(table_id).is_none());
|
||||
|
||||
// Write some test data
|
||||
ns.buffer_operation(
|
||||
DmlOperation::Write(make_write_op(
|
||||
&PartitionKey::from("banana-split"),
|
||||
SHARD_INDEX,
|
||||
NAMESPACE_NAME,
|
||||
0,
|
||||
r#"bananas,city=Medford day="sun",temp=55 22"#,
|
||||
)),
|
||||
&catalog,
|
||||
&MockLifecycleHandle::default(),
|
||||
)
|
||||
.await
|
||||
.expect("buffer op should succeed");
|
||||
|
||||
// Both forms of referencing the table should succeed
|
||||
assert!(ns.table_data(&TABLE_NAME.into()).is_some());
|
||||
assert!(ns.table_id(table_id).is_some());
|
||||
|
||||
// And the table counter metric should increase
|
||||
let tables = metrics
|
||||
.get_instrument::<Metric<U64Counter>>("ingester_tables_total")
|
||||
.expect("failed to read metric")
|
||||
.get_observer(&Attributes::from([]))
|
||||
.expect("failed to get observer")
|
||||
.fetch();
|
||||
assert_eq!(tables, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,18 +3,21 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::{
|
||||
NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId, Tombstone,
|
||||
};
|
||||
use iox_query::exec::Executor;
|
||||
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId};
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::selection::Selection;
|
||||
use observability_deps::tracing::*;
|
||||
use schema::{selection::Selection, sort::SortKey};
|
||||
use snafu::ResultExt;
|
||||
use uuid::Uuid;
|
||||
use write_summary::ShardProgress;
|
||||
|
||||
use self::buffer::{BufferBatch, DataBuffer};
|
||||
use crate::{data::query_dedup::query, query::QueryableBatch};
|
||||
use self::{
|
||||
buffer::{BufferBatch, DataBuffer},
|
||||
resolver::DeferredSortKey,
|
||||
};
|
||||
use crate::{querier_handler::PartitionStatus, query::QueryableBatch};
|
||||
|
||||
use super::table::TableName;
|
||||
|
||||
mod buffer;
|
||||
pub mod resolver;
|
||||
|
|
@ -28,20 +31,6 @@ pub(crate) struct UnpersistedPartitionData {
|
|||
pub(crate) partition_status: PartitionStatus,
|
||||
}
|
||||
|
||||
/// Status of a partition that has unpersisted data.
|
||||
///
|
||||
/// Note that this structure is specific to a partition (which itself is bound to a table and
|
||||
/// shard)!
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub struct PartitionStatus {
|
||||
/// Max sequence number persisted
|
||||
pub parquet_max_sequence_number: Option<SequenceNumber>,
|
||||
|
||||
/// Max sequence number for a tombstone
|
||||
pub tombstone_max_sequence_number: Option<SequenceNumber>,
|
||||
}
|
||||
|
||||
/// PersistingBatch contains all needed info and data for creating
|
||||
/// a parquet file for given set of SnapshotBatches
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
|
|
@ -132,7 +121,28 @@ impl SnapshotBatch {
|
|||
}
|
||||
}
|
||||
|
||||
/// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard
|
||||
/// The load state of the [`SortKey`] for a given partition.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum SortKeyState {
|
||||
/// The [`SortKey`] has not yet been fetched from the catalog, and will be
|
||||
/// lazy loaded (or loaded in the background) by a call to
|
||||
/// [`DeferredSortKey::get()`].
|
||||
Deferred(DeferredSortKey),
|
||||
/// The sort key is known and specified.
|
||||
Provided(Option<SortKey>),
|
||||
}
|
||||
|
||||
impl SortKeyState {
|
||||
async fn get(&self) -> Option<SortKey> {
|
||||
match self {
|
||||
Self::Deferred(v) => v.get().await,
|
||||
Self::Provided(v) => v.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Data of an IOx Partition of a given Table of a Namespace that belongs to a
|
||||
/// given Shard
|
||||
#[derive(Debug)]
|
||||
pub struct PartitionData {
|
||||
/// The catalog ID of the partition this buffer is for.
|
||||
|
|
@ -140,12 +150,23 @@ pub struct PartitionData {
|
|||
/// The string partition key for this partition.
|
||||
partition_key: PartitionKey,
|
||||
|
||||
/// The sort key of this partition.
|
||||
///
|
||||
/// This can be known, in which case this field will contain a
|
||||
/// [`SortKeyState::Provided`] with the [`SortKey`], or unknown with a value
|
||||
/// of [`SortKeyState::Deferred`] causing it to be loaded from the catalog
|
||||
/// (potentially) in the background or at read time.
|
||||
///
|
||||
/// Callers should use [`Self::sort_key()`] to be abstracted away from these
|
||||
/// fetch details.
|
||||
sort_key: SortKeyState,
|
||||
|
||||
/// The shard, namespace & table IDs for this partition.
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
/// The name of the table this partition is part of.
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
|
||||
pub(super) data: DataBuffer,
|
||||
|
||||
|
|
@ -156,18 +177,21 @@ pub struct PartitionData {
|
|||
|
||||
impl PartitionData {
|
||||
/// Initialize a new partition data buffer
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn new(
|
||||
id: PartitionId,
|
||||
partition_key: PartitionKey,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
sort_key: SortKeyState,
|
||||
max_persisted_sequence_number: Option<SequenceNumber>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
partition_key,
|
||||
sort_key,
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
|
|
@ -209,100 +233,36 @@ impl PartitionData {
|
|||
sequence_number: SequenceNumber,
|
||||
mb: MutableBatch,
|
||||
) -> Result<(), super::Error> {
|
||||
match &mut self.data.buffer {
|
||||
let (min_sequence_number, max_sequence_number) = match &mut self.data.buffer {
|
||||
Some(buf) => {
|
||||
buf.max_sequence_number = sequence_number.max(buf.max_sequence_number);
|
||||
buf.data.extend_from(&mb).context(super::BufferWriteSnafu)?;
|
||||
(buf.min_sequence_number, buf.max_sequence_number)
|
||||
}
|
||||
None => {
|
||||
self.data.buffer = Some(BufferBatch {
|
||||
min_sequence_number: sequence_number,
|
||||
max_sequence_number: sequence_number,
|
||||
data: mb,
|
||||
})
|
||||
});
|
||||
(sequence_number, sequence_number)
|
||||
}
|
||||
}
|
||||
};
|
||||
trace!(
|
||||
min_sequence_number=?min_sequence_number,
|
||||
max_sequence_number=?max_sequence_number,
|
||||
"buffered write"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Buffers a new tombstone:
|
||||
/// . All the data in the `buffer` and `snapshots` will be replaced with one
|
||||
/// tombstone-applied snapshot
|
||||
/// . The tombstone is only added in the `deletes_during_persisting` if the `persisting`
|
||||
/// exists
|
||||
pub(super) async fn buffer_tombstone(&mut self, executor: &Executor, tombstone: Tombstone) {
|
||||
self.data.add_tombstone(tombstone.clone());
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// First apply the tombstone on all in-memory & non-persisting data
|
||||
// Make a QueryableBatch for all buffer + snapshots + the given tombstone
|
||||
let max_sequence_number = tombstone.sequence_number;
|
||||
let query_batch = match self.data.snapshot_to_queryable_batch(
|
||||
&self.table_name,
|
||||
self.id,
|
||||
Some(tombstone.clone()),
|
||||
) {
|
||||
Some(query_batch) if !query_batch.is_empty() => query_batch,
|
||||
_ => {
|
||||
// No need to proceed further
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let (min_sequence_number, _) = query_batch.min_max_sequence_numbers();
|
||||
assert!(min_sequence_number <= max_sequence_number);
|
||||
|
||||
// Run query on the QueryableBatch to apply the tombstone.
|
||||
let stream = match query(executor, Arc::new(query_batch)).await {
|
||||
Err(e) => {
|
||||
// this should never error out. if it does, we need to crash hard so
|
||||
// someone can take a look.
|
||||
panic!("unable to apply tombstones on snapshots: {:?}", e);
|
||||
}
|
||||
Ok(stream) => stream,
|
||||
};
|
||||
let record_batches = match datafusion::physical_plan::common::collect(stream).await {
|
||||
Err(e) => {
|
||||
// this should never error out. if it does, we need to crash hard so
|
||||
// someone can take a look.
|
||||
panic!("unable to collect record batches: {:?}", e);
|
||||
}
|
||||
Ok(batches) => batches,
|
||||
};
|
||||
|
||||
// Merge all result record batches into one record batch
|
||||
// and make a snapshot for it
|
||||
let snapshot = if !record_batches.is_empty() {
|
||||
let record_batch =
|
||||
arrow::compute::concat_batches(&record_batches[0].schema(), &record_batches)
|
||||
.unwrap_or_else(|e| {
|
||||
panic!("unable to concat record batches: {:?}", e);
|
||||
});
|
||||
let snapshot = SnapshotBatch {
|
||||
min_sequence_number,
|
||||
max_sequence_number,
|
||||
data: Arc::new(record_batch),
|
||||
};
|
||||
|
||||
Some(Arc::new(snapshot))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// Add the tombstone-applied data back in as one snapshot
|
||||
if let Some(snapshot) = snapshot {
|
||||
self.data.snapshots.push(snapshot);
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the progress from this Partition
|
||||
pub(super) fn progress(&self) -> ShardProgress {
|
||||
self.data.progress()
|
||||
}
|
||||
|
||||
pub(super) fn id(&self) -> PartitionId {
|
||||
pub(super) fn partition_id(&self) -> PartitionId {
|
||||
self.id
|
||||
}
|
||||
|
||||
|
|
@ -347,6 +307,13 @@ impl PartitionData {
|
|||
pub fn namespace_id(&self) -> NamespaceId {
|
||||
self.namespace_id
|
||||
}
|
||||
|
||||
/// Return the [`SortKey`] for this partition.
|
||||
///
|
||||
/// NOTE: this MAY involve querying the catalog with unbounded retries.
|
||||
pub async fn sort_key(&self) -> Option<SortKey> {
|
||||
self.sort_key.get().await
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -355,7 +322,6 @@ mod tests {
|
|||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::create_tombstone;
|
||||
|
||||
#[test]
|
||||
fn snapshot_buffer_different_but_compatible_schemas() {
|
||||
|
|
@ -366,6 +332,7 @@ mod tests {
|
|||
NamespaceId::new(42),
|
||||
TableId::new(1),
|
||||
"foo".into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
);
|
||||
|
||||
|
|
@ -401,7 +368,7 @@ mod tests {
|
|||
|
||||
// Test deletes mixed with writes on a single partition
|
||||
#[tokio::test]
|
||||
async fn writes_and_deletes() {
|
||||
async fn writes() {
|
||||
// Make a partition with empty DataBuffer
|
||||
let s_id = 1;
|
||||
let t_id = 1;
|
||||
|
|
@ -413,9 +380,9 @@ mod tests {
|
|||
NamespaceId::new(42),
|
||||
TableId::new(t_id),
|
||||
"restaurant".into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
);
|
||||
let exec = Executor::new(1);
|
||||
|
||||
// ------------------------------------------
|
||||
// Fill `buffer`
|
||||
|
|
@ -438,42 +405,8 @@ mod tests {
|
|||
SequenceNumber::new(2)
|
||||
);
|
||||
assert_eq!(p.data.snapshots.len(), 0);
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 0);
|
||||
assert_eq!(p.data.persisting, None);
|
||||
|
||||
// ------------------------------------------
|
||||
// Delete
|
||||
// --- seq_num: 3
|
||||
let ts = create_tombstone(
|
||||
1, // tombstone id
|
||||
t_id, // table id
|
||||
s_id, // shard id
|
||||
3, // delete's seq_number
|
||||
0, // min time of data to get deleted
|
||||
20, // max time of data to get deleted
|
||||
"day=thu", // delete predicate
|
||||
);
|
||||
// one row will get deleted, the other is moved to snapshot
|
||||
p.buffer_tombstone(&exec, ts).await;
|
||||
|
||||
// verify data
|
||||
assert!(p.data.buffer.is_none()); // always empty after delete
|
||||
assert_eq!(p.data.snapshots.len(), 1); // one snpashot if there is data
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 0);
|
||||
assert_eq!(p.data.persisting, None);
|
||||
// snapshot only has one row since the other one got deleted
|
||||
let data = (*p.data.snapshots[0].data).clone();
|
||||
let expected = vec![
|
||||
"+--------+-----+------+--------------------------------+",
|
||||
"| city | day | temp | time |",
|
||||
"+--------+-----+------+--------------------------------+",
|
||||
"| Boston | fri | 50 | 1970-01-01T00:00:00.000000010Z |",
|
||||
"+--------+-----+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &[data]);
|
||||
assert_eq!(p.data.snapshots[0].min_sequence_number.get(), 1);
|
||||
assert_eq!(p.data.snapshots[0].max_sequence_number.get(), 3);
|
||||
|
||||
// ------------------------------------------
|
||||
// Fill `buffer`
|
||||
// --- seq_num: 4
|
||||
|
|
@ -493,50 +426,15 @@ mod tests {
|
|||
// verify data
|
||||
assert_eq!(
|
||||
p.data.buffer.as_ref().unwrap().min_sequence_number,
|
||||
SequenceNumber::new(4)
|
||||
SequenceNumber::new(1)
|
||||
);
|
||||
assert_eq!(
|
||||
p.data.buffer.as_ref().unwrap().max_sequence_number,
|
||||
SequenceNumber::new(5)
|
||||
);
|
||||
assert_eq!(p.data.snapshots.len(), 1); // existing sanpshot
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 0);
|
||||
assert_eq!(p.data.snapshots.len(), 0);
|
||||
assert_eq!(p.data.persisting, None);
|
||||
|
||||
// ------------------------------------------
|
||||
// Delete
|
||||
// --- seq_num: 6
|
||||
let ts = create_tombstone(
|
||||
2, // tombstone id
|
||||
t_id, // table id
|
||||
s_id, // shard id
|
||||
6, // delete's seq_number
|
||||
10, // min time of data to get deleted
|
||||
50, // max time of data to get deleted
|
||||
"city=Boston", // delete predicate
|
||||
);
|
||||
// two rows will get deleted, one from existing snapshot, one from the buffer being moved
|
||||
// to snpashot
|
||||
p.buffer_tombstone(&exec, ts).await;
|
||||
|
||||
// verify data
|
||||
assert!(p.data.buffer.is_none()); // always empty after delete
|
||||
assert_eq!(p.data.snapshots.len(), 1); // one snpashot
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 0);
|
||||
assert_eq!(p.data.persisting, None);
|
||||
// snapshot only has two rows since the other 2 rows with city=Boston have got deleted
|
||||
let data = (*p.data.snapshots[0].data).clone();
|
||||
let expected = vec![
|
||||
"+---------+-----+------+--------------------------------+",
|
||||
"| city | day | temp | time |",
|
||||
"+---------+-----+------+--------------------------------+",
|
||||
"| Andover | tue | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Medford | sun | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"+---------+-----+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &[data]);
|
||||
assert_eq!(p.data.snapshots[0].min_sequence_number.get(), 1);
|
||||
assert_eq!(p.data.snapshots[0].max_sequence_number.get(), 6);
|
||||
assert!(p.data.buffer.is_some());
|
||||
|
||||
// ------------------------------------------
|
||||
// Persisting
|
||||
|
|
@ -545,32 +443,12 @@ mod tests {
|
|||
// verify data
|
||||
assert!(p.data.buffer.is_none()); // always empty after issuing persit
|
||||
assert_eq!(p.data.snapshots.len(), 0); // always empty after issuing persit
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 0); // deletes not happen yet
|
||||
assert_eq!(p.data.persisting, Some(Arc::clone(&p_batch)));
|
||||
|
||||
// ------------------------------------------
|
||||
// Delete
|
||||
// --- seq_num: 7
|
||||
let ts = create_tombstone(
|
||||
3, // tombstone id
|
||||
t_id, // table id
|
||||
s_id, // shard id
|
||||
7, // delete's seq_number
|
||||
10, // min time of data to get deleted
|
||||
50, // max time of data to get deleted
|
||||
"temp=55", // delete predicate
|
||||
);
|
||||
// if a query come while persisting, the row with temp=55 will be deleted before
|
||||
// data is sent back to Querier
|
||||
p.buffer_tombstone(&exec, ts).await;
|
||||
|
||||
// verify data
|
||||
assert!(p.data.buffer.is_none()); // always empty after delete
|
||||
// no snpashots becasue buffer has not data yet and the
|
||||
// snapshot was empty too
|
||||
assert_eq!(p.data.snapshots.len(), 0);
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 1); // tombstone added since data is
|
||||
// persisting
|
||||
assert!(p.data.buffer.is_none());
|
||||
assert_eq!(p.data.snapshots.len(), 0); // no snpashots becasue buffer has not data yet and the
|
||||
// snapshot was empty too
|
||||
assert_eq!(p.data.persisting, Some(Arc::clone(&p_batch)));
|
||||
|
||||
// ------------------------------------------
|
||||
|
|
@ -591,7 +469,6 @@ mod tests {
|
|||
SequenceNumber::new(8)
|
||||
); // 1 newly added mutable batch of 3 rows of data
|
||||
assert_eq!(p.data.snapshots.len(), 0); // still empty
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 1);
|
||||
assert_eq!(p.data.persisting, Some(Arc::clone(&p_batch)));
|
||||
|
||||
// ------------------------------------------
|
||||
|
|
@ -600,7 +477,6 @@ mod tests {
|
|||
// verify data
|
||||
assert!(p.data.buffer.is_none()); // empty after snapshot
|
||||
assert_eq!(p.data.snapshots.len(), 1); // data moved from buffer
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 1);
|
||||
assert_eq!(p.data.persisting, Some(Arc::clone(&p_batch)));
|
||||
// snapshot has three rows moved from buffer
|
||||
let data = (*p.data.snapshots[0].data).clone();
|
||||
|
|
@ -616,41 +492,5 @@ mod tests {
|
|||
assert_batches_sorted_eq!(&expected, &[data]);
|
||||
assert_eq!(p.data.snapshots[0].min_sequence_number.get(), 8);
|
||||
assert_eq!(p.data.snapshots[0].max_sequence_number.get(), 8);
|
||||
|
||||
// ------------------------------------------
|
||||
// Delete
|
||||
// --- seq_num: 9
|
||||
let ts = create_tombstone(
|
||||
4, // tombstone id
|
||||
t_id, // table id
|
||||
s_id, // shard id
|
||||
9, // delete's seq_number
|
||||
10, // min time of data to get deleted
|
||||
50, // max time of data to get deleted
|
||||
"temp=60", // delete predicate
|
||||
);
|
||||
// the row with temp=60 will be removed from the sanphot
|
||||
p.buffer_tombstone(&exec, ts).await;
|
||||
|
||||
// verify data
|
||||
assert!(p.data.buffer.is_none()); // always empty after delete
|
||||
assert_eq!(p.data.snapshots.len(), 1); // new snapshot of the existing with delete applied
|
||||
assert_eq!(p.data.deletes_during_persisting().len(), 2); // one more tombstone added make it 2
|
||||
assert_eq!(p.data.persisting, Some(Arc::clone(&p_batch)));
|
||||
// snapshot has only 2 rows because the row with tem=60 was removed
|
||||
let data = (*p.data.snapshots[0].data).clone();
|
||||
let expected = vec![
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
"| city | day | temp | time |",
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
"| Wilmington | sun | 55 | 1970-01-01T00:00:00.000000035Z |",
|
||||
"| Boston | sun | 62 | 1970-01-01T00:00:00.000000038Z |",
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &[data]);
|
||||
assert_eq!(p.data.snapshots[0].min_sequence_number.get(), 8);
|
||||
assert_eq!(p.data.snapshots[0].max_sequence_number.get(), 9);
|
||||
|
||||
exec.join().await;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,13 +2,15 @@
|
|||
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::{PartitionId, SequenceNumber, ShardId, TableId, Tombstone};
|
||||
use data_types::{PartitionId, SequenceNumber, ShardId, TableId};
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::selection::Selection;
|
||||
use snafu::ResultExt;
|
||||
use uuid::Uuid;
|
||||
use write_summary::ShardProgress;
|
||||
|
||||
use crate::data::table::TableName;
|
||||
|
||||
use super::{PersistingBatch, QueryableBatch, SnapshotBatch};
|
||||
|
||||
/// Data of an IOx partition split into batches
|
||||
|
|
@ -38,14 +40,6 @@ pub(crate) struct DataBuffer {
|
|||
/// Buffer of incoming writes
|
||||
pub(crate) buffer: Option<BufferBatch>,
|
||||
|
||||
/// Buffer of tombstones whose time range may overlap with this partition.
|
||||
/// All tombstones were already applied to corresponding snapshots. This list
|
||||
/// only keep the ones that come during persisting. The reason
|
||||
/// we keep them becasue if a query comes, we need to apply these tombstones
|
||||
/// on the persiting data before sending it to the Querier
|
||||
/// When the `persiting` is done and removed, this list will get empty, too
|
||||
deletes_during_persisting: Vec<Tombstone>,
|
||||
|
||||
/// Data in `buffer` will be moved to a `snapshot` when one of these happens:
|
||||
/// . A background persist is called
|
||||
/// . A read request from Querier
|
||||
|
|
@ -70,14 +64,6 @@ pub(crate) struct DataBuffer {
|
|||
}
|
||||
|
||||
impl DataBuffer {
|
||||
/// Add a new tombstones into the [`DataBuffer`].
|
||||
pub(super) fn add_tombstone(&mut self, tombstone: Tombstone) {
|
||||
// Only keep this tombstone if some data is being persisted
|
||||
if self.persisting.is_some() {
|
||||
self.deletes_during_persisting.push(tombstone);
|
||||
}
|
||||
}
|
||||
|
||||
/// If a [`BufferBatch`] exists, convert it to a [`SnapshotBatch`] and add
|
||||
/// it to the list of snapshots.
|
||||
///
|
||||
|
|
@ -109,9 +95,8 @@ impl DataBuffer {
|
|||
/// Both buffer and snapshots will be empty after this
|
||||
pub(super) fn snapshot_to_queryable_batch(
|
||||
&mut self,
|
||||
table_name: &Arc<str>,
|
||||
table_name: &TableName,
|
||||
partition_id: PartitionId,
|
||||
tombstone: Option<Tombstone>,
|
||||
) -> Option<QueryableBatch> {
|
||||
self.generate_snapshot()
|
||||
.expect("This mutable batch snapshot error should be impossible.");
|
||||
|
|
@ -119,21 +104,11 @@ impl DataBuffer {
|
|||
let mut data = vec![];
|
||||
std::mem::swap(&mut data, &mut self.snapshots);
|
||||
|
||||
let mut tombstones = vec![];
|
||||
if let Some(tombstone) = tombstone {
|
||||
tombstones.push(tombstone);
|
||||
}
|
||||
|
||||
// only produce batch if there is any data
|
||||
if data.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(QueryableBatch::new(
|
||||
Arc::clone(table_name),
|
||||
partition_id,
|
||||
data,
|
||||
tombstones,
|
||||
))
|
||||
Some(QueryableBatch::new(table_name.clone(), partition_id, data))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -164,15 +139,13 @@ impl DataBuffer {
|
|||
shard_id: ShardId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
table_name: &Arc<str>,
|
||||
table_name: &TableName,
|
||||
) -> Option<Arc<PersistingBatch>> {
|
||||
if self.persisting.is_some() {
|
||||
panic!("Unable to snapshot while persisting. This is an unexpected state.")
|
||||
}
|
||||
|
||||
if let Some(queryable_batch) =
|
||||
self.snapshot_to_queryable_batch(table_name, partition_id, None)
|
||||
{
|
||||
if let Some(queryable_batch) = self.snapshot_to_queryable_batch(table_name, partition_id) {
|
||||
let persisting_batch = Arc::new(PersistingBatch {
|
||||
shard_id,
|
||||
table_id,
|
||||
|
|
@ -197,12 +170,7 @@ impl DataBuffer {
|
|||
};
|
||||
|
||||
// persisting data
|
||||
let mut queryable_batch = (*persisting.data).clone();
|
||||
|
||||
// Add new tombstones if any
|
||||
queryable_batch.add_tombstones(&self.deletes_during_persisting);
|
||||
|
||||
Some(queryable_batch)
|
||||
Some((*persisting.data).clone())
|
||||
}
|
||||
|
||||
/// Return the progress in this DataBuffer
|
||||
|
|
@ -239,12 +207,6 @@ impl DataBuffer {
|
|||
|
||||
pub(crate) fn mark_persisted(&mut self) {
|
||||
self.persisting = None;
|
||||
self.deletes_during_persisting.clear()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn deletes_during_persisting(&self) -> &[Tombstone] {
|
||||
self.deletes_during_persisting.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,18 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use backoff::BackoffConfig;
|
||||
use data_types::{
|
||||
NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId,
|
||||
};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use crate::data::partition::PartitionData;
|
||||
use crate::data::{
|
||||
partition::{resolver::DeferredSortKey, PartitionData, SortKeyState},
|
||||
table::TableName,
|
||||
};
|
||||
|
||||
use super::r#trait::PartitionProvider;
|
||||
|
||||
|
|
@ -43,6 +48,18 @@ struct Entry {
|
|||
/// Each cache hit _removes_ the entry from the cache - this eliminates the
|
||||
/// memory overhead for items that were hit. This is the expected (only valid!)
|
||||
/// usage pattern.
|
||||
///
|
||||
/// # Deferred Sort Key Loading
|
||||
///
|
||||
/// This cache does NOT cache the [`SortKey`] for each [`PartitionData`], as the
|
||||
/// sort key can be large and is likely unique per table, and thus not
|
||||
/// share-able across instances / prohibitively expensive to cache.
|
||||
///
|
||||
/// Instead cached instances are returned with a deferred sort key resolver
|
||||
/// which attempts to fetch the sort key in the background some time after
|
||||
/// construction.
|
||||
///
|
||||
/// [`SortKey`]: schema::sort::SortKey
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct PartitionCache<T> {
|
||||
// The inner delegate called for a cache miss.
|
||||
|
|
@ -59,13 +76,31 @@ pub(crate) struct PartitionCache<T> {
|
|||
/// a faster search for cache misses.
|
||||
#[allow(clippy::type_complexity)]
|
||||
entries: Mutex<HashMap<PartitionKey, HashMap<ShardId, HashMap<TableId, Entry>>>>,
|
||||
|
||||
/// Data needed to construct the [`DeferredSortKey`] for cached entries.
|
||||
catalog: Arc<dyn Catalog>,
|
||||
backoff_config: BackoffConfig,
|
||||
/// The maximum amount of time a [`DeferredSortKey`] may wait until
|
||||
/// pre-fetching the sort key in the background.
|
||||
max_smear: Duration,
|
||||
}
|
||||
|
||||
impl<T> PartitionCache<T> {
|
||||
/// Initialise a [`PartitionCache`] containing the specified partitions.
|
||||
///
|
||||
/// Any cache miss is passed through to `inner`.
|
||||
pub(crate) fn new<P>(inner: T, partitions: P) -> Self
|
||||
///
|
||||
/// Any cache hit returns a [`PartitionData`] configured with a
|
||||
/// [`SortKeyState::Deferred`] for deferred key loading in the background.
|
||||
/// The [`DeferredSortKey`] is initialised with the given `catalog`,
|
||||
/// `backoff_config`, and `max_smear` maximal load wait duration.
|
||||
pub(crate) fn new<P>(
|
||||
inner: T,
|
||||
partitions: P,
|
||||
max_smear: Duration,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
backoff_config: BackoffConfig,
|
||||
) -> Self
|
||||
where
|
||||
P: IntoIterator<Item = Partition>,
|
||||
{
|
||||
|
|
@ -97,6 +132,9 @@ impl<T> PartitionCache<T> {
|
|||
Self {
|
||||
entries: Mutex::new(entries),
|
||||
inner,
|
||||
catalog,
|
||||
backoff_config,
|
||||
max_smear,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -154,7 +192,7 @@ where
|
|||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
) -> PartitionData {
|
||||
// Use the cached PartitionKey instead of the caller's partition_key,
|
||||
// preferring to reuse the already-shared Arc<str> in the cache.
|
||||
|
|
@ -171,6 +209,12 @@ where
|
|||
namespace_id,
|
||||
table_id,
|
||||
table_name,
|
||||
SortKeyState::Deferred(DeferredSortKey::new(
|
||||
cached.partition_id,
|
||||
self.max_smear,
|
||||
Arc::clone(&__self.catalog),
|
||||
self.backoff_config.clone(),
|
||||
)),
|
||||
cached.max_sequence_number,
|
||||
);
|
||||
}
|
||||
|
|
@ -186,6 +230,8 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
|
||||
use crate::data::partition::resolver::MockPartitionProvider;
|
||||
|
||||
use super::*;
|
||||
|
|
@ -197,6 +243,22 @@ mod tests {
|
|||
const TABLE_ID: TableId = TableId::new(3);
|
||||
const TABLE_NAME: &str = "platanos";
|
||||
|
||||
fn new_cache<P>(
|
||||
inner: MockPartitionProvider,
|
||||
partitions: P,
|
||||
) -> PartitionCache<MockPartitionProvider>
|
||||
where
|
||||
P: IntoIterator<Item = Partition>,
|
||||
{
|
||||
PartitionCache::new(
|
||||
inner,
|
||||
partitions,
|
||||
Duration::from_secs(10_000_000),
|
||||
Arc::new(MemCatalog::new(Arc::new(metric::Registry::default()))),
|
||||
BackoffConfig::default(),
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_miss() {
|
||||
let data = PartitionData::new(
|
||||
|
|
@ -206,11 +268,12 @@ mod tests {
|
|||
NAMESPACE_ID,
|
||||
TABLE_ID,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
);
|
||||
let inner = MockPartitionProvider::default().with_partition(data);
|
||||
|
||||
let cache = PartitionCache::new(inner, []);
|
||||
let cache = new_cache(inner, []);
|
||||
let got = cache
|
||||
.get_partition(
|
||||
PARTITION_KEY.into(),
|
||||
|
|
@ -221,7 +284,7 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.id(), PARTITION_ID);
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.shard_id(), SHARD_ID);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(got.table_name(), TABLE_NAME);
|
||||
|
|
@ -238,11 +301,11 @@ mod tests {
|
|||
shard_id: SHARD_ID,
|
||||
table_id: TABLE_ID,
|
||||
partition_key: stored_partition_key.clone(),
|
||||
sort_key: Default::default(),
|
||||
sort_key: vec!["dos".to_string(), "bananas".to_string()],
|
||||
persisted_sequence_number: Default::default(),
|
||||
};
|
||||
|
||||
let cache = PartitionCache::new(inner, [partition]);
|
||||
let cache = new_cache(inner, [partition]);
|
||||
|
||||
let callers_partition_key = PartitionKey::from(PARTITION_KEY);
|
||||
let got = cache
|
||||
|
|
@ -255,7 +318,7 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.id(), PARTITION_ID);
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.shard_id(), SHARD_ID);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(got.table_name(), TABLE_NAME);
|
||||
|
|
@ -274,7 +337,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_miss_partition_jey() {
|
||||
async fn test_miss_partition_key() {
|
||||
let other_key = PartitionKey::from("test");
|
||||
let other_key_id = PartitionId::new(99);
|
||||
let inner = MockPartitionProvider::default().with_partition(PartitionData::new(
|
||||
|
|
@ -284,6 +347,7 @@ mod tests {
|
|||
NAMESPACE_ID,
|
||||
TABLE_ID,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
));
|
||||
|
||||
|
|
@ -296,7 +360,7 @@ mod tests {
|
|||
persisted_sequence_number: Default::default(),
|
||||
};
|
||||
|
||||
let cache = PartitionCache::new(inner, [partition]);
|
||||
let cache = new_cache(inner, [partition]);
|
||||
let got = cache
|
||||
.get_partition(
|
||||
other_key.clone(),
|
||||
|
|
@ -307,7 +371,7 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.id(), other_key_id);
|
||||
assert_eq!(got.partition_id(), other_key_id);
|
||||
assert_eq!(got.shard_id(), SHARD_ID);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(got.table_name(), TABLE_NAME);
|
||||
|
|
@ -323,6 +387,7 @@ mod tests {
|
|||
NAMESPACE_ID,
|
||||
other_table,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
));
|
||||
|
||||
|
|
@ -335,7 +400,7 @@ mod tests {
|
|||
persisted_sequence_number: Default::default(),
|
||||
};
|
||||
|
||||
let cache = PartitionCache::new(inner, [partition]);
|
||||
let cache = new_cache(inner, [partition]);
|
||||
let got = cache
|
||||
.get_partition(
|
||||
PARTITION_KEY.into(),
|
||||
|
|
@ -346,7 +411,7 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.id(), PARTITION_ID);
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.shard_id(), SHARD_ID);
|
||||
assert_eq!(got.table_id(), other_table);
|
||||
assert_eq!(got.table_name(), TABLE_NAME);
|
||||
|
|
@ -362,6 +427,7 @@ mod tests {
|
|||
NAMESPACE_ID,
|
||||
TABLE_ID,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
));
|
||||
|
||||
|
|
@ -374,7 +440,7 @@ mod tests {
|
|||
persisted_sequence_number: Default::default(),
|
||||
};
|
||||
|
||||
let cache = PartitionCache::new(inner, [partition]);
|
||||
let cache = new_cache(inner, [partition]);
|
||||
let got = cache
|
||||
.get_partition(
|
||||
PARTITION_KEY.into(),
|
||||
|
|
@ -385,7 +451,7 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.id(), PARTITION_ID);
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.shard_id(), other_shard);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(got.table_name(), TABLE_NAME);
|
||||
|
|
|
|||
|
|
@ -9,7 +9,10 @@ use data_types::{NamespaceId, Partition, PartitionKey, ShardId, TableId};
|
|||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::debug;
|
||||
|
||||
use crate::data::partition::PartitionData;
|
||||
use crate::data::{
|
||||
partition::{PartitionData, SortKeyState},
|
||||
table::TableName,
|
||||
};
|
||||
|
||||
use super::r#trait::PartitionProvider;
|
||||
|
||||
|
|
@ -55,7 +58,7 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
) -> PartitionData {
|
||||
debug!(
|
||||
%partition_key,
|
||||
|
|
@ -78,6 +81,7 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
namespace_id,
|
||||
table_id,
|
||||
table_name,
|
||||
SortKeyState::Provided(p.sort_key()),
|
||||
p.persisted_sequence_number,
|
||||
)
|
||||
}
|
||||
|
|
@ -131,7 +135,7 @@ mod tests {
|
|||
};
|
||||
|
||||
let callers_partition_key = PartitionKey::from(PARTITION_KEY);
|
||||
let table_name = TABLE_NAME.into();
|
||||
let table_name = TableName::from(TABLE_NAME);
|
||||
let resolver = CatalogPartitionResolver::new(Arc::clone(&catalog));
|
||||
let got = resolver
|
||||
.get_partition(
|
||||
|
|
@ -139,11 +143,12 @@ mod tests {
|
|||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
table_name.clone(),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(got.namespace_id(), namespace_id);
|
||||
assert_eq!(*got.table_name(), *table_name);
|
||||
assert_eq!(got.sort_key().await, None);
|
||||
assert_eq!(got.max_persisted_sequence_number(), None);
|
||||
assert!(got.partition_key.ptr_eq(&callers_partition_key));
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
//! A mock [`PartitionProvider`] to inject [`PartitionData`] for tests.
|
||||
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use crate::data::partition::PartitionData;
|
||||
use crate::data::{partition::PartitionData, table::TableName};
|
||||
|
||||
use super::r#trait::PartitionProvider;
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ impl PartitionProvider for MockPartitionProvider {
|
|||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
) -> PartitionData {
|
||||
let p = self
|
||||
.partitions
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ pub use r#trait::*;
|
|||
mod catalog;
|
||||
pub use catalog::*;
|
||||
|
||||
mod sort_key;
|
||||
pub(crate) use sort_key::*;
|
||||
|
||||
#[cfg(test)]
|
||||
mod mock;
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,331 @@
|
|||
//! An optimised resolver of a partition [`SortKey`].
|
||||
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::PartitionId;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use parking_lot::Mutex;
|
||||
use rand::Rng;
|
||||
use schema::sort::SortKey;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
/// The states of a [`DeferredSortKey`] instance.
|
||||
#[derive(Debug)]
|
||||
enum State {
|
||||
/// The value has not yet been fetched by the background task.
|
||||
Unresolved,
|
||||
/// The value was fetched (by the background task, or on demand by
/// [`DeferredSortKey::get()`]) and is ready to be consumed.
|
||||
Resolved(Option<SortKey>),
|
||||
}
|
||||
|
||||
/// A resolver of [`SortKey`] from the catalog for a given partition.
|
||||
///
|
||||
/// This implementation combines lazy / deferred loading of the [`SortKey`] from
|
||||
/// the [`Catalog`], and a background timer that pre-fetches the [`SortKey`]
|
||||
/// after some random duration of time. Combined, these behaviours smear the
|
||||
/// [`SortKey`] queries across the allowable time range, avoiding a large number
|
||||
/// of queries from executing when multiple [`SortKey`] are needed in the system
|
||||
/// at one point in time.
|
||||
///
|
||||
/// If the [`DeferredSortKey`] is dropped and the background task is still
|
||||
/// incomplete (sleeping / actively fetching the [`SortKey`]) it is aborted
|
||||
/// immediately. The background task exits once it has successfully fetched the
|
||||
/// [`SortKey`].
|
||||
///
|
||||
/// # Stale Cached Values
|
||||
///
|
||||
/// This is effectively a cache that is pre-warmed in the background - this
|
||||
/// necessitates that the caller can tolerate, or determine, stale values.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct DeferredSortKey {
|
||||
/// The shared slot holding the resolution state; written by both the
/// background task and on-demand calls to [`Self::get()`].
value: Arc<Mutex<State>>,
|
||||
/// The partition whose sort key this instance resolves.
partition_id: PartitionId,
|
||||
|
||||
/// Handle to the background pre-fetch task; aborted when the value is
/// resolved on demand, and when this instance is dropped.
handle: JoinHandle<()>,
|
||||
|
||||
/// Retry configuration used when [`Self::get()`] queries the catalog.
backoff_config: BackoffConfig,
|
||||
/// The catalog queried by [`Self::get()`] for an on-demand resolution.
catalog: Arc<dyn Catalog>,
|
||||
}
|
||||
|
||||
impl DeferredSortKey {
|
||||
/// Construct a [`DeferredSortKey`] instance that fetches the [`SortKey`]
|
||||
/// for the specified `partition_id`.
|
||||
///
|
||||
/// The background task will wait a uniformly random duration of time
|
||||
/// between `[0, max_smear)` before attempting to pre-fetch the [`SortKey`]
|
||||
/// from `catalog`.
|
||||
pub(crate) fn new(
|
||||
partition_id: PartitionId,
|
||||
max_smear: Duration,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
backoff_config: BackoffConfig,
|
||||
) -> Self {
|
||||
// Init the value container the background thread populates.
|
||||
let value = Arc::new(Mutex::new(State::Unresolved));
|
||||
|
||||
// Select random duration from a uniform distribution, up to the
|
||||
// configured maximum.
|
||||
let wait_for = rand::thread_rng().gen_range(Duration::ZERO..max_smear);
|
||||
|
||||
// Spawn the background task, sleeping for the random duration of time
|
||||
// before fetching the sort key.
|
||||
let handle = tokio::spawn({
|
||||
let value = Arc::clone(&value);
|
||||
let catalog = Arc::clone(&catalog);
|
||||
let backoff_config = backoff_config.clone();
|
||||
async move {
|
||||
// Sleep for the random duration
|
||||
tokio::time::sleep(wait_for).await;
|
||||
// Fetch the sort key from the catalog
|
||||
let v = fetch(partition_id, &*catalog, &backoff_config).await;
|
||||
// And attempt to update the value container, if it hasn't
|
||||
// already resolved
|
||||
let mut state = value.lock();
|
||||
*state = match *state {
|
||||
State::Unresolved => State::Resolved(v),
|
||||
State::Resolved(_) => return,
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
Self {
|
||||
value,
|
||||
partition_id,
|
||||
handle,
|
||||
backoff_config,
|
||||
catalog,
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the [`SortKey`] for the partition.
|
||||
///
|
||||
/// If the [`SortKey`] was pre-fetched in the background, it is returned
|
||||
/// immediately. If the [`SortKey`] has not yet been resolved, this call
|
||||
/// blocks while it is read from the [`Catalog`].
|
||||
///
|
||||
/// # Concurrency
|
||||
///
|
||||
/// If this method requires resolving the [`SortKey`], N concurrent callers
|
||||
/// will cause N queries against the catalog.
|
||||
///
|
||||
/// # Await Safety
|
||||
///
|
||||
/// Cancelling the future returned by calling [`Self::get()`] before
|
||||
/// completion will leave [`Self`] without a background task. The next call
|
||||
/// to [`Self::get()`] will incur a catalog query (see concurrency above).
|
||||
pub(crate) async fn get(&self) -> Option<SortKey> {
|
||||
{
|
||||
let state = self.value.lock();
|
||||
|
||||
// If there is a resolved value, return it.
|
||||
if let State::Resolved(v) = &*state {
|
||||
return v.clone();
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise resolve the value immediately, aborting the background
|
||||
// task.
|
||||
self.handle.abort();
|
||||
let sort_key = fetch(self.partition_id, &*self.catalog, &self.backoff_config).await;
|
||||
|
||||
{
|
||||
let mut state = self.value.lock();
|
||||
*state = State::Resolved(sort_key.clone());
|
||||
}
|
||||
|
||||
sort_key
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DeferredSortKey {
|
||||
fn drop(&mut self) {
|
||||
// Attempt to abort the background task, regardless of it having
|
||||
// completed or not.
|
||||
self.handle.abort()
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch the [`SortKey`] from the [`Catalog`] for `partition_id`, retrying
|
||||
/// endlessly when errors occur.
|
||||
async fn fetch(
|
||||
partition_id: PartitionId,
|
||||
catalog: &dyn Catalog,
|
||||
backoff_config: &BackoffConfig,
|
||||
) -> Option<SortKey> {
|
||||
Backoff::new(backoff_config)
|
||||
.retry_all_errors("fetch partition sort key", || async {
|
||||
let s = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.get_by_id(partition_id)
|
||||
.await?
|
||||
.expect("resolving sort key for non-existent partition")
|
||||
.sort_key();
|
||||
|
||||
Result::<_, iox_catalog::interface::Error>::Ok(s)
|
||||
})
|
||||
.await
|
||||
.expect("retry forever")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use data_types::ShardIndex;
    use test_helpers::timeout::FutureTimeout;

    use crate::test_util::populate_catalog;

    use super::*;

    const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
    const TABLE_NAME: &str = "bananas";
    const NAMESPACE_NAME: &str = "platanos";
    const PARTITION_KEY: &str = "platanos";

    /// Initialise an in-memory catalog populated with the shard / namespace /
    /// table, create a partition in it, and return both the catalog and the
    /// ID of the new partition.
    async fn init_catalog() -> (Arc<dyn Catalog>, PartitionId) {
        let metrics = Arc::new(metric::Registry::default());
        let catalog: Arc<dyn Catalog> =
            Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));

        // Populate the catalog with the shard / namespace / table
        let (shard_id, _ns_id, table_id) =
            populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;

        let partition_id = catalog
            .repositories()
            .await
            .partitions()
            .create_or_get(PARTITION_KEY.into(), shard_id, table_id)
            .await
            .expect("should create")
            .id;

        (catalog, partition_id)
    }

    /// Spin, yielding to the runtime, until the background task of `fetcher`
    /// reports as finished. Panics if that takes longer than 5 seconds.
    async fn wait_for_background_task(fetcher: &DeferredSortKey) {
        async {
            while !fetcher.handle.is_finished() {
                tokio::task::yield_now().await;
            }
        }
        .with_timeout_panic(Duration::from_secs(5))
        .await;
    }

    // A test that (most likely) exercises the "read on demand" code path.
    //
    // The background task is configured to run some time between now, and
    // 10,000,000 seconds in the future - it most likely doesn't get to complete
    // before the get() call is issued.
    //
    // If this test flakes, it is POSSIBLE but UNLIKELY that the background task
    // has completed and the get() call reads a pre-fetched value.
    #[tokio::test]
    async fn test_read_demand() {
        const LONG_LONG_TIME: Duration = Duration::from_secs(10_000_000);

        let backoff_config = BackoffConfig::default();
        let (catalog, partition_id) = init_catalog().await;

        // Read the just-created sort key (None)
        let fetched = DeferredSortKey::new(
            partition_id,
            LONG_LONG_TIME,
            Arc::clone(&catalog),
            backoff_config.clone(),
        )
        .get()
        .await;
        assert!(fetched.is_none());

        // Set the sort key
        let catalog_state = catalog
            .repositories()
            .await
            .partitions()
            .update_sort_key(partition_id, &["uno", "dos", "bananas"])
            .await
            .expect("should update existing partition key");

        // Read the updated sort key
        let fetched = DeferredSortKey::new(
            partition_id,
            LONG_LONG_TIME,
            Arc::clone(&catalog),
            backoff_config,
        )
        .get()
        .await;

        assert!(fetched.is_some());
        assert_eq!(fetched, catalog_state.sort_key());
    }

    // A test that deterministically exercises the "background pre-fetch" code path.
    #[tokio::test]
    async fn test_read_pre_fetched() {
        let backoff_config = BackoffConfig::default();
        let (catalog, partition_id) = init_catalog().await;

        // Read the just-created sort key (None)
        let fetcher = DeferredSortKey::new(
            partition_id,
            Duration::from_nanos(1),
            Arc::clone(&catalog),
            backoff_config.clone(),
        );

        // Wait for the background task to show as complete before reading, so
        // the get() below observes the pre-fetched value.
        wait_for_background_task(&fetcher).await;

        assert!(fetcher.get().await.is_none());

        // Set the sort key
        let catalog_state = catalog
            .repositories()
            .await
            .partitions()
            .update_sort_key(partition_id, &["uno", "dos", "bananas"])
            .await
            .expect("should update existing partition key");

        // Read the updated sort key
        let fetcher = DeferredSortKey::new(
            partition_id,
            Duration::from_nanos(1),
            Arc::clone(&catalog),
            backoff_config.clone(),
        );

        wait_for_background_task(&fetcher).await;

        let fetched = fetcher.get().await;
        assert!(fetched.is_some());
        assert_eq!(fetched, catalog_state.sort_key());
    }
}
|
||||
|
|
@ -3,7 +3,7 @@ use std::{fmt::Debug, sync::Arc};
|
|||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
|
||||
|
||||
use crate::data::partition::PartitionData;
|
||||
use crate::data::{partition::PartitionData, table::TableName};
|
||||
|
||||
/// An infallible resolver of [`PartitionData`] for the specified shard, table,
|
||||
/// and partition key, returning an initialised [`PartitionData`] buffer for it.
|
||||
|
|
@ -20,7 +20,7 @@ pub trait PartitionProvider: Send + Sync + Debug {
|
|||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
) -> PartitionData;
|
||||
}
|
||||
|
||||
|
|
@ -35,7 +35,7 @@ where
|
|||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
) -> PartitionData {
|
||||
(**self)
|
||||
.get_partition(partition_key, shard_id, namespace_id, table_id, table_name)
|
||||
|
|
@ -49,7 +49,7 @@ mod tests {
|
|||
|
||||
use data_types::PartitionId;
|
||||
|
||||
use crate::data::partition::resolver::MockPartitionProvider;
|
||||
use crate::data::partition::{resolver::MockPartitionProvider, SortKeyState};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
@ -59,7 +59,7 @@ mod tests {
|
|||
let shard_id = ShardId::new(42);
|
||||
let namespace_id = NamespaceId::new(1234);
|
||||
let table_id = TableId::new(24);
|
||||
let table_name = "platanos".into();
|
||||
let table_name = TableName::from("platanos");
|
||||
let partition = PartitionId::new(4242);
|
||||
let data = PartitionData::new(
|
||||
partition,
|
||||
|
|
@ -67,22 +67,17 @@ mod tests {
|
|||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
table_name.clone(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
);
|
||||
|
||||
let mock = Arc::new(MockPartitionProvider::default().with_partition(data));
|
||||
|
||||
let got = mock
|
||||
.get_partition(
|
||||
key,
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
)
|
||||
.get_partition(key, shard_id, namespace_id, table_id, table_name.clone())
|
||||
.await;
|
||||
assert_eq!(got.id(), partition);
|
||||
assert_eq!(got.partition_id(), partition);
|
||||
assert_eq!(got.namespace_id(), namespace_id);
|
||||
assert_eq!(*got.table_name(), *table_name);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,159 +0,0 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use iox_query::{
|
||||
exec::{Executor, ExecutorType},
|
||||
QueryChunk, QueryChunkMeta, ScanPlanBuilder,
|
||||
};
|
||||
use observability_deps::tracing::debug;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use crate::query::QueryableBatch;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_copy_implementations, missing_docs)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error creating plan for querying Ingester data to send to Querier"))]
|
||||
Frontend {
|
||||
source: iox_query::frontend::common::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error building logical plan for querying Ingester data to send to Querier"))]
|
||||
LogicalPlan { source: DataFusionError },
|
||||
|
||||
#[snafu(display(
|
||||
"Error building physical plan for querying Ingester data to send to Querier: {}",
|
||||
source
|
||||
))]
|
||||
PhysicalPlan { source: DataFusionError },
|
||||
|
||||
#[snafu(display(
|
||||
"Error executing the query for getting Ingester data to send to Querier: {}",
|
||||
source
|
||||
))]
|
||||
ExecutePlan { source: DataFusionError },
|
||||
}
|
||||
|
||||
/// A specialized `Error` for Ingester's Query errors
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Query a given Queryable Batch, applying selection and filters as appropriate
|
||||
/// Return stream of record batches
|
||||
pub(crate) async fn query(
|
||||
executor: &Executor,
|
||||
data: Arc<QueryableBatch>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
// Build logical plan for filtering data
|
||||
// Note that this query will also apply the delete predicates that go with the QueryableBatch
|
||||
|
||||
// TODO: Since we have different type of servers (router,
|
||||
// ingester, compactor, and querier), we may want to add more
|
||||
// types into the ExecutorType to have better log and resource
|
||||
// management
|
||||
let ctx = executor.new_context(ExecutorType::Query);
|
||||
|
||||
// Creates an execution plan for a scan and filter data of a single chunk
|
||||
let schema = data.schema();
|
||||
let table_name = data.table_name().to_string();
|
||||
|
||||
debug!(%table_name, "Creating single chunk scan plan");
|
||||
|
||||
let logical_plan = ScanPlanBuilder::new(schema, ctx.child_ctx("scan_and_filter planning"))
|
||||
.with_chunks([data as _])
|
||||
.build()
|
||||
.context(FrontendSnafu)?
|
||||
.plan_builder
|
||||
.build()
|
||||
.context(LogicalPlanSnafu)?;
|
||||
|
||||
debug!(%table_name, plan=%logical_plan.display_indent_schema(),
|
||||
"created single chunk scan plan");
|
||||
|
||||
// Build physical plan
|
||||
let physical_plan = ctx
|
||||
.create_physical_plan(&logical_plan)
|
||||
.await
|
||||
.context(PhysicalPlanSnafu {})?;
|
||||
|
||||
// Execute the plan and return the filtered stream
|
||||
let output_stream = ctx
|
||||
.execute_stream(physical_plan)
|
||||
.await
|
||||
.context(ExecutePlanSnafu {})?;
|
||||
|
||||
Ok(output_stream)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow_util::assert_batches_eq;
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::{
|
||||
create_one_record_batch_with_influxtype_no_duplicates, create_tombstone,
|
||||
make_queryable_batch, make_queryable_batch_with_deletes,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// create input data
|
||||
let batches = create_one_record_batch_with_influxtype_no_duplicates().await;
|
||||
|
||||
// build queryable batch from the input batches
|
||||
let batch = make_queryable_batch("test_table", 0, 1, batches);
|
||||
|
||||
// query without filters
|
||||
let exc = Executor::new(1);
|
||||
let stream = query(&exc, batch).await.unwrap();
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// verify data: all rows and columns should be returned
|
||||
let expected = vec![
|
||||
"+-----------+------+-----------------------------+",
|
||||
"| field_int | tag1 | time |",
|
||||
"+-----------+------+-----------------------------+",
|
||||
"| 70 | UT | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 10 | VT | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 1000 | WA | 1970-01-01T00:00:00.000008Z |",
|
||||
"+-----------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &output_batches);
|
||||
|
||||
exc.join().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_with_delete() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// create input data
|
||||
let batches = create_one_record_batch_with_influxtype_no_duplicates().await;
|
||||
let tombstones = vec![create_tombstone(1, 1, 1, 1, 0, 200000, "tag1=UT")];
|
||||
|
||||
// build queryable batch from the input batches
|
||||
let batch = make_queryable_batch_with_deletes("test_table", 0, 1, batches, tombstones);
|
||||
|
||||
let exc = Executor::new(1);
|
||||
let stream = query(&exc, batch).await.unwrap();
|
||||
let output_batches = datafusion::physical_plan::common::collect(stream)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// verify data:
|
||||
let expected = vec![
|
||||
"+-----------+------+-----------------------------+",
|
||||
"| field_int | tag1 | time |",
|
||||
"+-----------+------+-----------------------------+",
|
||||
"| 10 | VT | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 1000 | WA | 1970-01-01T00:00:00.000008Z |",
|
||||
"+-----------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &output_batches);
|
||||
|
||||
exc.join().await;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,22 +1,49 @@
|
|||
//! Shard level data buffer structures.
|
||||
|
||||
use std::{
|
||||
collections::{btree_map::Entry, BTreeMap},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use data_types::{ShardId, ShardIndex};
|
||||
use data_types::{NamespaceId, ShardId, ShardIndex};
|
||||
use dml::DmlOperation;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::exec::Executor;
|
||||
use metric::U64Counter;
|
||||
use parking_lot::RwLock;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use write_summary::ShardProgress;
|
||||
|
||||
use super::{namespace::NamespaceData, partition::resolver::PartitionProvider};
|
||||
use super::{
|
||||
namespace::{NamespaceData, NamespaceName},
|
||||
partition::resolver::PartitionProvider,
|
||||
};
|
||||
use crate::lifecycle::LifecycleHandle;
|
||||
|
||||
/// A double-referenced map where [`NamespaceData`] can be looked up by name, or
|
||||
/// ID.
|
||||
#[derive(Debug, Default)]
|
||||
struct DoubleRef {
|
||||
// TODO(4880): this can be removed when IDs are sent over the wire.
|
||||
by_name: HashMap<NamespaceName, Arc<NamespaceData>>,
|
||||
by_id: HashMap<NamespaceId, Arc<NamespaceData>>,
|
||||
}
|
||||
|
||||
impl DoubleRef {
|
||||
fn insert(&mut self, name: NamespaceName, ns: NamespaceData) -> Arc<NamespaceData> {
|
||||
let id = ns.namespace_id();
|
||||
|
||||
let ns = Arc::new(ns);
|
||||
self.by_name.insert(name, Arc::clone(&ns));
|
||||
self.by_id.insert(id, Arc::clone(&ns));
|
||||
ns
|
||||
}
|
||||
|
||||
fn by_name(&self, name: &NamespaceName) -> Option<Arc<NamespaceData>> {
|
||||
self.by_name.get(name).map(Arc::clone)
|
||||
}
|
||||
|
||||
fn by_id(&self, id: NamespaceId) -> Option<Arc<NamespaceData>> {
|
||||
self.by_id.get(&id).map(Arc::clone)
|
||||
}
|
||||
}
|
||||
|
||||
/// Data of a Shard
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ShardData {
|
||||
|
|
@ -32,7 +59,7 @@ pub(crate) struct ShardData {
|
|||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
|
||||
// New namespaces can come in at any time so we need to be able to add new ones
|
||||
namespaces: RwLock<BTreeMap<String, Arc<NamespaceData>>>,
|
||||
namespaces: RwLock<DoubleRef>,
|
||||
|
||||
metrics: Arc<metric::Registry>,
|
||||
namespace_count: U64Counter,
|
||||
|
|
@ -72,9 +99,8 @@ impl ShardData {
|
|||
dml_operation: DmlOperation,
|
||||
catalog: &Arc<dyn Catalog>,
|
||||
lifecycle_handle: &dyn LifecycleHandle,
|
||||
executor: &Executor,
|
||||
) -> Result<bool, super::Error> {
|
||||
let namespace_data = match self.namespace(dml_operation.namespace()) {
|
||||
let namespace_data = match self.namespace(&NamespaceName::from(dml_operation.namespace())) {
|
||||
Some(d) => d,
|
||||
None => {
|
||||
self.insert_namespace(dml_operation.namespace(), &**catalog)
|
||||
|
|
@ -83,14 +109,24 @@ impl ShardData {
|
|||
};
|
||||
|
||||
namespace_data
|
||||
.buffer_operation(dml_operation, catalog, lifecycle_handle, executor)
|
||||
.buffer_operation(dml_operation, catalog, lifecycle_handle)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Gets the namespace data out of the map
|
||||
pub(crate) fn namespace(&self, namespace: &str) -> Option<Arc<NamespaceData>> {
|
||||
pub(crate) fn namespace(&self, namespace: &NamespaceName) -> Option<Arc<NamespaceData>> {
|
||||
let n = self.namespaces.read();
|
||||
n.get(namespace).cloned()
|
||||
n.by_name(namespace)
|
||||
}
|
||||
|
||||
/// Gets the namespace data out of the map
|
||||
pub(crate) fn namespace_by_id(&self, namespace_id: NamespaceId) -> Option<Arc<NamespaceData>> {
|
||||
// TODO: this should be the default once IDs are pushed over the wire.
|
||||
//
|
||||
// At which point the map should be indexed by IDs, instead of namespace
|
||||
// names.
|
||||
let n = self.namespaces.read();
|
||||
n.by_id(namespace_id)
|
||||
}
|
||||
|
||||
/// Retrieves the namespace from the catalog and initializes an empty buffer, or
|
||||
|
|
@ -101,6 +137,8 @@ impl ShardData {
|
|||
catalog: &dyn Catalog,
|
||||
) -> Result<Arc<NamespaceData>, super::Error> {
|
||||
let mut repos = catalog.repositories().await;
|
||||
|
||||
let ns_name = NamespaceName::from(namespace);
|
||||
let namespace = repos
|
||||
.namespaces()
|
||||
.get_by_name(namespace)
|
||||
|
|
@ -110,26 +148,35 @@ impl ShardData {
|
|||
|
||||
let mut n = self.namespaces.write();
|
||||
|
||||
let data = match n.entry(namespace.name) {
|
||||
Entry::Vacant(v) => {
|
||||
let v = v.insert(Arc::new(NamespaceData::new(
|
||||
namespace.id,
|
||||
self.shard_id,
|
||||
Arc::clone(&self.partition_provider),
|
||||
&*self.metrics,
|
||||
)));
|
||||
Ok(match n.by_name(&ns_name) {
|
||||
Some(v) => v,
|
||||
None => {
|
||||
self.namespace_count.inc(1);
|
||||
Arc::clone(v)
|
||||
}
|
||||
Entry::Occupied(v) => Arc::clone(v.get()),
|
||||
};
|
||||
|
||||
Ok(data)
|
||||
// Insert the namespace and then return a ref to it.
|
||||
n.insert(
|
||||
ns_name.clone(),
|
||||
NamespaceData::new(
|
||||
namespace.id,
|
||||
ns_name,
|
||||
self.shard_id,
|
||||
Arc::clone(&self.partition_provider),
|
||||
&*self.metrics,
|
||||
),
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the progress of this shard
|
||||
pub(super) async fn progress(&self) -> ShardProgress {
|
||||
let namespaces: Vec<_> = self.namespaces.read().values().map(Arc::clone).collect();
|
||||
let namespaces: Vec<_> = self
|
||||
.namespaces
|
||||
.read()
|
||||
.by_id
|
||||
.values()
|
||||
.map(Arc::clone)
|
||||
.collect();
|
||||
|
||||
let mut progress = ShardProgress::new();
|
||||
|
||||
|
|
@ -144,3 +191,89 @@ impl ShardData {
|
|||
self.shard_index
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::{PartitionId, PartitionKey, ShardIndex};
|
||||
use metric::{Attributes, Metric};
|
||||
|
||||
use crate::{
|
||||
data::partition::{resolver::MockPartitionProvider, PartitionData, SortKeyState},
|
||||
lifecycle::mock_handle::MockLifecycleHandle,
|
||||
test_util::{make_write_op, populate_catalog},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
|
||||
const TABLE_NAME: &str = "bananas";
|
||||
const NAMESPACE_NAME: &str = "platanos";
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shard_double_ref() {
|
||||
let metrics = Arc::new(metric::Registry::default());
|
||||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (shard_id, ns_id, table_id) =
|
||||
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
// Configure the mock partition provider to return a partition for this
|
||||
// table ID.
|
||||
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
|
||||
PartitionData::new(
|
||||
PartitionId::new(0),
|
||||
PartitionKey::from("banana-split"),
|
||||
shard_id,
|
||||
ns_id,
|
||||
table_id,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
),
|
||||
));
|
||||
|
||||
let shard = ShardData::new(
|
||||
SHARD_INDEX,
|
||||
shard_id,
|
||||
partition_provider,
|
||||
Arc::clone(&metrics),
|
||||
);
|
||||
|
||||
// Assert the shard does not contain the test namespace
|
||||
assert!(shard.namespace(&NAMESPACE_NAME.into()).is_none());
|
||||
assert!(shard.namespace_by_id(ns_id).is_none());
|
||||
|
||||
// Write some test data
|
||||
shard
|
||||
.buffer_operation(
|
||||
DmlOperation::Write(make_write_op(
|
||||
&PartitionKey::from("banana-split"),
|
||||
SHARD_INDEX,
|
||||
NAMESPACE_NAME,
|
||||
0,
|
||||
r#"bananas,city=Medford day="sun",temp=55 22"#,
|
||||
)),
|
||||
&catalog,
|
||||
&MockLifecycleHandle::default(),
|
||||
)
|
||||
.await
|
||||
.expect("buffer op should succeed");
|
||||
|
||||
// Both forms of referencing the namespace should succeed
|
||||
assert!(shard.namespace(&NAMESPACE_NAME.into()).is_some());
|
||||
assert!(shard.namespace_by_id(ns_id).is_some());
|
||||
|
||||
// And the namespace counter metric should increase
|
||||
let tables = metrics
|
||||
.get_instrument::<Metric<U64Counter>>("ingester_namespaces_total")
|
||||
.expect("failed to read metric")
|
||||
.get_observer(&Attributes::from([]))
|
||||
.expect("failed to get observer")
|
||||
.fetch();
|
||||
assert_eq!(tables, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,41 +1,94 @@
|
|||
//! Table level data buffer structures.
|
||||
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use data_types::{
|
||||
DeletePredicate, NamespaceId, PartitionKey, SequenceNumber, ShardId, TableId, Timestamp,
|
||||
};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::exec::Executor;
|
||||
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId};
|
||||
use mutable_batch::MutableBatch;
|
||||
use snafu::ResultExt;
|
||||
use observability_deps::tracing::*;
|
||||
use write_summary::ShardProgress;
|
||||
|
||||
use super::partition::{
|
||||
resolver::PartitionProvider, PartitionData, PartitionStatus, UnpersistedPartitionData,
|
||||
};
|
||||
use crate::lifecycle::LifecycleHandle;
|
||||
use super::partition::{resolver::PartitionProvider, PartitionData, UnpersistedPartitionData};
|
||||
use crate::{lifecycle::LifecycleHandle, querier_handler::PartitionStatus};
|
||||
|
||||
/// A double-referenced map where [`PartitionData`] can be looked up by
|
||||
/// [`PartitionKey`], or ID.
|
||||
#[derive(Debug, Default)]
|
||||
struct DoubleRef {
|
||||
// TODO(4880): this can be removed when IDs are sent over the wire.
|
||||
by_key: HashMap<PartitionKey, PartitionData>,
|
||||
by_id: HashMap<PartitionId, PartitionKey>,
|
||||
}
|
||||
|
||||
impl DoubleRef {
|
||||
fn insert(&mut self, ns: PartitionData) {
|
||||
let id = ns.partition_id();
|
||||
let key = ns.partition_key().clone();
|
||||
|
||||
assert!(self.by_key.insert(key.clone(), ns).is_none());
|
||||
assert!(self.by_id.insert(id, key).is_none());
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn by_key(&self, key: &PartitionKey) -> Option<&PartitionData> {
|
||||
self.by_key.get(key)
|
||||
}
|
||||
|
||||
fn by_key_mut(&mut self, key: &PartitionKey) -> Option<&mut PartitionData> {
|
||||
self.by_key.get_mut(key)
|
||||
}
|
||||
|
||||
fn by_id_mut(&mut self, id: PartitionId) -> Option<&mut PartitionData> {
|
||||
let key = self.by_id.get(&id)?.clone();
|
||||
self.by_key_mut(&key)
|
||||
}
|
||||
}
|
||||
|
||||
/// The string name / identifier of a Table.
///
/// Backed by a reference-counted string slice, so cloning copies a pointer
/// rather than the underlying bytes.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TableName(Arc<str>);

/// Builds a [`TableName`] from anything that can be viewed as a `&str`.
impl<T: AsRef<str>> From<T> for TableName {
    fn from(v: T) -> Self {
        let name: Arc<str> = v.as_ref().into();
        Self(name)
    }
}

/// Formats the name exactly as the wrapped string would be formatted.
impl std::fmt::Display for TableName {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&*self.0, f)
    }
}

/// Exposes the `str` API directly on a [`TableName`].
impl std::ops::Deref for TableName {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &*self.0
    }
}
|
||||
|
||||
/// Data of a Table in a given Namespace that belongs to a given Shard
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct TableData {
|
||||
table_id: TableId,
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
|
||||
/// The catalog ID of the shard & namespace this table is being populated
|
||||
/// from.
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
|
||||
// the max sequence number for a tombstone associated with this table
|
||||
tombstone_max_sequence_number: Option<SequenceNumber>,
|
||||
|
||||
/// An abstract constructor of [`PartitionData`] instances for a given
|
||||
/// `(key, shard, table)` triplet.
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
|
||||
// Map pf partition key to its data
|
||||
pub(super) partition_data: BTreeMap<PartitionKey, PartitionData>,
|
||||
// Map of partition key to its data
|
||||
partition_data: DoubleRef,
|
||||
}
|
||||
|
||||
impl TableData {
|
||||
|
|
@ -51,18 +104,16 @@ impl TableData {
|
|||
/// for the first time.
|
||||
pub(super) fn new(
|
||||
table_id: TableId,
|
||||
table_name: &str,
|
||||
table_name: TableName,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
tombstone_max_sequence_number: Option<SequenceNumber>,
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_id,
|
||||
table_name: table_name.into(),
|
||||
table_name,
|
||||
shard_id,
|
||||
namespace_id,
|
||||
tombstone_max_sequence_number,
|
||||
partition_data: Default::default(),
|
||||
partition_provider,
|
||||
}
|
||||
|
|
@ -71,18 +122,13 @@ impl TableData {
|
|||
/// Return parquet_max_sequence_number
|
||||
pub(super) fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
|
||||
self.partition_data
|
||||
.by_key
|
||||
.values()
|
||||
.map(|p| p.max_persisted_sequence_number())
|
||||
.max()
|
||||
.flatten()
|
||||
}
|
||||
|
||||
/// Return tombstone_max_sequence_number
|
||||
#[allow(dead_code)] // Used in tests
|
||||
pub(super) fn tombstone_max_sequence_number(&self) -> Option<SequenceNumber> {
|
||||
self.tombstone_max_sequence_number
|
||||
}
|
||||
|
||||
// buffers the table write and returns true if the lifecycle manager indicates that
|
||||
// ingest should be paused.
|
||||
pub(super) async fn buffer_table_write(
|
||||
|
|
@ -92,7 +138,7 @@ impl TableData {
|
|||
partition_key: PartitionKey,
|
||||
lifecycle_handle: &dyn LifecycleHandle,
|
||||
) -> Result<bool, super::Error> {
|
||||
let partition_data = match self.partition_data.get_mut(&partition_key) {
|
||||
let partition_data = match self.partition_data.by_key.get_mut(&partition_key) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
let p = self
|
||||
|
|
@ -102,86 +148,87 @@ impl TableData {
|
|||
self.shard_id,
|
||||
self.namespace_id,
|
||||
self.table_id,
|
||||
Arc::clone(&self.table_name),
|
||||
self.table_name.clone(),
|
||||
)
|
||||
.await;
|
||||
// Add the partition to the map.
|
||||
assert!(self
|
||||
.partition_data
|
||||
.insert(partition_key.clone(), p)
|
||||
.is_none());
|
||||
self.partition_data.get_mut(&partition_key).unwrap()
|
||||
// Add the double-referenced partition to the map.
|
||||
self.partition_data.insert(p);
|
||||
self.partition_data.by_key_mut(&partition_key).unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
// skip the write if it has already been persisted
|
||||
if let Some(max) = partition_data.max_persisted_sequence_number() {
|
||||
if max >= sequence_number {
|
||||
trace!(
|
||||
shard_id=%self.shard_id,
|
||||
op_sequence_number=?sequence_number,
|
||||
"skipping already-persisted write"
|
||||
);
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
let size = batch.size();
|
||||
let rows = batch.rows();
|
||||
partition_data.buffer_write(sequence_number, batch)?;
|
||||
|
||||
// Record the write as having been buffered.
|
||||
//
|
||||
// This should happen AFTER the write is applied, because buffering the
|
||||
// op may fail which would lead to a write being recorded, but not
|
||||
// applied.
|
||||
let should_pause = lifecycle_handle.log_write(
|
||||
partition_data.id(),
|
||||
partition_data.partition_id(),
|
||||
self.shard_id,
|
||||
self.namespace_id,
|
||||
self.table_id,
|
||||
sequence_number,
|
||||
batch.size(),
|
||||
batch.rows(),
|
||||
size,
|
||||
rows,
|
||||
);
|
||||
partition_data.buffer_write(sequence_number, batch)?;
|
||||
|
||||
Ok(should_pause)
|
||||
}
|
||||
|
||||
pub(super) async fn buffer_delete(
|
||||
/// Return the [`PartitionData`] for the specified ID.
|
||||
#[allow(unused)]
|
||||
pub(crate) fn get_partition(
|
||||
&mut self,
|
||||
predicate: &DeletePredicate,
|
||||
sequence_number: SequenceNumber,
|
||||
catalog: &dyn Catalog,
|
||||
executor: &Executor,
|
||||
) -> Result<(), super::Error> {
|
||||
let min_time = Timestamp::new(predicate.range.start());
|
||||
let max_time = Timestamp::new(predicate.range.end());
|
||||
partition_id: PartitionId,
|
||||
) -> Option<&mut PartitionData> {
|
||||
self.partition_data.by_id_mut(partition_id)
|
||||
}
|
||||
|
||||
let mut repos = catalog.repositories().await;
|
||||
let tombstone = repos
|
||||
.tombstones()
|
||||
.create_or_get(
|
||||
self.table_id,
|
||||
self.shard_id,
|
||||
sequence_number,
|
||||
min_time,
|
||||
max_time,
|
||||
&predicate.expr_sql_string(),
|
||||
)
|
||||
.await
|
||||
.context(super::CatalogSnafu)?;
|
||||
/// Return the [`PartitionData`] for the specified partition key.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn get_partition_by_key(
|
||||
&self,
|
||||
partition_key: &PartitionKey,
|
||||
) -> Option<&PartitionData> {
|
||||
self.partition_data.by_key(partition_key)
|
||||
}
|
||||
|
||||
// remember "persisted" state
|
||||
self.tombstone_max_sequence_number = Some(sequence_number);
|
||||
|
||||
// modify one partition at a time
|
||||
for data in self.partition_data.values_mut() {
|
||||
data.buffer_tombstone(executor, tombstone.clone()).await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
/// Return the [`PartitionData`] for the specified partition key.
|
||||
pub(crate) fn get_partition_by_key_mut(
|
||||
&mut self,
|
||||
partition_key: &PartitionKey,
|
||||
) -> Option<&mut PartitionData> {
|
||||
self.partition_data.by_key_mut(partition_key)
|
||||
}
|
||||
|
||||
pub(crate) fn unpersisted_partition_data(&self) -> Vec<UnpersistedPartitionData> {
|
||||
self.partition_data
|
||||
.by_key
|
||||
.values()
|
||||
.map(|p| UnpersistedPartitionData {
|
||||
partition_id: p.id(),
|
||||
partition_id: p.partition_id(),
|
||||
non_persisted: p
|
||||
.get_non_persisting_data()
|
||||
.expect("get_non_persisting should always work"),
|
||||
persisting: p.get_persisting_data(),
|
||||
partition_status: PartitionStatus {
|
||||
parquet_max_sequence_number: p.max_persisted_sequence_number(),
|
||||
tombstone_max_sequence_number: self.tombstone_max_sequence_number,
|
||||
},
|
||||
})
|
||||
.collect()
|
||||
|
|
@ -196,14 +243,223 @@ impl TableData {
|
|||
};
|
||||
|
||||
self.partition_data
|
||||
.by_key
|
||||
.values()
|
||||
.fold(progress, |progress, partition_data| {
|
||||
progress.combine(partition_data.progress())
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the ID of this table.
///
/// Only compiled in test builds.
#[cfg(test)]
pub(super) fn table_id(&self) -> TableId {
    let Self { table_id, .. } = self;
    *table_id
}
|
||||
|
||||
/// Returns the name of this table.
|
||||
pub(crate) fn table_name(&self) -> &TableName {
|
||||
&self.table_name
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{PartitionId, ShardIndex};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use mutable_batch::writer;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use schema::{InfluxColumnType, InfluxFieldType};
|
||||
|
||||
use crate::{
|
||||
data::{
|
||||
partition::{resolver::MockPartitionProvider, PartitionData, SortKeyState},
|
||||
Error,
|
||||
},
|
||||
lifecycle::mock_handle::{MockLifecycleCall, MockLifecycleHandle},
|
||||
test_util::populate_catalog,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
|
||||
const TABLE_NAME: &str = "bananas";
|
||||
const NAMESPACE_NAME: &str = "platanos";
|
||||
const PARTITION_KEY: &str = "platanos";
|
||||
const PARTITION_ID: PartitionId = PartitionId::new(0);
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_partition_double_ref() {
|
||||
let metrics = Arc::new(metric::Registry::default());
|
||||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (shard_id, ns_id, table_id) =
|
||||
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
// Configure the mock partition provider to return a partition for this
|
||||
// table ID.
|
||||
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
|
||||
PartitionData::new(
|
||||
PARTITION_ID,
|
||||
PARTITION_KEY.into(),
|
||||
shard_id,
|
||||
ns_id,
|
||||
table_id,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
),
|
||||
));
|
||||
|
||||
let mut table = TableData::new(
|
||||
table_id,
|
||||
TABLE_NAME.into(),
|
||||
shard_id,
|
||||
ns_id,
|
||||
partition_provider,
|
||||
);
|
||||
|
||||
let batch = lines_to_batches(r#"bananas,bat=man value=24 42"#, 0)
|
||||
.unwrap()
|
||||
.remove(TABLE_NAME)
|
||||
.unwrap();
|
||||
|
||||
// Assert the table does not contain the test partition
|
||||
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_none());
|
||||
assert!(table.partition_data.by_id_mut(PARTITION_ID).is_none());
|
||||
|
||||
// Write some test data
|
||||
let pause = table
|
||||
.buffer_table_write(
|
||||
SequenceNumber::new(42),
|
||||
batch,
|
||||
PARTITION_KEY.into(),
|
||||
&MockLifecycleHandle::default(),
|
||||
)
|
||||
.await
|
||||
.expect("buffer op should succeed");
|
||||
assert!(!pause);
|
||||
|
||||
// Referencing the partition should succeed
|
||||
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_some());
|
||||
assert!(table.partition_data.by_id_mut(PARTITION_ID).is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bad_write_memory_counting() {
|
||||
let metrics = Arc::new(metric::Registry::default());
|
||||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (shard_id, ns_id, table_id) =
|
||||
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
// Configure the mock partition provider to return a partition for this
|
||||
// table ID.
|
||||
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
|
||||
PartitionData::new(
|
||||
PARTITION_ID,
|
||||
PARTITION_KEY.into(),
|
||||
shard_id,
|
||||
ns_id,
|
||||
table_id,
|
||||
TABLE_NAME.into(),
|
||||
SortKeyState::Provided(None),
|
||||
None,
|
||||
),
|
||||
));
|
||||
|
||||
let mut table = TableData::new(
|
||||
table_id,
|
||||
TABLE_NAME.into(),
|
||||
shard_id,
|
||||
ns_id,
|
||||
partition_provider,
|
||||
);
|
||||
|
||||
let batch = lines_to_batches(r#"bananas,bat=man value=24 42"#, 0)
|
||||
.unwrap()
|
||||
.remove(TABLE_NAME)
|
||||
.unwrap();
|
||||
|
||||
// Initialise the mock lifecycle handle and use it to inspect the calls
|
||||
// made to the lifecycle manager during buffering.
|
||||
let handle = MockLifecycleHandle::default();
|
||||
|
||||
// Assert the table does not contain the test partition
|
||||
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_none());
|
||||
|
||||
// Write some test data
|
||||
let pause = table
|
||||
.buffer_table_write(
|
||||
SequenceNumber::new(42),
|
||||
batch,
|
||||
PARTITION_KEY.into(),
|
||||
&handle,
|
||||
)
|
||||
.await
|
||||
.expect("buffer op should succeed");
|
||||
assert!(!pause);
|
||||
|
||||
// Referencing the partition should succeed
|
||||
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_some());
|
||||
|
||||
// And the lifecycle handle was called with the expected values
|
||||
assert_eq!(
|
||||
handle.get_log_calls(),
|
||||
&[MockLifecycleCall {
|
||||
partition_id: PARTITION_ID,
|
||||
shard_id,
|
||||
namespace_id: ns_id,
|
||||
table_id,
|
||||
sequence_number: SequenceNumber::new(42),
|
||||
bytes_written: 1131,
|
||||
rows_written: 1,
|
||||
}]
|
||||
);
|
||||
|
||||
// Attempt to buffer the second op that contains a type conflict - this
|
||||
// should return an error, and not make a call to the lifecycle handle
|
||||
// (as no data was buffered)
|
||||
//
|
||||
// Note the type of value was numeric previously, and here it is a string.
|
||||
let batch = lines_to_batches(r#"bananas,bat=man value="platanos" 42"#, 0)
|
||||
.unwrap()
|
||||
.remove(TABLE_NAME)
|
||||
.unwrap();
|
||||
|
||||
let err = table
|
||||
.buffer_table_write(
|
||||
SequenceNumber::new(42),
|
||||
batch,
|
||||
PARTITION_KEY.into(),
|
||||
&handle,
|
||||
)
|
||||
.await
|
||||
.expect_err("type conflict should error");
|
||||
|
||||
// The buffer op should return a column type error
|
||||
assert_matches!(
|
||||
err,
|
||||
Error::BufferWrite {
|
||||
source: mutable_batch::Error::WriterError {
|
||||
source: writer::Error::TypeMismatch {
|
||||
existing: InfluxColumnType::Field(InfluxFieldType::Float),
|
||||
inserted: InfluxColumnType::Field(InfluxFieldType::String),
|
||||
column: col_name,
|
||||
}
|
||||
},
|
||||
} => { assert_eq!(col_name, "value") }
|
||||
);
|
||||
|
||||
// And the lifecycle handle should not be called.
|
||||
//
|
||||
// It still contains the first call, so the desired length is 1
|
||||
// indicating no second call was made.
|
||||
assert_eq!(handle.get_log_calls().len(), 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,17 +30,24 @@ use crate::{
|
|||
data::{
|
||||
partition::resolver::{CatalogPartitionResolver, PartitionCache, PartitionProvider},
|
||||
shard::ShardData,
|
||||
IngesterData, IngesterQueryResponse,
|
||||
IngesterData,
|
||||
},
|
||||
lifecycle::{run_lifecycle_manager, LifecycleConfig, LifecycleManager},
|
||||
poison::PoisonCabinet,
|
||||
querier_handler::prepare_data_to_querier,
|
||||
querier_handler::{prepare_data_to_querier, IngesterQueryResponse},
|
||||
stream_handler::{
|
||||
handler::SequencedStreamHandler, sink_adaptor::IngestSinkAdaptor,
|
||||
sink_instrumentation::SinkInstrumentation, PeriodicWatermarkFetcher,
|
||||
},
|
||||
};
|
||||
|
||||
/// The maximum duration of time between creating a [`PartitionData`] and its
|
||||
/// [`SortKey`] being fetched from the catalog.
|
||||
///
|
||||
/// [`PartitionData`]: crate::data::partition::PartitionData
|
||||
/// [`SortKey`]: schema::sort::SortKey
|
||||
const SORT_KEY_PRE_FETCH: Duration = Duration::from_secs(30);
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_copy_implementations, missing_docs)]
|
||||
pub enum Error {
|
||||
|
|
@ -160,7 +167,13 @@ impl IngestHandlerImpl {
|
|||
|
||||
// Build the partition provider.
|
||||
let partition_provider = CatalogPartitionResolver::new(Arc::clone(&catalog));
|
||||
let partition_provider = PartitionCache::new(partition_provider, recent_partitions);
|
||||
let partition_provider = PartitionCache::new(
|
||||
partition_provider,
|
||||
recent_partitions,
|
||||
SORT_KEY_PRE_FETCH,
|
||||
Arc::clone(&catalog),
|
||||
BackoffConfig::default(),
|
||||
);
|
||||
let partition_provider: Arc<dyn PartitionProvider> = Arc::new(partition_provider);
|
||||
|
||||
// build the initial ingester data state
|
||||
|
|
@ -432,7 +445,7 @@ mod tests {
|
|||
use write_buffer::mock::{MockBufferForReading, MockBufferSharedState};
|
||||
|
||||
use super::*;
|
||||
use crate::data::partition::SnapshotBatch;
|
||||
use crate::data::{partition::SnapshotBatch, table::TableName};
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_from_write_buffer_write_to_mutable_buffer() {
|
||||
|
|
@ -499,13 +512,16 @@ mod tests {
|
|||
// give the writes some time to go through the buffer. Exit once we've verified there's
|
||||
// data in there from both writes.
|
||||
tokio::time::timeout(Duration::from_secs(2), async {
|
||||
let ns_name = ingester.namespace.name.into();
|
||||
let table_name = TableName::from("a");
|
||||
loop {
|
||||
let mut has_measurement = false;
|
||||
|
||||
if let Some(data) = ingester.ingester.data.shard(ingester.shard.id) {
|
||||
if let Some(data) = data.namespace(&ingester.namespace.name) {
|
||||
if let Some(data) = data.namespace(&ns_name) {
|
||||
// verify there's data in the buffer
|
||||
if let Some((b, _)) = data.snapshot("a", &"1970-01-01".into()).await {
|
||||
if let Some((b, _)) = data.snapshot(&table_name, &"1970-01-01".into()).await
|
||||
{
|
||||
if let Some(b) = b.first() {
|
||||
if b.data.num_rows() > 0 {
|
||||
has_measurement = true;
|
||||
|
|
@ -740,13 +756,16 @@ mod tests {
|
|||
// give the writes some time to go through the buffer. Exit once we've verified there's
|
||||
// data in there
|
||||
tokio::time::timeout(Duration::from_secs(1), async move {
|
||||
let ns_name = namespace.name.into();
|
||||
let table_name = TableName::from("cpu");
|
||||
loop {
|
||||
let mut has_measurement = false;
|
||||
|
||||
if let Some(data) = ingester.data.shard(shard.id) {
|
||||
if let Some(data) = data.namespace(&namespace.name) {
|
||||
if let Some(data) = data.namespace(&ns_name) {
|
||||
// verify there's data in the buffer
|
||||
if let Some((b, _)) = data.snapshot("cpu", &"1970-01-01".into()).await {
|
||||
if let Some((b, _)) = data.snapshot(&table_name, &"1970-01-01".into()).await
|
||||
{
|
||||
if let Some(b) = b.first() {
|
||||
custom_batch_verification(b);
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ use std::{collections::BTreeMap, sync::Arc, time::Duration};
|
|||
use data_types::{NamespaceId, PartitionId, SequenceNumber, ShardId, TableId};
|
||||
use iox_time::{Time, TimeProvider};
|
||||
use metric::{Metric, U64Counter};
|
||||
use observability_deps::tracing::{error, info, warn};
|
||||
use observability_deps::tracing::{error, info, trace, warn};
|
||||
use parking_lot::Mutex;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracker::TrackedFutureExt;
|
||||
|
|
@ -97,6 +97,18 @@ impl LifecycleHandle for LifecycleHandleImpl {
|
|||
stats.last_write = now;
|
||||
stats.rows_written += rows_written;
|
||||
|
||||
trace!(
|
||||
shard_id=%stats.shard_id,
|
||||
partition_id=%stats.partition_id,
|
||||
namespace_id=%stats.namespace_id,
|
||||
table_id=%stats.table_id,
|
||||
first_write=%stats.first_write,
|
||||
last_write=%stats.last_write,
|
||||
bytes_written=%stats.bytes_written,
|
||||
first_sequence_number=?stats.first_sequence_number,
|
||||
"logged write"
|
||||
);
|
||||
|
||||
s.total_bytes += bytes_written;
|
||||
|
||||
// Pause if the server has exceeded the configured memory limit.
|
||||
|
|
@ -234,7 +246,7 @@ struct LifecycleStats {
|
|||
}
|
||||
|
||||
/// The stats for a partition
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[derive(Debug, Clone)]
|
||||
struct PartitionLifecycleStats {
|
||||
/// The shard this partition is under
|
||||
shard_id: ShardId,
|
||||
|
|
@ -469,6 +481,18 @@ impl LifecycleManager {
|
|||
let persist_tasks: Vec<_> = to_persist
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
// BUG: TOCTOU: memory usage released may be incorrect.
|
||||
//
|
||||
// Here the amount of memory to be reduced is acquired, but this
|
||||
// code does not prevent continued writes adding more data to
|
||||
// the partition in another thread.
|
||||
//
|
||||
// This may lead to more actual data being persisted than the
|
||||
// call below returns to the server pool - this would slowly
|
||||
// starve the ingester of memory it thinks it has.
|
||||
//
|
||||
// See https://github.com/influxdata/influxdb_iox/issues/5777
|
||||
|
||||
// Mark this partition as being persisted, and remember the
|
||||
// memory allocation it had accumulated.
|
||||
let partition_memory_usage = self
|
||||
|
|
@ -483,7 +507,9 @@ impl LifecycleManager {
|
|||
|
||||
let state = Arc::clone(&self.state);
|
||||
tokio::task::spawn(async move {
|
||||
persister.persist(s.partition_id).await;
|
||||
persister
|
||||
.persist(s.shard_id, s.namespace_id, s.table_id, s.partition_id)
|
||||
.await;
|
||||
// Now the data has been uploaded and the memory it was
|
||||
// using has been freed, release the memory capacity back to
|
||||
// the ingester.
|
||||
|
|
@ -524,6 +550,12 @@ impl LifecycleManager {
|
|||
.map(|s| s.first_sequence_number)
|
||||
.min()
|
||||
.unwrap_or(sequence_number);
|
||||
trace!(
|
||||
min_unpersisted_sequence_number=?min,
|
||||
shard_id=%shard_id,
|
||||
sequence_number=?sequence_number,
|
||||
"updated min_unpersisted_sequence_number for persisted shard"
|
||||
);
|
||||
persister
|
||||
.update_min_unpersisted_sequence_number(shard_id, min)
|
||||
.await;
|
||||
|
|
@ -602,7 +634,13 @@ mod tests {
|
|||
|
||||
#[async_trait]
|
||||
impl Persister for TestPersister {
|
||||
async fn persist(&self, partition_id: PartitionId) {
|
||||
async fn persist(
|
||||
&self,
|
||||
_shard_id: ShardId,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
) {
|
||||
let mut p = self.persist_called.lock();
|
||||
p.insert(partition_id);
|
||||
}
|
||||
|
|
@ -662,8 +700,16 @@ mod tests {
|
|||
|
||||
#[async_trait]
|
||||
impl Persister for PausablePersister {
|
||||
async fn persist(&self, partition_id: PartitionId) {
|
||||
self.inner.persist(partition_id).await;
|
||||
async fn persist(
|
||||
&self,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
) {
|
||||
self.inner
|
||||
.persist(shard_id, namespace_id, table_id, partition_id)
|
||||
.await;
|
||||
if let Some(event) = self.event(partition_id) {
|
||||
event.before.wait().await;
|
||||
event.after.wait().await;
|
||||
|
|
|
|||
|
|
@ -1,26 +1,66 @@
|
|||
//! A mock [`LifecycleHandle`] impl for testing.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::{NamespaceId, PartitionId, SequenceNumber, ShardId, TableId};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use super::LifecycleHandle;
|
||||
|
||||
/// Special [`LifecycleHandle`] that never persists and always accepts more data.
|
||||
///
|
||||
/// This is useful to control persists manually.
|
||||
#[derive(Debug, Default, Clone, Copy)]
|
||||
pub struct NoopLifecycleHandle;
|
||||
/// A set of arguments captured from a call to
|
||||
/// [`MockLifecycleHandle::log_write()`].
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MockLifecycleCall {
|
||||
pub partition_id: PartitionId,
|
||||
pub shard_id: ShardId,
|
||||
pub namespace_id: NamespaceId,
|
||||
pub table_id: TableId,
|
||||
pub sequence_number: SequenceNumber,
|
||||
pub bytes_written: usize,
|
||||
pub rows_written: usize,
|
||||
}
|
||||
|
||||
impl LifecycleHandle for NoopLifecycleHandle {
|
||||
/// A mock [`LifecycleHandle`] implementation that records calls made to
|
||||
/// [`Self::log_write()`] and never blocks ingest, always accepting more data.
|
||||
///
|
||||
/// # Cloning
|
||||
///
|
||||
/// Cloning a [`MockLifecycleHandle`] shares the inner state - calls to all
|
||||
/// cloned instances are reported in a call to [`Self::get_log_calls()`].
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct MockLifecycleHandle {
|
||||
log_calls: Arc<Mutex<Vec<MockLifecycleCall>>>,
|
||||
}
|
||||
|
||||
impl MockLifecycleHandle {
|
||||
/// Returns the ordered [`Self::log_write()`] calls made to this mock.
|
||||
pub fn get_log_calls(&self) -> Vec<MockLifecycleCall> {
|
||||
self.log_calls.lock().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl LifecycleHandle for MockLifecycleHandle {
|
||||
fn log_write(
|
||||
&self,
|
||||
_partition_id: PartitionId,
|
||||
_shard_id: ShardId,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
_sequence_number: SequenceNumber,
|
||||
_bytes_written: usize,
|
||||
_rows_written: usize,
|
||||
partition_id: PartitionId,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
sequence_number: SequenceNumber,
|
||||
bytes_written: usize,
|
||||
rows_written: usize,
|
||||
) -> bool {
|
||||
self.log_calls.lock().push(MockLifecycleCall {
|
||||
partition_id,
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
sequence_number,
|
||||
bytes_written,
|
||||
rows_written,
|
||||
});
|
||||
|
||||
// do NOT pause ingest
|
||||
false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
//! Handle all requests from Querier
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{pin::Pin, sync::Arc};
|
||||
|
||||
use arrow::{error::ArrowError, record_batch::RecordBatch};
|
||||
use arrow_util::optimize::{optimize_record_batch, optimize_schema};
|
||||
use data_types::{PartitionId, SequenceNumber};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion_util::MemoryStream;
|
||||
use futures::StreamExt;
|
||||
use futures::{Stream, StreamExt};
|
||||
use generated_types::ingester::IngesterQueryRequest;
|
||||
use observability_deps::tracing::debug;
|
||||
use schema::selection::Selection;
|
||||
|
|
@ -12,8 +15,8 @@ use snafu::{ensure, Snafu};
|
|||
|
||||
use crate::{
|
||||
data::{
|
||||
partition::UnpersistedPartitionData, IngesterData, IngesterQueryPartition,
|
||||
IngesterQueryResponse,
|
||||
namespace::NamespaceName, partition::UnpersistedPartitionData, table::TableName,
|
||||
IngesterData,
|
||||
},
|
||||
query::QueryableBatch,
|
||||
};
|
||||
|
|
@ -47,6 +50,159 @@ pub enum Error {
|
|||
/// A specialized `Error` for Ingester's Query errors
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Stream of snapshots.
|
||||
///
|
||||
/// Every snapshot is a dedicated [`SendableRecordBatchStream`].
|
||||
pub(crate) type SnapshotStream =
|
||||
Pin<Box<dyn Stream<Item = Result<SendableRecordBatchStream, ArrowError>> + Send>>;
|
||||
|
||||
/// Status of a partition that has unpersisted data.
|
||||
///
|
||||
/// Note that this structure is specific to a partition (which itself is bound to a table and
|
||||
/// shard)!
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub struct PartitionStatus {
|
||||
/// Max sequence number persisted
///
/// NOTE(review): `None` presumably means nothing has been persisted for this partition
/// yet - confirm against the producer of this value.
|
||||
pub parquet_max_sequence_number: Option<SequenceNumber>,
|
||||
}
|
||||
|
||||
/// Response data for a single partition.
///
/// All fields are private; values are built with [`IngesterQueryPartition::new`].
|
||||
pub(crate) struct IngesterQueryPartition {
|
||||
/// Stream of snapshots.
///
/// Each item is either a record batch stream for one snapshot or an error.
|
||||
snapshots: SnapshotStream,
|
||||
|
||||
/// Partition ID.
|
||||
id: PartitionId,
|
||||
|
||||
/// Partition persistence status.
|
||||
status: PartitionStatus,
|
||||
}
|
||||
|
||||
// Hand-written `Debug`: the snapshot stream is a boxed `dyn Stream` without a `Debug` bound,
// so it is rendered as a fixed placeholder while `id` and `status` are printed normally.
impl std::fmt::Debug for IngesterQueryPartition {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("IngesterQueryPartition")
|
||||
// Placeholder string instead of the stream contents.
.field("snapshots", &"<SNAPSHOT STREAM>")
|
||||
.field("id", &self.id)
|
||||
.field("status", &self.status)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl IngesterQueryPartition {
|
||||
/// Creates the response data for one partition from its snapshot stream, partition ID and
/// persistence status. The arguments are stored as given; nothing is polled here.
pub(crate) fn new(snapshots: SnapshotStream, id: PartitionId, status: PartitionStatus) -> Self {
|
||||
Self {
|
||||
snapshots,
|
||||
id,
|
||||
status,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream of partitions in this response.
|
||||
pub(crate) type IngesterQueryPartitionStream =
|
||||
Pin<Box<dyn Stream<Item = Result<IngesterQueryPartition, ArrowError>> + Send>>;
|
||||
|
||||
/// Response streams for querier<>ingester requests.
|
||||
///
|
||||
/// The data structure is constructed to allow lazy/streaming data generation. For easier
|
||||
/// consumption according to the wire protocol, use the [`flatten`](Self::flatten) method.
///
/// The nesting is: a stream of partitions, each carrying a stream of snapshots, each of which
/// is a stream of record batches.
|
||||
pub struct IngesterQueryResponse {
|
||||
/// Stream of partitions.
|
||||
partitions: IngesterQueryPartitionStream,
|
||||
}
|
||||
|
||||
// Hand-written `Debug`: the partition stream is a boxed `dyn Stream` without a `Debug` bound,
// so only a fixed placeholder is printed for it.
impl std::fmt::Debug for IngesterQueryResponse {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("IngesterQueryResponse")
|
||||
// Placeholder string instead of the stream contents.
.field("partitions", &"<PARTITION STREAM>")
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl IngesterQueryResponse {
|
||||
/// Make a response
///
/// Wraps the given partition stream without polling it.
|
||||
pub(crate) fn new(partitions: IngesterQueryPartitionStream) -> Self {
|
||||
Self { partitions }
|
||||
}
|
||||
|
||||
/// Flattens the data according to the wire protocol.
///
/// For every `Ok` partition this emits a `StartPartition` message followed by, for each `Ok`
/// snapshot of that partition, a `StartSnapshot` message and then one `RecordBatch` message
/// per batch. An `Err` at any level (partition, snapshot or batch) is forwarded as a single
/// `Err` item; the output then continues with the next element of the respective input
/// stream.
|
||||
pub fn flatten(self) -> FlatIngesterQueryResponseStream {
|
||||
self.partitions
|
||||
.flat_map(|partition_res| match partition_res {
|
||||
Ok(partition) => {
|
||||
// Announce the partition before any of its snapshots.
let head = futures::stream::once(async move {
|
||||
Ok(FlatIngesterQueryResponse::StartPartition {
|
||||
partition_id: partition.id,
|
||||
status: partition.status,
|
||||
})
|
||||
});
|
||||
let tail = partition
|
||||
.snapshots
|
||||
.flat_map(|snapshot_res| match snapshot_res {
|
||||
Ok(snapshot) => {
|
||||
// Schema after `optimize_schema`; sent in `StartSnapshot` and reused for
// every batch of this snapshot.
let schema = Arc::new(optimize_schema(&snapshot.schema()));
|
||||
|
||||
// Separate handle for the `StartSnapshot` message; `schema` itself is moved
// into the batch closure below.
let schema_captured = Arc::clone(&schema);
|
||||
let head = futures::stream::once(async {
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot {
|
||||
schema: schema_captured,
|
||||
})
|
||||
});
|
||||
|
||||
let tail = snapshot.map(move |batch_res| match batch_res {
|
||||
Ok(batch) => Ok(FlatIngesterQueryResponse::RecordBatch {
|
||||
// `?` turns a failed `optimize_record_batch` into the `Err` item for this batch.
batch: optimize_record_batch(&batch, Arc::clone(&schema))?,
|
||||
}),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
head.chain(tail).boxed()
|
||||
}
|
||||
// A failed snapshot becomes a single `Err` item.
Err(e) => futures::stream::once(async { Err(e) }).boxed(),
|
||||
});
|
||||
|
||||
head.chain(tail).boxed()
|
||||
}
|
||||
// A failed partition becomes a single `Err` item.
Err(e) => futures::stream::once(async { Err(e) }).boxed(),
|
||||
})
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
/// Flattened version of [`IngesterQueryResponse`].
|
||||
pub(crate) type FlatIngesterQueryResponseStream =
|
||||
Pin<Box<dyn Stream<Item = Result<FlatIngesterQueryResponse, ArrowError>> + Send>>;
|
||||
|
||||
/// Element within the flat wire protocol.
///
/// As produced by `IngesterQueryResponse::flatten`, the messages arrive in the order
/// `StartPartition`, then per snapshot a `StartSnapshot` followed by its `RecordBatch`
/// messages, and then the next partition.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum FlatIngesterQueryResponse {
|
||||
/// Start a new partition.
|
||||
StartPartition {
|
||||
/// Partition ID.
|
||||
partition_id: PartitionId,
|
||||
|
||||
/// Partition persistence status.
|
||||
status: PartitionStatus,
|
||||
},
|
||||
|
||||
/// Start a new snapshot.
|
||||
///
|
||||
/// The snapshot belongs to the partition of the last [`StartPartition`](Self::StartPartition)
|
||||
/// message.
|
||||
StartSnapshot {
|
||||
/// Snapshot schema.
|
||||
schema: Arc<arrow::datatypes::Schema>,
|
||||
},
|
||||
|
||||
/// Add a record batch to the snapshot that was announced by the last
|
||||
/// [`StartSnapshot`](Self::StartSnapshot) message.
|
||||
RecordBatch {
|
||||
/// Record batch.
|
||||
batch: RecordBatch,
|
||||
},
|
||||
}
|
||||
|
||||
/// Return data to send as a response back to the Querier per its request
|
||||
pub async fn prepare_data_to_querier(
|
||||
ingest_data: &Arc<IngesterData>,
|
||||
|
|
@ -57,7 +213,8 @@ pub async fn prepare_data_to_querier(
|
|||
let mut found_namespace = false;
|
||||
for (shard_id, shard_data) in ingest_data.shards() {
|
||||
debug!(shard_id=%shard_id.get());
|
||||
let namespace_data = match shard_data.namespace(&request.namespace) {
|
||||
let namespace_name = NamespaceName::from(&request.namespace);
|
||||
let namespace_data = match shard_data.namespace(&namespace_name) {
|
||||
Some(namespace_data) => {
|
||||
debug!(namespace=%request.namespace, "found namespace");
|
||||
found_namespace = true;
|
||||
|
|
@ -68,7 +225,8 @@ pub async fn prepare_data_to_querier(
|
|||
}
|
||||
};
|
||||
|
||||
let table_data = match namespace_data.table_data(&request.table) {
|
||||
let table_name = TableName::from(&request.table);
|
||||
let table_data = match namespace_data.table_data(&table_name) {
|
||||
Some(table_data) => {
|
||||
debug!(table_name=%request.table, "found table");
|
||||
table_data
|
||||
|
|
@ -153,7 +311,6 @@ fn prepare_data_to_querier_for_partition(
|
|||
request.table.clone().into(),
|
||||
unpersisted_partition_data.partition_id,
|
||||
vec![],
|
||||
vec![],
|
||||
)
|
||||
})
|
||||
.with_data(unpersisted_partition_data.non_persisted);
|
||||
|
|
@ -188,22 +345,106 @@ fn prepare_data_to_querier_for_partition(
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::{array::new_null_array, record_batch::RecordBatch};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use arrow::{array::new_null_array, datatypes::SchemaRef, record_batch::RecordBatch};
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
use assert_matches::assert_matches;
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use datafusion::{
|
||||
logical_plan::{col, lit},
|
||||
physical_plan::RecordBatchStream,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
use predicate::Predicate;
|
||||
use schema::merge::SchemaMerger;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
data::FlatIngesterQueryResponse,
|
||||
test_util::{
|
||||
make_ingester_data, make_ingester_data_with_tombstones, DataLocation, TEST_NAMESPACE,
|
||||
TEST_TABLE,
|
||||
},
|
||||
};
|
||||
use crate::test_util::{make_ingester_data, DataLocation, TEST_NAMESPACE, TEST_TABLE};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ingester_query_response_flatten() {
|
||||
let batch_1_1 = lp_to_batch("table x=1 0");
|
||||
let batch_1_2 = lp_to_batch("table x=2 1");
|
||||
let batch_2 = lp_to_batch("table y=1 10");
|
||||
let batch_3 = lp_to_batch("table z=1 10");
|
||||
|
||||
let schema_1 = batch_1_1.schema();
|
||||
let schema_2 = batch_2.schema();
|
||||
let schema_3 = batch_3.schema();
|
||||
|
||||
let response = IngesterQueryResponse::new(Box::pin(futures::stream::iter([
|
||||
Ok(IngesterQueryPartition::new(
|
||||
Box::pin(futures::stream::iter([
|
||||
Ok(Box::pin(TestRecordBatchStream::new(
|
||||
vec![
|
||||
Ok(batch_1_1.clone()),
|
||||
Err(ArrowError::NotYetImplemented("not yet implemeneted".into())),
|
||||
Ok(batch_1_2.clone()),
|
||||
],
|
||||
Arc::clone(&schema_1),
|
||||
)) as _),
|
||||
Err(ArrowError::InvalidArgumentError("invalid arg".into())),
|
||||
Ok(Box::pin(TestRecordBatchStream::new(
|
||||
vec![Ok(batch_2.clone())],
|
||||
Arc::clone(&schema_2),
|
||||
)) as _),
|
||||
Ok(Box::pin(TestRecordBatchStream::new(vec![], Arc::clone(&schema_3))) as _),
|
||||
])),
|
||||
PartitionId::new(2),
|
||||
PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
},
|
||||
)),
|
||||
Err(ArrowError::IoError("some io error".into())),
|
||||
Ok(IngesterQueryPartition::new(
|
||||
Box::pin(futures::stream::iter([])),
|
||||
PartitionId::new(1),
|
||||
PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
},
|
||||
)),
|
||||
])));
|
||||
|
||||
let actual: Vec<_> = response.flatten().collect().await;
|
||||
let expected = vec![
|
||||
Ok(FlatIngesterQueryResponse::StartPartition {
|
||||
partition_id: PartitionId::new(2),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
},
|
||||
}),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema: schema_1 }),
|
||||
Ok(FlatIngesterQueryResponse::RecordBatch { batch: batch_1_1 }),
|
||||
Err(ArrowError::NotYetImplemented("not yet implemeneted".into())),
|
||||
Ok(FlatIngesterQueryResponse::RecordBatch { batch: batch_1_2 }),
|
||||
Err(ArrowError::InvalidArgumentError("invalid arg".into())),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema: schema_2 }),
|
||||
Ok(FlatIngesterQueryResponse::RecordBatch { batch: batch_2 }),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema: schema_3 }),
|
||||
Err(ArrowError::IoError("some io error".into())),
|
||||
Ok(FlatIngesterQueryResponse::StartPartition {
|
||||
partition_id: PartitionId::new(1),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
assert_eq!(actual.len(), expected.len());
|
||||
for (actual, expected) in actual.into_iter().zip(expected) {
|
||||
match (actual, expected) {
|
||||
(Ok(actual), Ok(expected)) => {
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
(Err(_), Err(_)) => {
|
||||
// cannot compare `ArrowError`, but it's unlikely that someone changed the error
|
||||
}
|
||||
(Ok(_), Err(_)) => panic!("Actual is Ok but expected is Err"),
|
||||
(Err(_), Ok(_)) => panic!("Actual is Err but expected is Ok"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prepare_data_to_querier() {
|
||||
|
|
@ -360,180 +601,44 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prepare_data_to_querier_with_tombstones() {
|
||||
test_helpers::maybe_start_logging();
|
||||
pub struct TestRecordBatchStream {
|
||||
schema: SchemaRef,
|
||||
batches: Vec<Result<RecordBatch, ArrowError>>,
|
||||
}
|
||||
|
||||
// make 7 scenarios for ingester data with tombstones
|
||||
let mut scenarios = vec![];
|
||||
for loc in &[
|
||||
DataLocation::BUFFER,
|
||||
DataLocation::BUFFER_SNAPSHOT,
|
||||
DataLocation::BUFFER_PERSISTING,
|
||||
DataLocation::BUFFER_SNAPSHOT_PERSISTING,
|
||||
DataLocation::SNAPSHOT,
|
||||
DataLocation::SNAPSHOT_PERSISTING,
|
||||
DataLocation::PERSISTING,
|
||||
] {
|
||||
let scenario = Arc::new(make_ingester_data_with_tombstones(*loc).await);
|
||||
scenarios.push((loc, scenario));
|
||||
impl TestRecordBatchStream {
|
||||
pub fn new(batches: Vec<Result<RecordBatch, ArrowError>>, schema: SchemaRef) -> Self {
|
||||
Self { schema, batches }
|
||||
}
|
||||
}
|
||||
|
||||
// read data from all scenarios without any filters
|
||||
let request = Arc::new(IngesterQueryRequest::new(
|
||||
TEST_NAMESPACE.to_string(),
|
||||
TEST_TABLE.to_string(),
|
||||
vec![],
|
||||
None,
|
||||
));
|
||||
let expected_not_persisting = vec![
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
"| city | day | temp | time |",
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
"| Andover | mon | | 1970-01-01T00:00:00.000000046Z |",
|
||||
"| Andover | tue | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Medford | sun | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"| Medford | wed | | 1970-01-01T00:00:00.000000026Z |",
|
||||
"| Reading | mon | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| Wilmington | mon | | 1970-01-01T00:00:00.000000035Z |",
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
];
|
||||
// For "persisting" data locations the tombstones were NOT applied because they arrived AFTER the data
|
||||
// transitioned into the "persisting" state. In this case, the ingester will apply the tombstones.
|
||||
let expected_persisting = vec![
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
"| city | day | temp | time |",
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
"| Andover | mon | | 1970-01-01T00:00:00.000000046Z |",
|
||||
"| Andover | tue | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Boston | mon | | 1970-01-01T00:00:00.000000038Z |",
|
||||
"| Boston | sun | 60 | 1970-01-01T00:00:00.000000036Z |",
|
||||
"| Medford | sun | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"| Medford | wed | | 1970-01-01T00:00:00.000000026Z |",
|
||||
"| Reading | mon | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| Wilmington | mon | | 1970-01-01T00:00:00.000000035Z |",
|
||||
"+------------+-----+------+--------------------------------+",
|
||||
];
|
||||
for (loc, scenario) in &scenarios {
|
||||
println!("Location: {loc:?}");
|
||||
let expected = if loc.intersects(DataLocation::PERSISTING) {
|
||||
&expected_persisting
|
||||
impl RecordBatchStream for TestRecordBatchStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
Arc::clone(&self.schema)
|
||||
}
|
||||
}
|
||||
|
||||
impl futures::Stream for TestRecordBatchStream {
|
||||
type Item = Result<RecordBatch, ArrowError>;
|
||||
|
||||
fn poll_next(
|
||||
mut self: std::pin::Pin<&mut Self>,
|
||||
_: &mut Context<'_>,
|
||||
) -> Poll<Option<Self::Item>> {
|
||||
if self.batches.is_empty() {
|
||||
Poll::Ready(None)
|
||||
} else {
|
||||
&expected_not_persisting
|
||||
};
|
||||
|
||||
let stream = prepare_data_to_querier(scenario, &request).await.unwrap();
|
||||
let result = ingester_response_to_record_batches(stream).await;
|
||||
assert_batches_sorted_eq!(expected, &result);
|
||||
Poll::Ready(Some(self.batches.remove(0)))
|
||||
}
|
||||
}
|
||||
|
||||
// read data from all scenarios and filter out column day
|
||||
let request = Arc::new(IngesterQueryRequest::new(
|
||||
TEST_NAMESPACE.to_string(),
|
||||
TEST_TABLE.to_string(),
|
||||
vec!["city".to_string(), "temp".to_string(), "time".to_string()],
|
||||
None,
|
||||
));
|
||||
let expected_not_persisting = vec![
|
||||
"+------------+------+--------------------------------+",
|
||||
"| city | temp | time |",
|
||||
"+------------+------+--------------------------------+",
|
||||
"| Andover | | 1970-01-01T00:00:00.000000046Z |",
|
||||
"| Andover | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Medford | | 1970-01-01T00:00:00.000000026Z |",
|
||||
"| Medford | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"| Reading | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| Wilmington | | 1970-01-01T00:00:00.000000035Z |",
|
||||
"+------------+------+--------------------------------+",
|
||||
];
|
||||
// For "persisting" data locations the tombstones were NOT applied because they arrived AFTER the data
|
||||
// transitioned into the "persisting" state. In this case, the ingester will apply the tombstones.
|
||||
let expected_persisting = vec![
|
||||
"+------------+------+--------------------------------+",
|
||||
"| city | temp | time |",
|
||||
"+------------+------+--------------------------------+",
|
||||
"| Andover | | 1970-01-01T00:00:00.000000046Z |",
|
||||
"| Andover | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Boston | | 1970-01-01T00:00:00.000000038Z |",
|
||||
"| Boston | 60 | 1970-01-01T00:00:00.000000036Z |",
|
||||
"| Medford | | 1970-01-01T00:00:00.000000026Z |",
|
||||
"| Medford | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"| Reading | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| Wilmington | | 1970-01-01T00:00:00.000000035Z |",
|
||||
"+------------+------+--------------------------------+",
|
||||
];
|
||||
for (loc, scenario) in &scenarios {
|
||||
println!("Location: {loc:?}");
|
||||
let expected = if loc.intersects(DataLocation::PERSISTING) {
|
||||
&expected_persisting
|
||||
} else {
|
||||
&expected_not_persisting
|
||||
};
|
||||
|
||||
let stream = prepare_data_to_querier(scenario, &request).await.unwrap();
|
||||
let result = ingester_response_to_record_batches(stream).await;
|
||||
assert_batches_sorted_eq!(expected, &result);
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(self.batches.len(), Some(self.batches.len()))
|
||||
}
|
||||
}
|
||||
|
||||
// read data from all scenarios, filter out column day, city Medford, time outside range [0, 42)
|
||||
let expr = col("city").not_eq(lit("Medford"));
|
||||
let pred = Predicate::default().with_expr(expr).with_range(0, 42);
|
||||
let request = Arc::new(IngesterQueryRequest::new(
|
||||
TEST_NAMESPACE.to_string(),
|
||||
TEST_TABLE.to_string(),
|
||||
vec!["city".to_string(), "temp".to_string(), "time".to_string()],
|
||||
Some(pred),
|
||||
));
|
||||
// predicates and de-dup are NOT applied!, otherwise this would look like this:
|
||||
// let expected = vec![
|
||||
// "+------------+------+--------------------------------+",
|
||||
// "| city | temp | time |",
|
||||
// "+------------+------+--------------------------------+",
|
||||
// "| Andover | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
// "| Reading | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
// "| Wilmington | | 1970-01-01T00:00:00.000000035Z |",
|
||||
// "+------------+------+--------------------------------+",
|
||||
// ];
|
||||
let expected_not_persisting = vec![
|
||||
"+------------+------+--------------------------------+",
|
||||
"| city | temp | time |",
|
||||
"+------------+------+--------------------------------+",
|
||||
"| Andover | | 1970-01-01T00:00:00.000000046Z |",
|
||||
"| Andover | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Medford | | 1970-01-01T00:00:00.000000026Z |",
|
||||
"| Medford | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"| Reading | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| Wilmington | | 1970-01-01T00:00:00.000000035Z |",
|
||||
"+------------+------+--------------------------------+",
|
||||
];
|
||||
// For "persisting" data locations the tombstones were NOT applied because they arrived AFTER the data
|
||||
// transitioned into the "persisting" state. In this case, the ingester will apply the tombstones.
|
||||
let expected_persisting = vec![
|
||||
"+------------+------+--------------------------------+",
|
||||
"| city | temp | time |",
|
||||
"+------------+------+--------------------------------+",
|
||||
"| Andover | | 1970-01-01T00:00:00.000000046Z |",
|
||||
"| Andover | 56 | 1970-01-01T00:00:00.000000030Z |",
|
||||
"| Boston | | 1970-01-01T00:00:00.000000038Z |",
|
||||
"| Boston | 60 | 1970-01-01T00:00:00.000000036Z |",
|
||||
"| Medford | | 1970-01-01T00:00:00.000000026Z |",
|
||||
"| Medford | 55 | 1970-01-01T00:00:00.000000022Z |",
|
||||
"| Reading | 58 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| Wilmington | | 1970-01-01T00:00:00.000000035Z |",
|
||||
"+------------+------+--------------------------------+",
|
||||
];
|
||||
for (loc, scenario) in &scenarios {
|
||||
println!("Location: {loc:?}");
|
||||
let expected = if loc.intersects(DataLocation::PERSISTING) {
|
||||
&expected_persisting
|
||||
} else {
|
||||
&expected_not_persisting
|
||||
};
|
||||
|
||||
let stream = prepare_data_to_querier(scenario, &request).await.unwrap();
|
||||
let result = ingester_response_to_record_batches(stream).await;
|
||||
assert_batches_sorted_eq!(expected, &result);
|
||||
}
|
||||
fn lp_to_batch(lp: &str) -> RecordBatch {
|
||||
lp_to_mutable_batch(lp).1.to_arrow(Selection::All).unwrap()
|
||||
}
|
||||
|
||||
/// Convert [`IngesterQueryResponse`] to a set of [`RecordBatch`]es.
|
||||
|
|
|
|||
|
|
@ -6,26 +6,26 @@ use arrow::record_batch::RecordBatch;
|
|||
use arrow_util::util::ensure_schema;
|
||||
use data_types::{
|
||||
ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary,
|
||||
TimestampMinMax, Tombstone,
|
||||
TimestampMinMax,
|
||||
};
|
||||
use datafusion::physical_plan::{
|
||||
common::SizedRecordBatchStream,
|
||||
metrics::{ExecutionPlanMetricsSet, MemTrackingMetrics},
|
||||
SendableRecordBatchStream,
|
||||
use datafusion::{
|
||||
error::DataFusionError,
|
||||
physical_plan::{
|
||||
common::SizedRecordBatchStream,
|
||||
metrics::{ExecutionPlanMetricsSet, MemTrackingMetrics},
|
||||
SendableRecordBatchStream,
|
||||
},
|
||||
};
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
QueryChunk, QueryChunkError, QueryChunkMeta,
|
||||
QueryChunk, QueryChunkMeta,
|
||||
};
|
||||
use observability_deps::tracing::trace;
|
||||
use predicate::{
|
||||
delete_predicate::{tombstones_to_delete_predicates, tombstones_to_delete_predicates_iter},
|
||||
Predicate,
|
||||
};
|
||||
use predicate::Predicate;
|
||||
use schema::{merge::merge_record_batch_schemas, selection::Selection, sort::SortKey, Schema};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use crate::data::partition::SnapshotBatch;
|
||||
use crate::data::{partition::SnapshotBatch, table::TableName};
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Debug, Snafu)]
|
||||
|
|
@ -53,11 +53,8 @@ pub(crate) struct QueryableBatch {
|
|||
/// data
|
||||
pub(crate) data: Vec<Arc<SnapshotBatch>>,
|
||||
|
||||
/// Delete predicates of the tombstones
|
||||
pub(crate) delete_predicates: Vec<Arc<DeletePredicate>>,
|
||||
|
||||
/// This is needed to return a reference for a trait function
|
||||
pub(crate) table_name: Arc<str>,
|
||||
pub(crate) table_name: TableName,
|
||||
|
||||
/// Partition ID
|
||||
pub(crate) partition_id: PartitionId,
|
||||
|
|
@ -66,15 +63,12 @@ pub(crate) struct QueryableBatch {
|
|||
impl QueryableBatch {
|
||||
/// Initialize a QueryableBatch
|
||||
pub(crate) fn new(
|
||||
table_name: Arc<str>,
|
||||
table_name: TableName,
|
||||
partition_id: PartitionId,
|
||||
data: Vec<Arc<SnapshotBatch>>,
|
||||
deletes: Vec<Tombstone>,
|
||||
) -> Self {
|
||||
let delete_predicates = tombstones_to_delete_predicates(&deletes);
|
||||
Self {
|
||||
data,
|
||||
delete_predicates,
|
||||
table_name,
|
||||
partition_id,
|
||||
}
|
||||
|
|
@ -86,12 +80,6 @@ impl QueryableBatch {
|
|||
self
|
||||
}
|
||||
|
||||
/// Add more tombstones
|
||||
pub(crate) fn add_tombstones(&mut self, deletes: &[Tombstone]) {
|
||||
let delete_predicates = tombstones_to_delete_predicates_iter(deletes);
|
||||
self.delete_predicates.extend(delete_predicates);
|
||||
}
|
||||
|
||||
/// return min and max of all the snapshots
|
||||
pub(crate) fn min_max_sequence_numbers(&self) -> (SequenceNumber, SequenceNumber) {
|
||||
let min = self
|
||||
|
|
@ -110,11 +98,6 @@ impl QueryableBatch {
|
|||
|
||||
(min, max)
|
||||
}
|
||||
|
||||
/// return true if it has no data
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
self.data.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryChunkMeta for QueryableBatch {
|
||||
|
|
@ -144,16 +127,16 @@ impl QueryChunkMeta for QueryableBatch {
|
|||
None // Ingester data is not sorted
|
||||
}
|
||||
|
||||
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
|
||||
self.delete_predicates.as_ref()
|
||||
}
|
||||
|
||||
fn timestamp_min_max(&self) -> Option<TimestampMinMax> {
|
||||
// Note: we need to consider which option we want to go with
|
||||
// . Return None here and avoid taking time to compute time's min max of RecordBatches (current choice)
|
||||
// . Compute time's min max here and avoid compacting non-overlapped QueryableBatches in the Ingester
|
||||
None
|
||||
}
|
||||
|
||||
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryChunk for QueryableBatch {
|
||||
|
|
@ -185,7 +168,7 @@ impl QueryChunk for QueryableBatch {
|
|||
_ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
_columns: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
|
|
@ -199,7 +182,7 @@ impl QueryChunk for QueryableBatch {
|
|||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
|
|
@ -210,12 +193,16 @@ impl QueryChunk for QueryableBatch {
|
|||
mut ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, QueryChunkError> {
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
ctx.set_metadata("storage", "ingester");
|
||||
ctx.set_metadata("projection", format!("{}", selection));
|
||||
trace!(?selection, "selection");
|
||||
|
||||
let schema = self.schema().select(selection).context(SchemaSnafu)?;
|
||||
let schema = self
|
||||
.schema()
|
||||
.select(selection)
|
||||
.context(SchemaSnafu)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
// Get all record batches from their snapshots
|
||||
let batches = self
|
||||
|
|
@ -234,7 +221,8 @@ impl QueryChunk for QueryableBatch {
|
|||
.map(Arc::new);
|
||||
Some(batch)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
// Return stream of data
|
||||
let dummy_metrics = ExecutionPlanMetricsSet::new();
|
||||
|
|
@ -257,165 +245,3 @@ impl QueryChunk for QueryableBatch {
|
|||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::{
|
||||
array::{
|
||||
ArrayRef, BooleanArray, DictionaryArray, Float64Array, Int64Array, StringArray,
|
||||
TimestampNanosecondArray, UInt64Array,
|
||||
},
|
||||
datatypes::{DataType, Int32Type, TimeUnit},
|
||||
};
|
||||
use data_types::{DeleteExpr, Op, Scalar, TimestampRange};
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::create_tombstone;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_batch_schema() {
|
||||
// Merge schema of the batches
|
||||
// The fields in the schema are sorted by column name
|
||||
let batches = create_batches();
|
||||
let merged_schema = (*merge_record_batch_schemas(&batches)).clone();
|
||||
|
||||
// Expected Arrow schema
|
||||
let arrow_schema = Arc::new(arrow::datatypes::Schema::new(vec![
|
||||
arrow::datatypes::Field::new(
|
||||
"dict",
|
||||
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
|
||||
true,
|
||||
),
|
||||
arrow::datatypes::Field::new("int64", DataType::Int64, true),
|
||||
arrow::datatypes::Field::new("string", DataType::Utf8, true),
|
||||
arrow::datatypes::Field::new("bool", DataType::Boolean, true),
|
||||
arrow::datatypes::Field::new(
|
||||
"time",
|
||||
DataType::Timestamp(TimeUnit::Nanosecond, None),
|
||||
false,
|
||||
),
|
||||
arrow::datatypes::Field::new("uint64", DataType::UInt64, false),
|
||||
arrow::datatypes::Field::new("float64", DataType::Float64, true),
|
||||
]));
|
||||
let expected_schema = Schema::try_from(arrow_schema)
|
||||
.unwrap()
|
||||
.sort_fields_by_name();
|
||||
|
||||
assert_eq!(
|
||||
expected_schema, merged_schema,
|
||||
"\nExpected:\n{:#?}\nActual:\n{:#?}",
|
||||
expected_schema, merged_schema
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tombstones_to_delete_predicates() {
|
||||
// create tombstones
|
||||
let tombstones = vec![
|
||||
create_tombstone(1, 1, 1, 1, 100, 200, "temp=10"),
|
||||
create_tombstone(1, 1, 1, 2, 100, 350, "temp!=10 and city=Boston"),
|
||||
];
|
||||
|
||||
// This new queryable batch will convert tombstone to delete predicates
|
||||
let query_batch =
|
||||
QueryableBatch::new("test_table".into(), PartitionId::new(0), vec![], tombstones);
|
||||
let predicates = query_batch.delete_predicates();
|
||||
let expected = vec![
|
||||
Arc::new(DeletePredicate {
|
||||
range: TimestampRange::new(100, 200),
|
||||
exprs: vec![DeleteExpr {
|
||||
column: String::from("temp"),
|
||||
op: Op::Eq,
|
||||
scalar: Scalar::I64(10),
|
||||
}],
|
||||
}),
|
||||
Arc::new(DeletePredicate {
|
||||
range: TimestampRange::new(100, 350),
|
||||
exprs: vec![
|
||||
DeleteExpr {
|
||||
column: String::from("temp"),
|
||||
op: Op::Ne,
|
||||
scalar: Scalar::I64(10),
|
||||
},
|
||||
DeleteExpr {
|
||||
column: String::from("city"),
|
||||
op: Op::Eq,
|
||||
scalar: Scalar::String(String::from(r#"Boston"#)),
|
||||
},
|
||||
],
|
||||
}),
|
||||
];
|
||||
|
||||
assert_eq!(expected, predicates);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------------
|
||||
// Data for testing
|
||||
|
||||
// Create pure RecordBatches without knowledge of Influx datatype
//
// Returns two batches with different column sets:
//   * batch 1: dict, int64, string, bool, time            (3 rows)
//   * batch 2: dict, uint64, float64, string, bool, time  (2 rows)
// Every column is declared nullable except `time` (both batches) and `uint64`.
|
||||
fn create_batches() -> Vec<Arc<RecordBatch>> {
|
||||
// Batch 1: <dict, i64, str, bool, time> & 3 rows
|
||||
let dict_array: ArrayRef = Arc::new(
|
||||
vec![Some("a"), None, Some("b")]
|
||||
.into_iter()
|
||||
.collect::<DictionaryArray<Int32Type>>(),
|
||||
);
|
||||
let int64_array: ArrayRef =
|
||||
Arc::new([Some(-1), None, Some(2)].iter().collect::<Int64Array>());
|
||||
let string_array: ArrayRef = Arc::new(
|
||||
vec![Some("foo"), Some("and"), Some("bar")]
|
||||
.into_iter()
|
||||
.collect::<StringArray>(),
|
||||
);
|
||||
let bool_array: ArrayRef = Arc::new(
|
||||
[Some(true), None, Some(false)]
|
||||
.iter()
|
||||
.collect::<BooleanArray>(),
|
||||
);
|
||||
let ts_array: ArrayRef = Arc::new(
|
||||
[Some(150), Some(200), Some(1526823730000000000)]
|
||||
.iter()
|
||||
.collect::<TimestampNanosecondArray>(),
|
||||
);
|
||||
// The third tuple element is the column's nullable flag.
let batch1 = RecordBatch::try_from_iter_with_nullable(vec![
|
||||
("dict", dict_array, true),
|
||||
("int64", int64_array, true),
|
||||
("string", string_array, true),
|
||||
("bool", bool_array, true),
|
||||
("time", ts_array, false), // not null
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
// Batch 2: <dict, u64, f64, str, bool, time> & 2 rows
|
||||
let dict_array: ArrayRef = Arc::new(
|
||||
vec![None, Some("d")]
|
||||
.into_iter()
|
||||
.collect::<DictionaryArray<Int32Type>>(),
|
||||
);
|
||||
let uint64_array: ArrayRef = Arc::new([Some(1), Some(2)].iter().collect::<UInt64Array>()); // not null
|
||||
let float64_array: ArrayRef =
|
||||
Arc::new([Some(1.0), Some(2.0)].iter().collect::<Float64Array>());
|
||||
let string_array: ArrayRef = Arc::new(
|
||||
vec![Some("foo"), Some("bar")]
|
||||
.into_iter()
|
||||
.collect::<StringArray>(),
|
||||
);
|
||||
let bool_array: ArrayRef = Arc::new([Some(true), None].iter().collect::<BooleanArray>());
|
||||
let ts_array: ArrayRef = Arc::new(
|
||||
[Some(100), Some(1626823730000000000)] // not null
|
||||
.iter()
|
||||
.collect::<TimestampNanosecondArray>(),
|
||||
);
|
||||
let batch2 = RecordBatch::try_from_iter_with_nullable(vec![
|
||||
("dict", dict_array, true),
|
||||
("uint64", uint64_array, false), // not null
|
||||
("float64", float64_array, true),
|
||||
("string", string_array, true),
|
||||
("bool", bool_array, true),
|
||||
("time", ts_array, false), // not null
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
vec![Arc::new(batch1), Arc::new(batch2)]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,8 +30,8 @@ use trace::ctx::SpanContext;
|
|||
use write_summary::WriteSummary;
|
||||
|
||||
use crate::{
|
||||
data::{FlatIngesterQueryResponse, FlatIngesterQueryResponseStream},
|
||||
handler::IngestHandler,
|
||||
querier_handler::{FlatIngesterQueryResponse, FlatIngesterQueryResponseStream},
|
||||
};
|
||||
|
||||
/// This type is responsible for managing all gRPC services exposed by
|
||||
|
|
@ -410,9 +410,6 @@ impl Stream for GetStream {
|
|||
parquet_max_sequence_number: status
|
||||
.parquet_max_sequence_number
|
||||
.map(|x| x.get()),
|
||||
tombstone_max_sequence_number: status
|
||||
.tombstone_max_sequence_number
|
||||
.map(|x| x.get()),
|
||||
}),
|
||||
};
|
||||
prost::Message::encode(&app_metadata, &mut bytes)
|
||||
|
|
@ -467,8 +464,9 @@ mod tests {
|
|||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
use schema::selection::Selection;
|
||||
|
||||
use crate::querier_handler::PartitionStatus;
|
||||
|
||||
use super::*;
|
||||
use crate::data::partition::PartitionStatus;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_stream_empty() {
|
||||
|
|
@ -489,7 +487,6 @@ mod tests {
|
|||
partition_id: PartitionId::new(1),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
},
|
||||
}),
|
||||
Ok(FlatIngesterQueryResponse::StartSnapshot { schema }),
|
||||
|
|
@ -502,7 +499,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(proto::PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
}),
|
||||
},
|
||||
}),
|
||||
|
|
@ -527,7 +523,6 @@ mod tests {
|
|||
partition_id: PartitionId::new(1),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
},
|
||||
}),
|
||||
Err(ArrowError::IoError("foo".into())),
|
||||
|
|
@ -535,7 +530,6 @@ mod tests {
|
|||
partition_id: PartitionId::new(1),
|
||||
status: PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
},
|
||||
}),
|
||||
],
|
||||
|
|
@ -546,7 +540,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(proto::PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
}),
|
||||
},
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -396,6 +396,12 @@ something clever.",
|
|||
if let Some(delta) = duration_since_production {
|
||||
// Update the TTBR metric before potentially sleeping.
|
||||
self.time_to_be_readable.set(delta);
|
||||
trace!(
|
||||
kafka_topic=%self.topic_name,
|
||||
shard_index=%self.shard_index,
|
||||
delta=%delta.as_millis(),
|
||||
"reporting TTBR for shard (ms)"
|
||||
);
|
||||
}
|
||||
|
||||
if should_pause {
|
||||
|
|
@ -939,7 +945,7 @@ mod tests {
|
|||
Ok(DmlOperation::Write(make_write("good_op", 2)))
|
||||
]],
|
||||
sink_rets = [
|
||||
Err(crate::data::Error::TableNotPresent),
|
||||
Err(crate::data::Error::NamespaceNotFound{namespace: "bananas".to_string() }),
|
||||
Ok(true),
|
||||
],
|
||||
want_ttbr = 2,
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
//! [`LifecycleManager`]: crate::lifecycle::LifecycleManager
|
||||
//! [`LifecycleHandle::can_resume_ingest()`]: crate::lifecycle::LifecycleHandle::can_resume_ingest()
|
||||
|
||||
pub mod handler;
|
||||
pub(crate) mod handler;
|
||||
mod periodic_watermark_fetcher;
|
||||
mod sink;
|
||||
|
||||
|
|
@ -25,8 +25,8 @@ mod sink;
|
|||
pub mod mock_sink;
|
||||
#[cfg(test)]
|
||||
pub mod mock_watermark_fetcher;
|
||||
pub mod sink_adaptor;
|
||||
pub mod sink_instrumentation;
|
||||
pub(crate) mod sink_adaptor;
|
||||
pub(crate) mod sink_instrumentation;
|
||||
|
||||
pub use periodic_watermark_fetcher::*;
|
||||
pub use sink::*;
|
||||
pub(crate) use periodic_watermark_fetcher::*;
|
||||
pub(crate) use sink::*;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ use super::sink_instrumentation::WatermarkFetcher;
|
|||
/// Emits an error metric named `write_buffer_watermark_fetch_errors` that
|
||||
/// increments once per fetch error.
|
||||
#[derive(Debug)]
|
||||
pub struct PeriodicWatermarkFetcher {
|
||||
pub(crate) struct PeriodicWatermarkFetcher {
|
||||
last_watermark: Arc<AtomicI64>,
|
||||
poll_handle: JoinHandle<()>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use dml::DmlOperation;
|
|||
|
||||
/// A [`DmlSink`] handles [`DmlOperation`] instances read from a shard.
|
||||
#[async_trait]
|
||||
pub trait DmlSink: Debug + Send + Sync {
|
||||
pub(crate) trait DmlSink: Debug + Send + Sync {
|
||||
/// Apply `op` read from a shard, returning `Ok(true)` if ingest should
|
||||
/// be paused.
|
||||
async fn apply(&self, op: DmlOperation) -> Result<bool, crate::data::Error>;
|
||||
|
|
|
|||
|
|
@ -414,11 +414,13 @@ mod tests {
|
|||
let got = test(
|
||||
op,
|
||||
&metrics,
|
||||
Err(crate::data::Error::TableNotPresent),
|
||||
Err(crate::data::Error::NamespaceNotFound {
|
||||
namespace: "bananas".to_string(),
|
||||
}),
|
||||
Some(12345),
|
||||
)
|
||||
.await;
|
||||
assert_matches!(got, Err(crate::data::Error::TableNotPresent));
|
||||
assert_matches!(got, Err(crate::data::Error::NamespaceNotFound { .. }));
|
||||
|
||||
// Validate the various write buffer metrics
|
||||
assert_matches!(
|
||||
|
|
|
|||
|
|
@ -9,17 +9,16 @@ use arrow::record_batch::RecordBatch;
|
|||
use arrow_util::assert_batches_eq;
|
||||
use bitflags::bitflags;
|
||||
use data_types::{
|
||||
CompactionLevel, NamespaceId, NonEmptyString, PartitionId, PartitionKey, Sequence,
|
||||
SequenceNumber, ShardId, ShardIndex, TableId, Timestamp, Tombstone, TombstoneId,
|
||||
CompactionLevel, NamespaceId, PartitionId, PartitionKey, Sequence, SequenceNumber, ShardId,
|
||||
ShardIndex, TableId,
|
||||
};
|
||||
use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
|
||||
use dml::{DmlMeta, DmlOperation, DmlWrite};
|
||||
use iox_catalog::{interface::Catalog, mem::MemCatalog};
|
||||
use iox_query::test::{raw_data, TestChunk};
|
||||
use iox_time::{SystemProvider, Time};
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use object_store::memory::InMemory;
|
||||
use parquet_file::metadata::IoxMetadata;
|
||||
use predicate::delete_predicate::parse_delete_predicate;
|
||||
use schema::sort::SortKey;
|
||||
use uuid::Uuid;
|
||||
|
||||
|
|
@ -28,31 +27,10 @@ use crate::{
|
|||
partition::{resolver::CatalogPartitionResolver, PersistingBatch, SnapshotBatch},
|
||||
IngesterData,
|
||||
},
|
||||
lifecycle::{LifecycleConfig, LifecycleHandle, LifecycleManager},
|
||||
lifecycle::{LifecycleConfig, LifecycleManager},
|
||||
query::QueryableBatch,
|
||||
};
|
||||
|
||||
/// Create a [`Tombstone`] for testing.
///
/// Each raw `i64` argument is wrapped in the matching newtype
/// (`TombstoneId`, `TableId`, `ShardId`, `SequenceNumber`, `Timestamp`), and
/// `predicate` is stored verbatim as the tombstone's `serialized_predicate`.
|
||||
pub(crate) fn create_tombstone(
|
||||
id: i64,
|
||||
table_id: i64,
|
||||
shard_id: i64,
|
||||
seq_num: i64,
|
||||
min_time: i64,
|
||||
max_time: i64,
|
||||
predicate: &str,
|
||||
) -> Tombstone {
|
||||
Tombstone {
|
||||
id: TombstoneId::new(id),
|
||||
table_id: TableId::new(table_id),
|
||||
shard_id: ShardId::new(shard_id),
|
||||
sequence_number: SequenceNumber::new(seq_num),
|
||||
min_time: Timestamp::new(min_time),
|
||||
max_time: Timestamp::new(max_time),
|
||||
serialized_predicate: predicate.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn make_meta(
|
||||
object_store_id: Uuid,
|
||||
|
|
@ -93,15 +71,8 @@ pub(crate) fn make_persisting_batch(
|
|||
partition_id: i64,
|
||||
object_store_id: Uuid,
|
||||
batches: Vec<Arc<RecordBatch>>,
|
||||
tombstones: Vec<Tombstone>,
|
||||
) -> Arc<PersistingBatch> {
|
||||
let queryable_batch = make_queryable_batch_with_deletes(
|
||||
table_name,
|
||||
partition_id,
|
||||
seq_num_start,
|
||||
batches,
|
||||
tombstones,
|
||||
);
|
||||
let queryable_batch = make_queryable_batch(table_name, partition_id, seq_num_start, batches);
|
||||
Arc::new(PersistingBatch {
|
||||
shard_id: ShardId::new(shard_id),
|
||||
table_id: TableId::new(table_id),
|
||||
|
|
@ -116,16 +87,6 @@ pub(crate) fn make_queryable_batch(
|
|||
partition_id: i64,
|
||||
seq_num_start: i64,
|
||||
batches: Vec<Arc<RecordBatch>>,
|
||||
) -> Arc<QueryableBatch> {
|
||||
make_queryable_batch_with_deletes(table_name, partition_id, seq_num_start, batches, vec![])
|
||||
}
|
||||
|
||||
pub(crate) fn make_queryable_batch_with_deletes(
|
||||
table_name: &str,
|
||||
partition_id: i64,
|
||||
seq_num_start: i64,
|
||||
batches: Vec<Arc<RecordBatch>>,
|
||||
tombstones: Vec<Tombstone>,
|
||||
) -> Arc<QueryableBatch> {
|
||||
// make snapshots for the batches
|
||||
let mut snapshots = vec![];
|
||||
|
|
@ -140,7 +101,6 @@ pub(crate) fn make_queryable_batch_with_deletes(
|
|||
table_name.into(),
|
||||
PartitionId::new(partition_id),
|
||||
snapshots,
|
||||
tombstones,
|
||||
))
|
||||
}
|
||||
|
||||
|
|
@ -655,65 +615,24 @@ pub(crate) async fn make_ingester_data(two_partitions: bool, loc: DataLocation)
|
|||
let _ignored = ingester
|
||||
.shard(shard_id)
|
||||
.unwrap()
|
||||
.namespace(TEST_NAMESPACE)
|
||||
.namespace(&TEST_NAMESPACE.into())
|
||||
.unwrap()
|
||||
.snapshot_to_persisting(TEST_TABLE, &PartitionKey::from(TEST_PARTITION_1))
|
||||
.snapshot_to_persisting(&TEST_TABLE.into(), &PartitionKey::from(TEST_PARTITION_1))
|
||||
.await;
|
||||
} else if loc.contains(DataLocation::SNAPSHOT) {
|
||||
// move partition 1 data to snapshot
|
||||
let _ignored = ingester
|
||||
.shard(shard_id)
|
||||
.unwrap()
|
||||
.namespace(TEST_NAMESPACE)
|
||||
.namespace(&TEST_NAMESPACE.into())
|
||||
.unwrap()
|
||||
.snapshot(TEST_TABLE, &PartitionKey::from(TEST_PARTITION_1))
|
||||
.snapshot(&TEST_TABLE.into(), &PartitionKey::from(TEST_PARTITION_1))
|
||||
.await;
|
||||
}
|
||||
|
||||
ingester
|
||||
}
|
||||
|
||||
/// Build an [`IngesterData`] for tests, backed by a `MemCatalog` and an
/// `InMemory` object store, and fill it with one partition of data plus a
/// tombstone via `make_one_partition_with_tombstones`.
///
/// `loc` is forwarded unchanged to that helper.
pub(crate) async fn make_ingester_data_with_tombstones(loc: DataLocation) -> IngesterData {
|
||||
// Arbitrary values: they are not used by the tests
|
||||
let metrics: Arc<metric::Registry> = Default::default();
|
||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
||||
let object_store = Arc::new(InMemory::new());
|
||||
let exec = Arc::new(iox_query::exec::Executor::new(1));
|
||||
let lifecycle = LifecycleManager::new(
|
||||
LifecycleConfig::new(
|
||||
200_000_000,
|
||||
100_000_000,
|
||||
100_000_000,
|
||||
Duration::from_secs(100_000_000),
|
||||
Duration::from_secs(100_000_000),
|
||||
100_000_000,
|
||||
),
|
||||
Arc::clone(&metrics),
|
||||
Arc::new(SystemProvider::default()),
|
||||
);
|
||||
|
||||
// Make data for one shard.
// NOTE(review): this comment previously said "one shard and two tables", but
// only TEST_TABLE is passed to `populate_catalog` here — confirm.
|
||||
let shard_index = ShardIndex::new(0);
|
||||
let (shard_id, _, _) =
|
||||
populate_catalog(&*catalog, shard_index, TEST_NAMESPACE, TEST_TABLE).await;
|
||||
|
||||
let ingester = IngesterData::new(
|
||||
object_store,
|
||||
Arc::clone(&catalog),
|
||||
[(shard_id, shard_index)],
|
||||
exec,
|
||||
Arc::new(CatalogPartitionResolver::new(catalog)),
|
||||
backoff::BackoffConfig::default(),
|
||||
metrics,
|
||||
);
|
||||
|
||||
// Make the partition (with tombstones) as requested by `loc`
|
||||
make_one_partition_with_tombstones(&ingester, &lifecycle.handle(), loc, shard_index, shard_id)
|
||||
.await;
|
||||
|
||||
ingester
|
||||
}
|
||||
|
||||
/// Make data for one or two partitions, as requested
|
||||
pub(crate) fn make_partitions(two_partitions: bool, shard_index: ShardIndex) -> Vec<DmlOperation> {
|
||||
// In-memory data includes these rows but split between 4 groups go into
|
||||
|
|
@ -783,133 +702,6 @@ pub(crate) fn make_partitions(two_partitions: bool, shard_index: ShardIndex) ->
|
|||
ops
|
||||
}
|
||||
|
||||
/// Make data for one partition with tombstones.
///
/// Steps, in order:
/// 1. buffer the write ops returned by `make_first_partition_data`;
/// 2. if `loc` contains `PERSISTING`, move that data to persisting; otherwise,
///    if it contains `SNAPSHOT`, move it to a snapshot;
/// 3. buffer a delete for `city=Boston` over timestamps 10ns..50ns;
/// 4. buffer two more writes (Medford, Reading) after the delete.
///
/// Panics (via `unwrap`) if buffering an operation fails.
|
||||
async fn make_one_partition_with_tombstones(
|
||||
ingester: &IngesterData,
|
||||
lifecycle_handle: &dyn LifecycleHandle,
|
||||
loc: DataLocation,
|
||||
shard_index: ShardIndex,
|
||||
shard_id: ShardId,
|
||||
) {
|
||||
// In-memory data includes these rows, split between 4 groups that go into
|
||||
// different batches of partition 1 or partition 2 as requested
|
||||
// let expected = vec![
|
||||
// "+------------+-----+------+--------------------------------+",
|
||||
// "| city | day | temp | time |",
|
||||
// "+------------+-----+------+--------------------------------+",
|
||||
// "| Andover | tue | 56 | 1970-01-01T00:00:00.000000030Z |", // in group 1 - seq_num: 2
|
||||
// "| Andover | mon | | 1970-01-01T00:00:00.000000046Z |", // in group 2 - seq_num: 3
|
||||
// "| Boston | sun | 60 | 1970-01-01T00:00:00.000000036Z |", // in group 1 - seq_num: 1 --> will get deleted
|
||||
// "| Boston | mon | | 1970-01-01T00:00:00.000000038Z |", // in group 3 - seq_num: 5 --> will get deleted
|
||||
// "| Medford | sun | 55 | 1970-01-01T00:00:00.000000022Z |", // in group 4 - seq_num: 8 (after the tombstone's seq num)
|
||||
// "| Medford | wed | | 1970-01-01T00:00:00.000000026Z |", // in group 2 - seq_num: 4
|
||||
// "| Reading | mon | 58 | 1970-01-01T00:00:00.000000040Z |", // in group 4 - seq_num: 9
|
||||
// "| Wilmington | mon | | 1970-01-01T00:00:00.000000035Z |", // in group 3 - seq_num: 6
|
||||
// "+------------+-----+------+--------------------------------+",
|
||||
// ];
|
||||
|
||||
let (ops, seq_num) =
|
||||
make_first_partition_data(&PartitionKey::from(TEST_PARTITION_1), shard_index);
|
||||
|
||||
// Apply all ops
|
||||
for op in ops {
|
||||
ingester
|
||||
.buffer_operation(shard_id, op, lifecycle_handle)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// PERSISTING takes precedence over SNAPSHOT when `loc` contains both.
if loc.contains(DataLocation::PERSISTING) {
|
||||
// Move partition 1 data to persisting
|
||||
let _ignored = ingester
|
||||
.shard(shard_id)
|
||||
.unwrap()
|
||||
.namespace(TEST_NAMESPACE)
|
||||
.unwrap()
|
||||
.snapshot_to_persisting(TEST_TABLE, &PartitionKey::from(TEST_PARTITION_1))
|
||||
.await;
|
||||
} else if loc.contains(DataLocation::SNAPSHOT) {
|
||||
// Move partition 1 data to snapshot
|
||||
let _ignored = ingester
|
||||
.shard(shard_id)
|
||||
.unwrap()
|
||||
.namespace(TEST_NAMESPACE)
|
||||
.unwrap()
|
||||
.snapshot(TEST_TABLE, &PartitionKey::from(TEST_PARTITION_1))
|
||||
.await;
|
||||
}
|
||||
|
||||
// Add tombstones
|
||||
// Depending on where the existing data is, it is either (buffer & snapshot) moved to a new snapshot after
|
||||
// applying the tombstone, or (persisting) left where it is, with the tombstone kept to be applied later
|
||||
// ------------------------------------------
|
||||
// Delete
|
||||
let mut seq_num = seq_num.get();
|
||||
seq_num += 1;
|
||||
|
||||
let delete = parse_delete_predicate(
|
||||
"1970-01-01T00:00:00.000000010Z",
|
||||
"1970-01-01T00:00:00.000000050Z",
|
||||
"city=Boston",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
ingester
|
||||
.buffer_operation(
|
||||
shard_id,
|
||||
DmlOperation::Delete(DmlDelete::new(
|
||||
TEST_NAMESPACE.to_string(),
|
||||
delete,
|
||||
NonEmptyString::new(TEST_TABLE),
|
||||
DmlMeta::sequenced(
|
||||
Sequence {
|
||||
shard_index,
|
||||
sequence_number: SequenceNumber::new(seq_num),
|
||||
},
|
||||
Time::MIN,
|
||||
None,
|
||||
42,
|
||||
),
|
||||
)),
|
||||
lifecycle_handle,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Group 4: in buffer of p1 after the tombstone
// NOTE(review): `seq_num` is not incremented between the delete above and the
// write below, so both use the same sequence number, while the table at the
// top lists the Medford row as coming after the tombstone's seq num — confirm
// this is intended.
|
||||
|
||||
ingester
|
||||
.buffer_operation(
|
||||
shard_id,
|
||||
DmlOperation::Write(make_write_op(
|
||||
&PartitionKey::from(TEST_PARTITION_1),
|
||||
shard_index,
|
||||
TEST_NAMESPACE,
|
||||
seq_num,
|
||||
r#"test_table,city=Medford day="sun",temp=55 22"#,
|
||||
)),
|
||||
lifecycle_handle,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
seq_num += 1;
|
||||
|
||||
ingester
|
||||
.buffer_operation(
|
||||
shard_id,
|
||||
DmlOperation::Write(make_write_op(
|
||||
&PartitionKey::from(TEST_PARTITION_1),
|
||||
shard_index,
|
||||
TEST_NAMESPACE,
|
||||
seq_num,
|
||||
r#"test_table,city=Reading day="mon",temp=58 40"#,
|
||||
)),
|
||||
lifecycle_handle,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub(crate) fn make_write_op(
|
||||
partition_key: &PartitionKey,
|
||||
shard_index: ShardIndex,
|
||||
|
|
|
|||
|
|
@ -463,7 +463,10 @@ pub trait PartitionRepo: Send + Sync {
|
|||
partition_id: PartitionId,
|
||||
) -> Result<Option<PartitionInfo>>;
|
||||
|
||||
/// Update the sort key for the partition
|
||||
/// Update the sort key for the partition.
|
||||
///
|
||||
/// NOTE: it is expected that ONLY the ingesters update sort keys for
|
||||
/// existing partitions.
|
||||
async fn update_sort_key(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
|
|
|
|||
|
|
@ -1878,7 +1878,7 @@ LIMIT $4;
|
|||
sqlx::query_as::<_, PartitionParam>(
|
||||
r#"
|
||||
SELECT parquet_file.partition_id, parquet_file.shard_id, parquet_file.namespace_id,
|
||||
parquet_file.table_id,
|
||||
parquet_file.table_id,
|
||||
count(case when to_delete is null then 1 end) total_count,
|
||||
max(case when compaction_level= $4 then parquet_file.created_at end)
|
||||
FROM parquet_file
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ chrono = { version = "0.4", default-features = false }
|
|||
chrono-english = "0.1.4"
|
||||
clap = { version = "4", features = ["derive", "env", "cargo"] }
|
||||
futures = "0.3"
|
||||
handlebars = "4.3.4"
|
||||
handlebars = "4.3.5"
|
||||
humantime = "2.1.0"
|
||||
influxdb2_client = { path = "../influxdb2_client" }
|
||||
itertools = "0.10.5"
|
||||
|
|
@ -22,7 +22,7 @@ rand = { version = "0.8.3", features = ["small_rng"] }
|
|||
regex = "1.6"
|
||||
schema = { path = "../schema" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.83"
|
||||
serde_json = "1.0.86"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.21", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
toml = "0.5.9"
|
||||
|
|
|
|||
|
|
@ -762,7 +762,7 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
// Input has one row that has no value (NULL value) for tag_b, which is its own series
|
||||
let input = stream_from_batch(batch);
|
||||
let input = stream_from_batch(batch.schema(), batch);
|
||||
|
||||
let table_name = "foo";
|
||||
let tag_columns = ["tag_a", "tag_b"];
|
||||
|
|
@ -873,7 +873,8 @@ mod tests {
|
|||
.collect();
|
||||
|
||||
// stream from those batches
|
||||
stream_from_batches(batches)
|
||||
assert!(!batches.is_empty());
|
||||
stream_from_batches(batches[0].schema(), batches)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -14,7 +14,7 @@ use async_trait::async_trait;
|
|||
use data_types::{
|
||||
ChunkId, ChunkOrder, DeletePredicate, InfluxDbType, PartitionId, TableSummary, TimestampMinMax,
|
||||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use exec::{stringset::StringSet, IOxSessionContext};
|
||||
use hashbrown::HashMap;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
|
|
@ -141,9 +141,6 @@ impl Drop for QueryCompletedToken {
|
|||
/// This avoids storing potentially large strings
|
||||
pub type QueryText = Box<dyn std::fmt::Display + Send + Sync>;
|
||||
|
||||
/// Error type for [`QueryDatabase`] operations.
|
||||
pub type QueryDatabaseError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
/// A `Database` is the main trait implemented by the IOx subsystems
|
||||
/// that store actual data.
|
||||
///
|
||||
|
|
@ -154,12 +151,15 @@ pub trait QueryDatabase: QueryDatabaseMeta + Debug + Send + Sync {
|
|||
/// Returns a set of chunks within the partition with data that may match
|
||||
/// the provided predicate. If possible, chunks which have no rows that can
|
||||
/// possibly match the predicate may be omitted.
|
||||
/// If `projection` is `None`, returned chunks will include all columns of their original data. Otherwise,
|
||||
/// returned chunks will include PK columns (tags and time) and columns specified in the projection.
|
||||
async fn chunks(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
projection: &Option<Vec<usize>>,
|
||||
ctx: IOxSessionContext,
|
||||
) -> Result<Vec<Arc<dyn QueryChunk>>, QueryDatabaseError>;
|
||||
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError>;
|
||||
|
||||
/// Record that particular type of query was run / planned
|
||||
fn record_query(
|
||||
|
|
@ -175,9 +175,6 @@ pub trait QueryDatabase: QueryDatabaseMeta + Debug + Send + Sync {
|
|||
fn as_meta(&self) -> &dyn QueryDatabaseMeta;
|
||||
}
|
||||
|
||||
/// Error type for [`QueryChunk`] operations.
|
||||
pub type QueryChunkError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
/// Collection of data that shares the same partition key
|
||||
pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
||||
/// returns the Id of this chunk. Ids are unique within a
|
||||
|
|
@ -200,7 +197,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
fn apply_predicate_to_metadata(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
) -> Result<PredicateMatch, QueryChunkError> {
|
||||
) -> Result<PredicateMatch, DataFusionError> {
|
||||
Ok(self
|
||||
.summary()
|
||||
.map(|summary| predicate.apply_to_table_summary(&summary, self.schema().as_arrow()))
|
||||
|
|
@ -216,7 +213,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
columns: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, QueryChunkError>;
|
||||
) -> Result<Option<StringSet>, DataFusionError>;
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
/// specified columns. If the predicate can be evaluated entirely
|
||||
|
|
@ -228,7 +225,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
ctx: IOxSessionContext,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, QueryChunkError>;
|
||||
) -> Result<Option<StringSet>, DataFusionError>;
|
||||
|
||||
/// Provides access to raw `QueryChunk` data as an
|
||||
/// asynchronous stream of `RecordBatch`es filtered by a *required*
|
||||
|
|
@ -248,7 +245,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, QueryChunkError>;
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError>;
|
||||
|
||||
/// Returns chunk type. Useful in tests and debug logs.
|
||||
fn chunk_type(&self) -> &str;
|
||||
|
|
|
|||
|
|
@ -262,7 +262,7 @@ mod tests {
|
|||
let batch = make_batch();
|
||||
|
||||
let output_schema = batch.schema();
|
||||
let input_stream = stream_from_batch(batch);
|
||||
let input_stream = stream_from_batch(batch.schema(), batch);
|
||||
let adapter_stream =
|
||||
SchemaAdapterStream::try_new(input_stream, output_schema, baseline_metrics()).unwrap();
|
||||
|
||||
|
|
@ -291,7 +291,7 @@ mod tests {
|
|||
Field::new("c", DataType::Utf8, false),
|
||||
Field::new("a", DataType::Int32, false),
|
||||
]));
|
||||
let input_stream = stream_from_batch(batch);
|
||||
let input_stream = stream_from_batch(batch.schema(), batch);
|
||||
let adapter_stream =
|
||||
SchemaAdapterStream::try_new(input_stream, output_schema, baseline_metrics()).unwrap();
|
||||
|
||||
|
|
@ -321,7 +321,7 @@ mod tests {
|
|||
Field::new("d", DataType::Float32, true),
|
||||
Field::new("a", DataType::Int32, false),
|
||||
]));
|
||||
let input_stream = stream_from_batch(batch);
|
||||
let input_stream = stream_from_batch(batch.schema(), batch);
|
||||
let adapter_stream =
|
||||
SchemaAdapterStream::try_new(input_stream, output_schema, baseline_metrics()).unwrap();
|
||||
|
||||
|
|
@ -349,7 +349,7 @@ mod tests {
|
|||
Field::new("c", DataType::Utf8, false),
|
||||
Field::new("a", DataType::Int32, false),
|
||||
]));
|
||||
let input_stream = stream_from_batch(batch);
|
||||
let input_stream = stream_from_batch(batch.schema(), batch);
|
||||
let res = SchemaAdapterStream::try_new(input_stream, output_schema, baseline_metrics());
|
||||
|
||||
assert_contains!(
|
||||
|
|
@ -368,7 +368,7 @@ mod tests {
|
|||
Field::new("b", DataType::Int32, false),
|
||||
Field::new("a", DataType::Int32, false),
|
||||
]));
|
||||
let input_stream = stream_from_batch(batch);
|
||||
let input_stream = stream_from_batch(batch.schema(), batch);
|
||||
let res = SchemaAdapterStream::try_new(input_stream, output_schema, baseline_metrics());
|
||||
|
||||
assert_contains!(res.unwrap_err().to_string(), "input field 'c' had type 'Utf8' which is different than output field 'c' which had type 'Float32'");
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ use crate::{
|
|||
stringset::{StringSet, StringSetRef},
|
||||
ExecutionContextProvider, Executor, ExecutorType, IOxSessionContext,
|
||||
},
|
||||
Predicate, PredicateMatch, QueryChunk, QueryChunkError, QueryChunkMeta, QueryCompletedToken,
|
||||
QueryDatabase, QueryDatabaseError, QueryText,
|
||||
Predicate, PredicateMatch, QueryChunk, QueryChunkMeta, QueryCompletedToken, QueryDatabase,
|
||||
QueryText,
|
||||
};
|
||||
use arrow::{
|
||||
array::{
|
||||
|
|
@ -24,7 +24,7 @@ use data_types::{
|
|||
ChunkId, ChunkOrder, ColumnSummary, DeletePredicate, InfluxDbType, PartitionId, StatValues,
|
||||
Statistics, TableSummary, TimestampMinMax,
|
||||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use datafusion_util::stream_from_batches;
|
||||
use futures::StreamExt;
|
||||
use hashbrown::HashSet;
|
||||
|
|
@ -108,18 +108,54 @@ impl QueryDatabase for TestDatabase {
|
|||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
projection: &Option<Vec<usize>>,
|
||||
_ctx: IOxSessionContext,
|
||||
) -> Result<Vec<Arc<dyn QueryChunk>>, QueryDatabaseError> {
|
||||
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError> {
|
||||
// save last predicate
|
||||
*self.chunks_predicate.lock() = predicate.clone();
|
||||
|
||||
let partitions = self.partitions.lock();
|
||||
Ok(partitions
|
||||
let partitions = self.partitions.lock().clone();
|
||||
let chunks = partitions
|
||||
.values()
|
||||
.flat_map(|x| x.values())
|
||||
.filter(|x| x.table_name == table_name)
|
||||
.map(|x| Arc::clone(x) as _)
|
||||
.collect())
|
||||
.map(|x| Arc::clone(x) as Arc<dyn QueryChunk>)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Return chunks with fewer columns if a projection is specified
|
||||
let mut new_chunks = Vec::with_capacity(chunks.len());
|
||||
for c in chunks {
|
||||
let schema = c.schema();
|
||||
let cols = schema.select_given_and_pk_columns(projection);
|
||||
let cols = cols.iter().map(|c| c.as_str()).collect::<Vec<_>>();
|
||||
let selection = Selection::Some(&cols);
|
||||
|
||||
let read_result =
|
||||
c.read_filter(IOxSessionContext::with_testing(), predicate, selection);
|
||||
if read_result.is_err() {
|
||||
return Err(read_result.err().unwrap());
|
||||
}
|
||||
let mut stream = read_result.unwrap();
|
||||
|
||||
let mut new_chunk = TestChunk::new(c.table_name());
|
||||
while let Some(b) = stream.next().await {
|
||||
let b = b.expect("Error in stream");
|
||||
new_chunk.table_data.push(Arc::new(b));
|
||||
}
|
||||
|
||||
let new_chunk = if !new_chunk.table_data.is_empty() {
|
||||
let new_schema = Schema::try_from(new_chunk.table_data[0].schema()).unwrap();
|
||||
let new_chunk = new_chunk.add_schema_to_table(new_schema, true, None);
|
||||
Arc::new(new_chunk) as _
|
||||
} else {
|
||||
// No data, return the original empty chunk with the original schema
|
||||
c
|
||||
};
|
||||
|
||||
new_chunks.push(new_chunk);
|
||||
}
|
||||
|
||||
Ok(new_chunks)
|
||||
}
|
||||
|
||||
fn record_query(
|
||||
|
|
@ -327,9 +363,9 @@ impl TestChunk {
|
|||
}
|
||||
|
||||
/// Checks the saved error, and returns it if any, otherwise returns OK
|
||||
fn check_error(&self) -> Result<(), QueryChunkError> {
|
||||
fn check_error(&self) -> Result<(), DataFusionError> {
|
||||
if let Some(message) = self.saved_error.as_ref() {
|
||||
Err(message.clone().into())
|
||||
Err(DataFusionError::External(message.clone().into()))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -509,12 +545,8 @@ impl TestChunk {
|
|||
mut self,
|
||||
new_column_schema: Schema,
|
||||
add_column_summary: bool,
|
||||
stats: Option<Statistics>,
|
||||
input_stats: Option<Statistics>,
|
||||
) -> Self {
|
||||
// assume the new schema has exactly a single table
|
||||
assert_eq!(new_column_schema.len(), 1);
|
||||
let (col_type, new_field) = new_column_schema.field(0);
|
||||
|
||||
let mut merger = SchemaMerger::new();
|
||||
merger = merger.merge(&new_column_schema).unwrap();
|
||||
merger = merger
|
||||
|
|
@ -522,34 +554,38 @@ impl TestChunk {
|
|||
.expect("merging was successful");
|
||||
self.schema = merger.build();
|
||||
|
||||
if add_column_summary {
|
||||
let influxdb_type = col_type.map(|t| match t {
|
||||
InfluxColumnType::Tag => InfluxDbType::Tag,
|
||||
InfluxColumnType::Field(_) => InfluxDbType::Field,
|
||||
InfluxColumnType::Timestamp => InfluxDbType::Timestamp,
|
||||
});
|
||||
for i in 0..new_column_schema.len() {
|
||||
let (col_type, new_field) = new_column_schema.field(i);
|
||||
if add_column_summary {
|
||||
let influxdb_type = col_type.map(|t| match t {
|
||||
InfluxColumnType::Tag => InfluxDbType::Tag,
|
||||
InfluxColumnType::Field(_) => InfluxDbType::Field,
|
||||
InfluxColumnType::Timestamp => InfluxDbType::Timestamp,
|
||||
});
|
||||
|
||||
let stats = stats.unwrap_or_else(|| match new_field.data_type() {
|
||||
DataType::Boolean => Statistics::Bool(StatValues::default()),
|
||||
DataType::Int64 => Statistics::I64(StatValues::default()),
|
||||
DataType::UInt64 => Statistics::U64(StatValues::default()),
|
||||
DataType::Utf8 => Statistics::String(StatValues::default()),
|
||||
DataType::Dictionary(_, value_type) => {
|
||||
assert!(matches!(**value_type, DataType::Utf8));
|
||||
Statistics::String(StatValues::default())
|
||||
}
|
||||
DataType::Float64 => Statistics::F64(StatValues::default()),
|
||||
DataType::Timestamp(_, _) => Statistics::I64(StatValues::default()),
|
||||
_ => panic!("Unsupported type in TestChunk: {:?}", new_field.data_type()),
|
||||
});
|
||||
let stats = input_stats.clone();
|
||||
let stats = stats.unwrap_or_else(|| match new_field.data_type() {
|
||||
DataType::Boolean => Statistics::Bool(StatValues::default()),
|
||||
DataType::Int64 => Statistics::I64(StatValues::default()),
|
||||
DataType::UInt64 => Statistics::U64(StatValues::default()),
|
||||
DataType::Utf8 => Statistics::String(StatValues::default()),
|
||||
DataType::Dictionary(_, value_type) => {
|
||||
assert!(matches!(**value_type, DataType::Utf8));
|
||||
Statistics::String(StatValues::default())
|
||||
}
|
||||
DataType::Float64 => Statistics::F64(StatValues::default()),
|
||||
DataType::Timestamp(_, _) => Statistics::I64(StatValues::default()),
|
||||
_ => panic!("Unsupported type in TestChunk: {:?}", new_field.data_type()),
|
||||
});
|
||||
|
||||
let column_summary = ColumnSummary {
|
||||
name: new_field.name().clone(),
|
||||
influxdb_type,
|
||||
stats,
|
||||
};
|
||||
let column_summary = ColumnSummary {
|
||||
name: new_field.name().clone(),
|
||||
influxdb_type,
|
||||
stats,
|
||||
};
|
||||
|
||||
self.table_summary.columns.push(column_summary);
|
||||
self.table_summary.columns.push(column_summary);
|
||||
}
|
||||
}
|
||||
|
||||
self
|
||||
|
|
@ -921,13 +957,17 @@ impl QueryChunk for TestChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, QueryChunkError> {
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
self.check_error()?;
|
||||
|
||||
// save the predicate
|
||||
self.predicates.lock().push(predicate.clone());
|
||||
|
||||
let batches = match self.schema.df_projection(selection)? {
|
||||
let batches = match self
|
||||
.schema
|
||||
.df_projection(selection)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?
|
||||
{
|
||||
None => self.table_data.clone(),
|
||||
Some(projection) => self
|
||||
.table_data
|
||||
|
|
@ -938,7 +978,8 @@ impl QueryChunk for TestChunk {
|
|||
})
|
||||
.collect::<std::result::Result<Vec<_>, ArrowError>>()?,
|
||||
};
|
||||
Ok(stream_from_batches(batches))
|
||||
|
||||
Ok(stream_from_batches(self.schema().as_arrow(), batches))
|
||||
}
|
||||
|
||||
fn chunk_type(&self) -> &str {
|
||||
|
|
@ -948,7 +989,7 @@ impl QueryChunk for TestChunk {
|
|||
fn apply_predicate_to_metadata(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
) -> Result<PredicateMatch, QueryChunkError> {
|
||||
) -> Result<PredicateMatch, DataFusionError> {
|
||||
self.check_error()?;
|
||||
|
||||
// save the predicate
|
||||
|
|
@ -967,7 +1008,7 @@ impl QueryChunk for TestChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
// Model not being able to get column values from metadata
|
||||
Ok(None)
|
||||
}
|
||||
|
|
@ -977,7 +1018,7 @@ impl QueryChunk for TestChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
self.check_error()?;
|
||||
|
||||
// save the predicate
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
iox_time = { path = "../iox_time" }
|
||||
metric = { path = "../metric" }
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ log = "0.4"
|
|||
parking_lot = "0.12"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.83"
|
||||
serde_json = "1.0.86"
|
||||
serde_urlencoded = "0.7.0"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.21", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
ioxd_common = { path = "../ioxd_common" }
|
||||
metric = { path = "../metric" }
|
||||
iox_query = { path = "../iox_query" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
iox_time = { path = "../iox_time" }
|
||||
trace = { path = "../trace" }
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ ingester = { path = "../ingester" }
|
|||
iox_catalog = { path = "../iox_catalog" }
|
||||
ioxd_common = { path = "../ioxd_common" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
iox_query = { path = "../iox_query" }
|
||||
trace = { path = "../trace" }
|
||||
write_buffer = { path = "../write_buffer" }
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ generated_types = { path = "../generated_types" }
|
|||
iox_catalog = { path = "../iox_catalog" }
|
||||
ioxd_common = { path = "../ioxd_common" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
querier = { path = "../querier" }
|
||||
iox_query = { path = "../iox_query" }
|
||||
router = { path = "../router" }
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
ioxd_common = { path = "../ioxd_common" }
|
||||
metric = { path = "../metric" }
|
||||
mutable_batch = { path = "../mutable_batch" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
router = { path = "../router" }
|
||||
sharder = { path = "../sharder" }
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ bytes = "1.2"
|
|||
futures = "0.3"
|
||||
iox_time = { version = "0.1.0", path = "../iox_time" }
|
||||
metric = { version = "0.1.0", path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
pin-project = "1.0.12"
|
||||
tokio = { version = "1.21", features = ["io-util"] }
|
||||
workspace-hack = { path = "../workspace-hack" }
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ datafusion_util = { path = "../datafusion_util" }
|
|||
futures = "0.3"
|
||||
generated_types = { path = "../generated_types" }
|
||||
iox_time = { path = "../iox_time" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parking_lot = "0.12"
|
||||
parquet = {version = "23.0.0", features = ["experimental"]}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ datafusion = { path = "../datafusion" }
|
|||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
futures = {version = "0.3"}
|
||||
num_cpus = "1.13.1"
|
||||
object_store = { version = "0.5.0" }
|
||||
object_store = { version = "0.5.1" }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
schema = { path = "../schema" }
|
||||
tokio = "1.0"
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ itertools = "0.10"
|
|||
observability_deps = { path = "../observability_deps" }
|
||||
query_functions = { path = "../query_functions"}
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.83"
|
||||
serde_json = "1.0.86"
|
||||
snafu = "0.7"
|
||||
sqlparser = "0.24.0"
|
||||
sqlparser = "0.25.0"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@
|
|||
|
||||
pub mod delete_expr;
|
||||
pub mod delete_predicate;
|
||||
pub mod rewrite;
|
||||
pub mod rpc_predicate;
|
||||
|
||||
use arrow::{
|
||||
|
|
|
|||
|
|
@ -1,19 +1,23 @@
|
|||
mod column_rewrite;
|
||||
mod field_rewrite;
|
||||
mod measurement_rewrite;
|
||||
mod rewrite;
|
||||
mod value_rewrite;
|
||||
|
||||
use crate::{rewrite, Predicate};
|
||||
use crate::Predicate;
|
||||
|
||||
use datafusion::error::{DataFusionError, Result as DataFusionResult};
|
||||
use datafusion::execution::context::ExecutionProps;
|
||||
use datafusion::logical_expr::lit;
|
||||
use datafusion::logical_plan::{
|
||||
Column, Expr, ExprSchema, ExprSchemable, ExprSimplifiable, SimplifyInfo,
|
||||
Column, Expr, ExprRewritable, ExprSchema, ExprSchemable, ExprSimplifiable, SimplifyInfo,
|
||||
};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use schema::Schema;
|
||||
use std::collections::BTreeSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use self::column_rewrite::MissingColumnRewriter;
|
||||
use self::field_rewrite::FieldProjectionRewriter;
|
||||
use self::measurement_rewrite::rewrite_measurement_references;
|
||||
use self::value_rewrite::rewrite_field_value_references;
|
||||
|
|
@ -187,6 +191,7 @@ fn normalize_predicate(
|
|||
let mut predicate = predicate.clone();
|
||||
|
||||
let mut field_projections = FieldProjectionRewriter::new(Arc::clone(&schema));
|
||||
let mut missing_columums = MissingColumnRewriter::new(Arc::clone(&schema));
|
||||
|
||||
let mut field_value_exprs = vec![];
|
||||
|
||||
|
|
@ -194,24 +199,38 @@ fn normalize_predicate(
|
|||
.exprs
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
rewrite_measurement_references(table_name, e)
|
||||
debug!(?e, "rewriting expr");
|
||||
|
||||
let e = rewrite_measurement_references(table_name, e)
|
||||
.map(|e| log_rewrite(e, "rewrite_measurement_references"))
|
||||
// Rewrite any references to `_value = some_value` to literal true values.
|
||||
// Keeps track of these expressions, which can then be used to
|
||||
// augment field projections with conditions using `CASE` statements.
|
||||
.and_then(|e| rewrite_field_value_references(&mut field_value_exprs, e))
|
||||
.map(|e| log_rewrite(e, "rewrite_field_value_references"))
|
||||
// Rewrite any references to `_field` with a literal
|
||||
// and keep track of referenced field names to add to
|
||||
// the field column projection set.
|
||||
.and_then(|e| field_projections.rewrite_field_exprs(e))
|
||||
.map(|e| log_rewrite(e, "field_projections"))
|
||||
// remove references to columns that don't exist in this schema
|
||||
.and_then(|e| e.rewrite(&mut missing_columums))
|
||||
.map(|e| log_rewrite(e, "missing_columums"))
|
||||
// apply IOx specific rewrites (that unlock other simplifications)
|
||||
.and_then(rewrite::rewrite)
|
||||
// Call the core DataFusion simplification logic
|
||||
.map(|e| log_rewrite(e, "rewrite"))
|
||||
// Call DataFusion simplification logic
|
||||
.and_then(|e| {
|
||||
let adapter = SimplifyAdapter::new(schema.as_ref());
|
||||
// simplify twice to ensure "full" cleanup
|
||||
e.simplify(&adapter)?.simplify(&adapter)
|
||||
})
|
||||
.map(|e| log_rewrite(e, "simplify_expr"))
|
||||
.and_then(rewrite::simplify_predicate)
|
||||
.map(|e| log_rewrite(e, "simplify_expr"));
|
||||
|
||||
debug!(?e, "rewritten expr");
|
||||
e
|
||||
})
|
||||
// Filter out literal true so is_empty works correctly
|
||||
.filter(|f| match f {
|
||||
|
|
@ -227,6 +246,11 @@ fn normalize_predicate(
|
|||
field_projections.add_to_predicate(predicate)
|
||||
}
|
||||
|
||||
fn log_rewrite(expr: Expr, description: &str) -> Expr {
|
||||
trace!(?expr, %description, "After rewrite");
|
||||
expr
|
||||
}
|
||||
|
||||
struct SimplifyAdapter<'a> {
|
||||
schema: &'a Schema,
|
||||
execution_props: ExecutionProps,
|
||||
|
|
@ -290,9 +314,27 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use datafusion::{
|
||||
logical_plan::{col, lit},
|
||||
scalar::ScalarValue,
|
||||
};
|
||||
use test_helpers::assert_contains;
|
||||
|
||||
#[test]
|
||||
fn test_normalize_predicate_coerced() {
|
||||
let schema = schema();
|
||||
let predicate = normalize_predicate(
|
||||
"table",
|
||||
Arc::clone(&schema),
|
||||
&Predicate::new().with_expr(col("t1").eq(lit("f1"))),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let expected = Predicate::new().with_expr(col("t1").eq(lit("f1")));
|
||||
|
||||
assert_eq!(predicate, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_predicate_field_rewrite() {
|
||||
let predicate = normalize_predicate(
|
||||
|
|
@ -336,6 +378,20 @@ mod tests {
|
|||
assert_eq!(predicate, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_predicate_field_non_tag() {
|
||||
// should treat the comparison against `not_a_tag` as NULL (a null boolean literal)
|
||||
let predicate = normalize_predicate(
|
||||
"table",
|
||||
schema(),
|
||||
&Predicate::new().with_expr(col("not_a_tag").eq(lit("blarg"))),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let expected = Predicate::new().with_expr(lit(ScalarValue::Boolean(None)));
|
||||
assert_eq!(predicate, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_predicate_field_rewrite_multi_field_unsupported() {
|
||||
let err = normalize_predicate(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use datafusion::{
|
||||
error::Result as DataFusionResult, logical_plan::ExprRewriter, prelude::*, scalar::ScalarValue,
|
||||
};
|
||||
use schema::Schema;
|
||||
|
||||
/// Logic for rewriting expressions from influxrpc that reference non
|
||||
/// existent columns to NULL
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct MissingColumnRewriter {
|
||||
/// The input schema
|
||||
schema: Arc<Schema>,
|
||||
}
|
||||
|
||||
impl MissingColumnRewriter {
|
||||
/// Create a new [`MissingColumnRewriter`] targeting the given schema
|
||||
pub(crate) fn new(schema: Arc<Schema>) -> Self {
|
||||
Self { schema }
|
||||
}
|
||||
|
||||
fn column_exists(&self, col: &Column) -> DataFusionResult<bool> {
|
||||
// TODO: return a real error here (rpc_predicates shouldn't have table/relation qualifiers)
|
||||
assert!(col.relation.is_none());
|
||||
|
||||
if self.schema.find_index_of(&col.name).is_some() {
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn lit_null() -> Expr {
|
||||
lit(ScalarValue::Utf8(None))
|
||||
}
|
||||
|
||||
impl ExprRewriter for MissingColumnRewriter {
|
||||
fn mutate(&mut self, expr: Expr) -> DataFusionResult<Expr> {
|
||||
Ok(match expr {
|
||||
Expr::Column(col) if !self.column_exists(&col)? => lit_null(),
|
||||
expr => expr,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datafusion::{arrow::datatypes::DataType, logical_plan::ExprRewritable};
|
||||
use schema::SchemaBuilder;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn all_columns_defined_no_rewrite() {
|
||||
// t1 = "foo"
|
||||
let expr = col("t1").eq(lit("foo"));
|
||||
assert_eq!(rewrite(expr.clone()), expr);
|
||||
|
||||
// f1 > 1.0
|
||||
let expr = col("f1").gt(lit(1.0));
|
||||
assert_eq!(rewrite(expr.clone()), expr);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn all_columns_not_defined() {
|
||||
// non_defined = "foo" --> NULL = "foo"
|
||||
let expr = col("non_defined").eq(lit("foo"));
|
||||
let expected = lit_null().eq(lit("foo"));
|
||||
assert_eq!(rewrite(expr), expected);
|
||||
|
||||
// non_defined = 1.4 --> NULL = 1.4
|
||||
let expr = col("non_defined").eq(lit(1.4));
|
||||
// No type is inferred, so this is a literal null string (even though it arguably should be a literal float)
|
||||
let expected = lit_null().eq(lit(1.4));
|
||||
assert_eq!(rewrite(expr), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn some_columns_not_defined() {
|
||||
// t1 = "foo" AND non_defined = "bar" --> t1 = "foo" and NULL = "bar"
|
||||
let expr = col("t1")
|
||||
.eq(lit("foo"))
|
||||
.and(col("non_defined").eq(lit("bar")));
|
||||
let expected = col("t1").eq(lit("foo")).and(lit_null().eq(lit("bar")));
|
||||
assert_eq!(rewrite(expr), expected);
|
||||
}
|
||||
|
||||
fn rewrite(expr: Expr) -> Expr {
|
||||
let schema = SchemaBuilder::new()
|
||||
.tag("t1")
|
||||
.field("f1", DataType::Int64)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut rewriter = MissingColumnRewriter::new(Arc::new(schema));
|
||||
expr.rewrite(&mut rewriter).unwrap()
|
||||
}
|
||||
}
|
||||
|
|
@ -55,8 +55,8 @@ impl FieldProjectionRewriter {
|
|||
}
|
||||
}
|
||||
|
||||
// Rewrites the predicate. See the description on
|
||||
// [`FieldProjectionRewriter`] for more details.
|
||||
/// Rewrites the predicate. See the description on
|
||||
/// [`FieldProjectionRewriter`] for more details.
|
||||
pub(crate) fn rewrite_field_exprs(&mut self, expr: Expr) -> DataFusionResult<Expr> {
|
||||
// for predicates like `A AND B AND C`
|
||||
// rewrite `A`, `B` and `C` separately and put them back together
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ generated_types = { path = "../generated_types" }
|
|||
influxdb_iox_client = { path = "../influxdb_iox_client" }
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.0"
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parking_lot = "0.12"
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
|
|
|
|||
|
|
@ -470,9 +470,9 @@ mod tests {
|
|||
.into_iter()
|
||||
.map(lp_to_record_batch)
|
||||
.map(Arc::new)
|
||||
.collect();
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let stream = stream_from_batches(batches);
|
||||
let stream = stream_from_batches(batches[0].schema(), batches);
|
||||
|
||||
let metric_registry = metric::Registry::new();
|
||||
|
||||
|
|
|
|||
|
|
@ -7,13 +7,16 @@ use arrow::{
|
|||
use data_types::{
|
||||
ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary, TimestampMinMax,
|
||||
};
|
||||
use datafusion::physical_plan::{
|
||||
stream::RecordBatchStreamAdapter, RecordBatchStream, SendableRecordBatchStream,
|
||||
use datafusion::{
|
||||
error::DataFusionError,
|
||||
physical_plan::{
|
||||
stream::RecordBatchStreamAdapter, RecordBatchStream, SendableRecordBatchStream,
|
||||
},
|
||||
};
|
||||
use futures::{Stream, TryStreamExt};
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
QueryChunk, QueryChunkError, QueryChunkMeta,
|
||||
QueryChunk, QueryChunkMeta,
|
||||
};
|
||||
use observability_deps::tracing::debug;
|
||||
use predicate::Predicate;
|
||||
|
|
@ -114,7 +117,7 @@ impl QueryChunk for QuerierChunk {
|
|||
mut ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
columns: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
ctx.set_metadata("projection", format!("{}", columns));
|
||||
ctx.set_metadata("predicate", format!("{}", &predicate));
|
||||
|
||||
|
|
@ -161,10 +164,10 @@ impl QueryChunk for QuerierChunk {
|
|||
None
|
||||
}
|
||||
Err(other) => {
|
||||
return Err(Box::new(Error::RBChunk {
|
||||
return Err(DataFusionError::External(Box::new(Error::RBChunk {
|
||||
source: other,
|
||||
chunk_id: self.id(),
|
||||
}))
|
||||
})))
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -178,7 +181,7 @@ impl QueryChunk for QuerierChunk {
|
|||
mut ctx: IOxSessionContext,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
ctx.set_metadata("column_name", column_name.to_string());
|
||||
ctx.set_metadata("predicate", format!("{}", &predicate));
|
||||
|
||||
|
|
@ -205,11 +208,13 @@ impl QueryChunk for QuerierChunk {
|
|||
};
|
||||
ctx.set_metadata("rb_predicate", format!("{}", &rb_predicate));
|
||||
|
||||
let mut values = rb_chunk.column_values(
|
||||
rb_predicate,
|
||||
Selection::Some(&[column_name]),
|
||||
BTreeMap::new(),
|
||||
)?;
|
||||
let mut values = rb_chunk
|
||||
.column_values(
|
||||
rb_predicate,
|
||||
Selection::Some(&[column_name]),
|
||||
BTreeMap::new(),
|
||||
)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
// The InfluxRPC frontend only supports getting column values
|
||||
// for one column at a time (this is a restriction on the Influx
|
||||
|
|
@ -221,7 +226,8 @@ impl QueryChunk for QuerierChunk {
|
|||
.context(ColumnNameNotFoundSnafu {
|
||||
chunk_id: self.id(),
|
||||
column_name,
|
||||
})?;
|
||||
})
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
ctx.set_metadata("output_values", values.len() as i64);
|
||||
|
||||
Ok(Some(values))
|
||||
|
|
@ -234,7 +240,7 @@ impl QueryChunk for QuerierChunk {
|
|||
mut ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, QueryChunkError> {
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
let span_recorder = SpanRecorder::new(
|
||||
ctx.span()
|
||||
.map(|span| span.child("QuerierChunk::read_filter")),
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ use data_types::{
|
|||
ChunkId, ChunkOrder, IngesterMapping, PartitionId, SequenceNumber, ShardId, ShardIndex,
|
||||
TableSummary, TimestampMinMax,
|
||||
};
|
||||
use datafusion::error::DataFusionError;
|
||||
use datafusion_util::MemoryStream;
|
||||
use futures::{stream::FuturesUnordered, TryStreamExt};
|
||||
use generated_types::{
|
||||
|
|
@ -24,7 +25,7 @@ use influxdb_iox_client::flight::{
|
|||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
util::compute_timenanosecond_min_max,
|
||||
QueryChunk, QueryChunkError, QueryChunkMeta,
|
||||
QueryChunk, QueryChunkMeta,
|
||||
};
|
||||
use iox_time::{Time, TimeProvider};
|
||||
use metric::{DurationHistogram, Metric};
|
||||
|
|
@ -612,9 +613,7 @@ impl IngesterStreamDecoder {
|
|||
partition_id,
|
||||
shard_id,
|
||||
status.parquet_max_sequence_number.map(SequenceNumber::new),
|
||||
status
|
||||
.tombstone_max_sequence_number
|
||||
.map(SequenceNumber::new),
|
||||
None,
|
||||
partition_sort_key,
|
||||
);
|
||||
self.current_partition = Some(partition);
|
||||
|
|
@ -1097,7 +1096,7 @@ impl QueryChunk for IngesterChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
_columns: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
// TODO maybe some special handling?
|
||||
Ok(None)
|
||||
}
|
||||
|
|
@ -1107,7 +1106,7 @@ impl QueryChunk for IngesterChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, QueryChunkError> {
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
// TODO maybe some special handling?
|
||||
Ok(None)
|
||||
}
|
||||
|
|
@ -1117,11 +1116,15 @@ impl QueryChunk for IngesterChunk {
|
|||
_ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<datafusion::physical_plan::SendableRecordBatchStream, QueryChunkError> {
|
||||
) -> Result<datafusion::physical_plan::SendableRecordBatchStream, DataFusionError> {
|
||||
trace!(?predicate, ?selection, input_batches=?self.batches, "Reading data");
|
||||
|
||||
// Apply selection to in-memory batch
|
||||
let batches = match self.schema.df_projection(selection)? {
|
||||
let batches = match self
|
||||
.schema
|
||||
.df_projection(selection)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?
|
||||
{
|
||||
None => self.batches.clone(),
|
||||
Some(projection) => self
|
||||
.batches
|
||||
|
|
@ -1333,7 +1336,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
}),
|
||||
},
|
||||
))],
|
||||
|
|
@ -1389,7 +1391,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1399,7 +1400,6 @@ mod tests {
|
|||
partition_id: 2,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1409,7 +1409,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
tombstone_max_sequence_number: None,
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1489,7 +1488,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: Some(11),
|
||||
tombstone_max_sequence_number: Some(12),
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1519,7 +1517,6 @@ mod tests {
|
|||
partition_id: 2,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: Some(21),
|
||||
tombstone_max_sequence_number: Some(22),
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1544,7 +1541,6 @@ mod tests {
|
|||
partition_id: 3,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: Some(31),
|
||||
tombstone_max_sequence_number: Some(32),
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1574,10 +1570,7 @@ mod tests {
|
|||
p1.parquet_max_sequence_number,
|
||||
Some(SequenceNumber::new(11))
|
||||
);
|
||||
assert_eq!(
|
||||
p1.tombstone_max_sequence_number,
|
||||
Some(SequenceNumber::new(12))
|
||||
);
|
||||
assert_eq!(p1.tombstone_max_sequence_number, None);
|
||||
assert_eq!(p1.chunks.len(), 2);
|
||||
assert_eq!(p1.chunks[0].schema().as_arrow(), schema_1_1);
|
||||
assert_eq!(p1.chunks[0].batches.len(), 2);
|
||||
|
|
@ -1594,10 +1587,7 @@ mod tests {
|
|||
p2.parquet_max_sequence_number,
|
||||
Some(SequenceNumber::new(21))
|
||||
);
|
||||
assert_eq!(
|
||||
p2.tombstone_max_sequence_number,
|
||||
Some(SequenceNumber::new(22))
|
||||
);
|
||||
assert_eq!(p2.tombstone_max_sequence_number, None);
|
||||
assert_eq!(p2.chunks.len(), 1);
|
||||
assert_eq!(p2.chunks[0].schema().as_arrow(), schema_2_1);
|
||||
assert_eq!(p2.chunks[0].batches.len(), 1);
|
||||
|
|
@ -1610,10 +1600,7 @@ mod tests {
|
|||
p3.parquet_max_sequence_number,
|
||||
Some(SequenceNumber::new(31))
|
||||
);
|
||||
assert_eq!(
|
||||
p3.tombstone_max_sequence_number,
|
||||
Some(SequenceNumber::new(32))
|
||||
);
|
||||
assert_eq!(p3.tombstone_max_sequence_number, None);
|
||||
assert_eq!(p3.chunks.len(), 1);
|
||||
assert_eq!(p3.chunks[0].schema().as_arrow(), schema_3_1);
|
||||
assert_eq!(p3.chunks[0].batches.len(), 1);
|
||||
|
|
@ -1733,7 +1720,6 @@ mod tests {
|
|||
partition_id: 1,
|
||||
status: Some(PartitionStatus {
|
||||
parquet_max_sequence_number: Some(11),
|
||||
tombstone_max_sequence_number: Some(12),
|
||||
}),
|
||||
},
|
||||
)),
|
||||
|
|
@ -1773,10 +1759,7 @@ mod tests {
|
|||
p1.parquet_max_sequence_number,
|
||||
Some(SequenceNumber::new(11))
|
||||
);
|
||||
assert_eq!(
|
||||
p1.tombstone_max_sequence_number,
|
||||
Some(SequenceNumber::new(12))
|
||||
);
|
||||
assert_eq!(p1.tombstone_max_sequence_number, None);
|
||||
assert_eq!(p1.chunks.len(), 1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,11 @@ use data_types::NamespaceId;
|
|||
use datafusion::{
|
||||
catalog::{catalog::CatalogProvider, schema::SchemaProvider},
|
||||
datasource::TableProvider,
|
||||
error::DataFusionError,
|
||||
};
|
||||
use iox_query::{
|
||||
exec::{ExecutionContextProvider, ExecutorType, IOxSessionContext},
|
||||
QueryChunk, QueryCompletedToken, QueryDatabase, QueryDatabaseError, QueryText, DEFAULT_SCHEMA,
|
||||
QueryChunk, QueryCompletedToken, QueryDatabase, QueryText, DEFAULT_SCHEMA,
|
||||
};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate};
|
||||
|
|
@ -40,8 +41,9 @@ impl QueryDatabase for QuerierNamespace {
|
|||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
projection: &Option<Vec<usize>>,
|
||||
ctx: IOxSessionContext,
|
||||
) -> Result<Vec<Arc<dyn QueryChunk>>, QueryDatabaseError> {
|
||||
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError> {
|
||||
debug!(%table_name, %predicate, "Finding chunks for table");
|
||||
// get table metadata
|
||||
let table = match self.tables.get(table_name).map(Arc::clone) {
|
||||
|
|
@ -57,7 +59,7 @@ impl QueryDatabase for QuerierNamespace {
|
|||
.chunks(
|
||||
predicate,
|
||||
ctx.span().map(|span| span.child("querier table chunks")),
|
||||
&None, // todo: pushdown projection to chunks
|
||||
projection,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
|
@ -627,7 +629,7 @@ mod tests {
|
|||
.unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
format!("Cannot build plan: External error: Chunk pruning failed: Query would scan at least {total_size} bytes, more than configured maximum {limit} bytes. Try adjusting your compactor settings or increasing the per query memory limit."),
|
||||
format!("Cannot build plan: Resources exhausted: Query would scan at least {total_size} bytes, more than configured maximum {limit} bytes. Try adjusting your compactor settings or increasing the per query memory limit."),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use crate::{
|
|||
IngesterConnection,
|
||||
};
|
||||
use data_types::{ColumnId, PartitionId, ShardIndex, TableId, TimestampMinMax};
|
||||
use datafusion::error::DataFusionError;
|
||||
use futures::{join, StreamExt};
|
||||
use iox_query::pruning::prune_summaries;
|
||||
use iox_query::{exec::Executor, provider, provider::ChunkPruner, QueryChunk};
|
||||
|
|
@ -65,6 +66,17 @@ pub enum Error {
|
|||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
impl From<Error> for DataFusionError {
|
||||
fn from(err: Error) -> Self {
|
||||
match err {
|
||||
Error::ChunkPruning {
|
||||
source: err @ provider::Error::TooMuchData { .. },
|
||||
} => Self::ResourcesExhausted(err.to_string()),
|
||||
_ => Self::External(Box::new(err) as _),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Args to create a [`QuerierTable`].
|
||||
pub struct QuerierTableArgs {
|
||||
pub sharder: Arc<JumpHash<Arc<ShardIndex>>>,
|
||||
|
|
|
|||
|
|
@ -66,8 +66,7 @@ impl TableProvider for QuerierTable {
|
|||
ctx.child_span("querier table chunks"),
|
||||
projection,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
.await?;
|
||||
|
||||
for chunk in chunks {
|
||||
builder = builder.add_chunk(chunk);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ use crate::{
|
|||
use self::interface::{IngesterPartitionInfo, ParquetFileInfo, TombstoneInfo};
|
||||
|
||||
#[derive(Snafu, Debug)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub enum ReconcileError {
|
||||
#[snafu(display("Compactor processed file that the querier would need to split apart which is not yet implemented"))]
|
||||
CompactorConflict,
|
||||
|
|
|
|||
|
|
@ -1,25 +0,0 @@
|
|||
-- Test Setup: OneDeleteSimpleExprOneChunkDeleteAll
|
||||
-- SQL: SELECT * from cpu;
|
||||
++
|
||||
++
|
||||
-- SQL: SELECT time from cpu;
|
||||
++
|
||||
++
|
||||
-- SQL: SELECT count(*), count(bar), count(time) from cpu;
|
||||
+-----------------+----------------+-----------------+
|
||||
| COUNT(UInt8(1)) | COUNT(cpu.bar) | COUNT(cpu.time) |
|
||||
+-----------------+----------------+-----------------+
|
||||
| 0 | 0 | 0 |
|
||||
+-----------------+----------------+-----------------+
|
||||
-- SQL: SELECT min(bar), max(bar), min(time), max(time) from cpu;
|
||||
+--------------+--------------+---------------+---------------+
|
||||
| MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
|
||||
+--------------+--------------+---------------+---------------+
|
||||
| | | | |
|
||||
+--------------+--------------+---------------+---------------+
|
||||
-- SQL: SELECT max(bar) from cpu;
|
||||
+--------------+
|
||||
| MAX(cpu.bar) |
|
||||
+--------------+
|
||||
| |
|
||||
+--------------+
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
-- Demonstrate that soft-deleted rows are not returned by queries
|
||||
-- IOX_SETUP: OneDeleteSimpleExprOneChunkDeleteAll
|
||||
|
||||
-- select *
|
||||
SELECT * from cpu;
|
||||
|
||||
-- select one specific column
|
||||
SELECT time from cpu;
|
||||
|
||||
-- select aggregate of every column including star
|
||||
SELECT count(*), count(bar), count(time) from cpu;
|
||||
|
||||
-- select aggregate of every column
|
||||
SELECT min(bar), max(bar), min(time), max(time) from cpu;
|
||||
|
||||
-- select aggregate of one column
|
||||
SELECT max(bar) from cpu;
|
||||
|
|
@ -1,207 +0,0 @@
|
|||
-- Test Setup: OneDeleteMultiExprsOneChunk
|
||||
-- SQL: SELECT * from cpu order by bar, foo, time;
|
||||
+-----+-----+--------------------------------+
|
||||
| bar | foo | time |
|
||||
+-----+-----+--------------------------------+
|
||||
| 1 | me | 1970-01-01T00:00:00.000000040Z |
|
||||
| 2 | you | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+-----+--------------------------------+
|
||||
-- SQL: SELECT time, bar from cpu order by time, bar;
|
||||
+--------------------------------+-----+
|
||||
| time | bar |
|
||||
+--------------------------------+-----+
|
||||
| 1970-01-01T00:00:00.000000020Z | 2 |
|
||||
| 1970-01-01T00:00:00.000000040Z | 1 |
|
||||
+--------------------------------+-----+
|
||||
-- SQL: SELECT bar from cpu order by bar;
|
||||
+-----+
|
||||
| bar |
|
||||
+-----+
|
||||
| 1 |
|
||||
| 2 |
|
||||
+-----+
|
||||
-- SQL: SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu;
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
| COUNT(cpu.time) | COUNT(UInt8(1)) | COUNT(cpu.bar) | MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
| 2 | 2 | 2 | 1 | 2 | 1970-01-01T00:00:00.000000020Z | 1970-01-01T00:00:00.000000040Z |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
-- SQL: SELECT count(time) from cpu;
|
||||
+-----------------+
|
||||
| COUNT(cpu.time) |
|
||||
+-----------------+
|
||||
| 2 |
|
||||
+-----------------+
|
||||
-- SQL: SELECT count(foo) from cpu;
|
||||
+----------------+
|
||||
| COUNT(cpu.foo) |
|
||||
+----------------+
|
||||
| 2 |
|
||||
+----------------+
|
||||
-- SQL: SELECT count(bar) from cpu;
|
||||
+----------------+
|
||||
| COUNT(cpu.bar) |
|
||||
+----------------+
|
||||
| 2 |
|
||||
+----------------+
|
||||
-- SQL: SELECT count(*) from cpu;
|
||||
+-----------------+
|
||||
| COUNT(UInt8(1)) |
|
||||
+-----------------+
|
||||
| 2 |
|
||||
+-----------------+
|
||||
-- SQL: SELECT min(bar) from cpu;
|
||||
+--------------+
|
||||
| MIN(cpu.bar) |
|
||||
+--------------+
|
||||
| 1 |
|
||||
+--------------+
|
||||
-- SQL: SELECT foo from cpu;
|
||||
-- Results After Sorting
|
||||
+-----+
|
||||
| foo |
|
||||
+-----+
|
||||
| me |
|
||||
| you |
|
||||
+-----+
|
||||
-- SQL: SELECT min(foo) as min_foo from cpu order by min_foo;
|
||||
+---------+
|
||||
| min_foo |
|
||||
+---------+
|
||||
| me |
|
||||
+---------+
|
||||
-- SQL: SELECT max(foo) as max_foo from cpu order by max_foo;
|
||||
+---------+
|
||||
| max_foo |
|
||||
+---------+
|
||||
| you |
|
||||
+---------+
|
||||
-- SQL: SELECT min(foo) as min_foo from cpu group by time order by min_foo;
|
||||
+---------+
|
||||
| min_foo |
|
||||
+---------+
|
||||
| me |
|
||||
| you |
|
||||
+---------+
|
||||
-- SQL: SELECT max(foo) as max_foo from cpu group by time order by max_foo;
|
||||
+---------+
|
||||
| max_foo |
|
||||
+---------+
|
||||
| me |
|
||||
| you |
|
||||
+---------+
|
||||
-- SQL: SELECT time, max(foo) as max_foo from cpu group by time order by time, max_foo;
|
||||
+--------------------------------+---------+
|
||||
| time | max_foo |
|
||||
+--------------------------------+---------+
|
||||
| 1970-01-01T00:00:00.000000020Z | you |
|
||||
| 1970-01-01T00:00:00.000000040Z | me |
|
||||
+--------------------------------+---------+
|
||||
-- SQL: SELECT min(foo) as min_foo from cpu group by bar order by min_foo;
|
||||
+---------+
|
||||
| min_foo |
|
||||
+---------+
|
||||
| me |
|
||||
| you |
|
||||
+---------+
|
||||
-- SQL: SELECT bar, max(foo) as max_foo from cpu group by bar order by bar, max_foo;
|
||||
+-----+---------+
|
||||
| bar | max_foo |
|
||||
+-----+---------+
|
||||
| 1 | me |
|
||||
| 2 | you |
|
||||
+-----+---------+
|
||||
-- SQL: SELECT max(foo) as max_foo from cpu group by time order by max_foo;
|
||||
+---------+
|
||||
| max_foo |
|
||||
+---------+
|
||||
| me |
|
||||
| you |
|
||||
+---------+
|
||||
-- SQL: SELECT min(time) as min_time from cpu order by min_time;
|
||||
+--------------------------------+
|
||||
| min_time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(time) as max_time from cpu order by max_time;
|
||||
+--------------------------------+
|
||||
| max_time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT min(time) as min_time from cpu group by bar order by min_time;
|
||||
+--------------------------------+
|
||||
| min_time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT bar, min(time) as min_time from cpu group by bar order by bar, min_time;
|
||||
+-----+--------------------------------+
|
||||
| bar | min_time |
|
||||
+-----+--------------------------------+
|
||||
| 1 | 1970-01-01T00:00:00.000000040Z |
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT max(time) as max_time from cpu group by foo order by max_time;
|
||||
+--------------------------------+
|
||||
| max_time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT foo, max(time) as max_time from cpu group by foo order by foo, max_time;
|
||||
+-----+--------------------------------+
|
||||
| foo | max_time |
|
||||
+-----+--------------------------------+
|
||||
| me | 1970-01-01T00:00:00.000000040Z |
|
||||
| you | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT time from cpu;
|
||||
-- Results After Sorting
|
||||
+--------------------------------+
|
||||
| time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(bar) from cpu order by 1;
|
||||
+--------------+
|
||||
| MAX(cpu.bar) |
|
||||
+--------------+
|
||||
| 2 |
|
||||
+--------------+
|
||||
-- SQL: SELECT * from cpu where bar >= 1.0 order by bar, foo, time;
|
||||
+-----+-----+--------------------------------+
|
||||
| bar | foo | time |
|
||||
+-----+-----+--------------------------------+
|
||||
| 1 | me | 1970-01-01T00:00:00.000000040Z |
|
||||
| 2 | you | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+-----+--------------------------------+
|
||||
-- SQL: SELECT foo from cpu where bar >= 1.0 order by foo;
|
||||
+-----+
|
||||
| foo |
|
||||
+-----+
|
||||
| me |
|
||||
| you |
|
||||
+-----+
|
||||
-- SQL: SELECT time, bar from cpu where bar >= 1.0 order by bar, time;
|
||||
+--------------------------------+-----+
|
||||
| time | bar |
|
||||
+--------------------------------+-----+
|
||||
| 1970-01-01T00:00:00.000000040Z | 1 |
|
||||
| 1970-01-01T00:00:00.000000020Z | 2 |
|
||||
+--------------------------------+-----+
|
||||
-- SQL: SELECT * from cpu where foo = 'you' order by bar, foo, time;
|
||||
+-----+-----+--------------------------------+
|
||||
| bar | foo | time |
|
||||
+-----+-----+--------------------------------+
|
||||
| 2 | you | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+-----+--------------------------------+
|
||||
-- SQL: SELECT min(bar) as mi, max(time) as ma from cpu where foo = 'you' order by mi, ma
|
||||
+----+--------------------------------+
|
||||
| mi | ma |
|
||||
+----+--------------------------------+
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
+----+--------------------------------+
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
-- Demonstrate that soft-deleted rows are not returned by queries
|
||||
-- IOX_SETUP: OneDeleteMultiExprsOneChunk
|
||||
|
||||
-- select *
|
||||
SELECT * from cpu order by bar, foo, time;
|
||||
|
||||
SELECT time, bar from cpu order by time, bar;
|
||||
|
||||
SELECT bar from cpu order by bar;
|
||||
|
||||
SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu;
|
||||
|
||||
SELECT count(time) from cpu;
|
||||
|
||||
SELECT count(foo) from cpu;
|
||||
|
||||
SELECT count(bar) from cpu;
|
||||
|
||||
SELECT count(*) from cpu;
|
||||
|
||||
SELECT min(bar) from cpu;
|
||||
|
||||
-- IOX_COMPARE: sorted
|
||||
SELECT foo from cpu;
|
||||
|
||||
SELECT min(foo) as min_foo from cpu order by min_foo;
|
||||
SELECT max(foo) as max_foo from cpu order by max_foo;
|
||||
|
||||
SELECT min(foo) as min_foo from cpu group by time order by min_foo;
|
||||
SELECT max(foo) as max_foo from cpu group by time order by max_foo;
|
||||
SELECT time, max(foo) as max_foo from cpu group by time order by time, max_foo;
|
||||
|
||||
SELECT min(foo) as min_foo from cpu group by bar order by min_foo;
|
||||
SELECT bar, max(foo) as max_foo from cpu group by bar order by bar, max_foo;
|
||||
SELECT max(foo) as max_foo from cpu group by time order by max_foo;
|
||||
|
||||
SELECT min(time) as min_time from cpu order by min_time;
|
||||
SELECT max(time) as max_time from cpu order by max_time;
|
||||
|
||||
SELECT min(time) as min_time from cpu group by bar order by min_time;
|
||||
SELECT bar, min(time) as min_time from cpu group by bar order by bar, min_time;
|
||||
SELECT max(time) as max_time from cpu group by foo order by max_time;
|
||||
SELECT foo, max(time) as max_time from cpu group by foo order by foo, max_time;
|
||||
|
||||
-- IOX_COMPARE: sorted
|
||||
SELECT time from cpu;
|
||||
|
||||
SELECT max(bar) from cpu order by 1;
|
||||
|
||||
--------------------------------------------------------
|
||||
-- With selection predicate
|
||||
|
||||
SELECT * from cpu where bar >= 1.0 order by bar, foo, time;
|
||||
|
||||
SELECT foo from cpu where bar >= 1.0 order by foo;
|
||||
|
||||
SELECT time, bar from cpu where bar >= 1.0 order by bar, time;
|
||||
|
||||
SELECT * from cpu where foo = 'you' order by bar, foo, time;
|
||||
|
||||
SELECT min(bar) as mi, max(time) as ma from cpu where foo = 'you' order by mi, ma
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
-- Test Setup: OneDeleteSimpleExprOneChunk
|
||||
-- SQL: SELECT * from cpu;
|
||||
+-----+--------------------------------+
|
||||
| bar | time |
|
||||
+-----+--------------------------------+
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT time, bar from cpu;
|
||||
+--------------------------------+-----+
|
||||
| time | bar |
|
||||
+--------------------------------+-----+
|
||||
| 1970-01-01T00:00:00.000000020Z | 2 |
|
||||
+--------------------------------+-----+
|
||||
-- SQL: SELECT min(bar), max(bar) from cpu;
|
||||
+--------------+--------------+
|
||||
| MIN(cpu.bar) | MAX(cpu.bar) |
|
||||
+--------------+--------------+
|
||||
| 2 | 2 |
|
||||
+--------------+--------------+
|
||||
-- SQL: SELECT time from cpu;
|
||||
+--------------------------------+
|
||||
| time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MAX(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT min(time) from cpu group by bar;
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT bar, min(time) from cpu group by bar;
|
||||
+-----+--------------------------------+
|
||||
| bar | MIN(cpu.time) |
|
||||
+-----+--------------------------------+
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT count(time), max(time) from cpu;
|
||||
+-----------------+--------------------------------+
|
||||
| COUNT(cpu.time) | MAX(cpu.time) |
|
||||
+-----------------+--------------------------------+
|
||||
| 1 | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----------------+--------------------------------+
|
||||
-- SQL: SELECT count(time) from cpu;
|
||||
+-----------------+
|
||||
| COUNT(cpu.time) |
|
||||
+-----------------+
|
||||
| 1 |
|
||||
+-----------------+
|
||||
-- SQL: SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu;
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
| COUNT(cpu.time) | COUNT(UInt8(1)) | COUNT(cpu.bar) | MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
| 1 | 1 | 1 | 2 | 2 | 1970-01-01T00:00:00.000000020Z | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
-- SQL: SELECT * from cpu where bar = 2.0;
|
||||
+-----+--------------------------------+
|
||||
| bar | time |
|
||||
+-----+--------------------------------+
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT * from cpu where bar != 2.0;
|
||||
++
|
||||
++
|
||||
-- SQL: SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu where bar= 2.0;
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
| COUNT(cpu.time) | COUNT(UInt8(1)) | COUNT(cpu.bar) | MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
| 1 | 1 | 1 | 2 | 2 | 1970-01-01T00:00:00.000000020Z | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
|
||||
-- SQL: SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu where bar != 2.0;
|
||||
+-----------------+-----------------+----------------+--------------+--------------+---------------+---------------+
|
||||
| COUNT(cpu.time) | COUNT(UInt8(1)) | COUNT(cpu.bar) | MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+---------------+---------------+
|
||||
| 0 | 0 | 0 | | | | |
|
||||
+-----------------+-----------------+----------------+--------------+--------------+---------------+---------------+
|
||||
-- SQL: SELECT time from cpu where bar=2;
|
||||
+--------------------------------+
|
||||
| time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT bar from cpu where bar!= 2;
|
||||
++
|
||||
++
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
-- Demonstrate that soft-deleted rows are not returned by queries
|
||||
-- IOX_SETUP: OneDeleteSimpleExprOneChunk
|
||||
|
||||
-- select *
|
||||
SELECT * from cpu;
|
||||
|
||||
SELECT time, bar from cpu;
|
||||
|
||||
SELECT min(bar), max(bar) from cpu;
|
||||
|
||||
SELECT time from cpu;
|
||||
|
||||
SELECT max(time) from cpu;
|
||||
SELECT min(time) from cpu group by bar;
|
||||
SELECT bar, min(time) from cpu group by bar;
|
||||
|
||||
SELECT count(time), max(time) from cpu;
|
||||
|
||||
SELECT count(time) from cpu;
|
||||
|
||||
SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu;
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Now add selection predicate
|
||||
SELECT * from cpu where bar = 2.0;
|
||||
|
||||
SELECT * from cpu where bar != 2.0;
|
||||
|
||||
SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu where bar= 2.0;
|
||||
|
||||
SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) from cpu where bar != 2.0;
|
||||
|
||||
SELECT time from cpu where bar=2;
|
||||
|
||||
SELECT bar from cpu where bar!= 2;
|
||||
|
||||
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
-- Test Setup: ThreeDeleteThreeChunks
|
||||
-- SQL: SELECT * from cpu order by foo, bar, time;
|
||||
+-----+-----+--------------------------------+
|
||||
| bar | foo | time |
|
||||
+-----+-----+--------------------------------+
|
||||
| 1 | me | 1970-01-01T00:00:00.000000040Z |
|
||||
| 1 | me | 1970-01-01T00:00:00.000000042Z |
|
||||
| 1 | me | 1970-01-01T00:00:00.000000062Z |
|
||||
| 4 | me | 1970-01-01T00:00:00.000000050Z |
|
||||
| 5 | me | 1970-01-01T00:00:00.000000060Z |
|
||||
| 7 | me | 1970-01-01T00:00:00.000000080Z |
|
||||
| 3 | you | 1970-01-01T00:00:00.000000070Z |
|
||||
+-----+-----+--------------------------------+
|
||||
-- SQL: SELECT time, bar from cpu order by bar, time;
|
||||
+--------------------------------+-----+
|
||||
| time | bar |
|
||||
+--------------------------------+-----+
|
||||
| 1970-01-01T00:00:00.000000040Z | 1 |
|
||||
| 1970-01-01T00:00:00.000000042Z | 1 |
|
||||
| 1970-01-01T00:00:00.000000062Z | 1 |
|
||||
| 1970-01-01T00:00:00.000000070Z | 3 |
|
||||
| 1970-01-01T00:00:00.000000050Z | 4 |
|
||||
| 1970-01-01T00:00:00.000000060Z | 5 |
|
||||
| 1970-01-01T00:00:00.000000080Z | 7 |
|
||||
+--------------------------------+-----+
|
||||
-- SQL: SELECT bar from cpu order by bar;
|
||||
+-----+
|
||||
| bar |
|
||||
+-----+
|
||||
| 1 |
|
||||
| 1 |
|
||||
| 1 |
|
||||
| 3 |
|
||||
| 4 |
|
||||
| 5 |
|
||||
| 7 |
|
||||
+-----+
|
||||
-- SQL: SELECT count(time) as t, count(*) as c, count(bar) as b, min(bar) as mi, min(time) as mt, max(time) as mat from cpu order by t, c, b, mi, mt, mat;
|
||||
+---+---+---+----+--------------------------------+--------------------------------+
|
||||
| t | c | b | mi | mt | mat |
|
||||
+---+---+---+----+--------------------------------+--------------------------------+
|
||||
| 7 | 7 | 7 | 1 | 1970-01-01T00:00:00.000000040Z | 1970-01-01T00:00:00.000000080Z |
|
||||
+---+---+---+----+--------------------------------+--------------------------------+
|
||||
-- SQL: SELECT count(time) from cpu;
|
||||
+-----------------+
|
||||
| COUNT(cpu.time) |
|
||||
+-----------------+
|
||||
| 7 |
|
||||
+-----------------+
|
||||
-- SQL: SELECT count(foo) from cpu;
|
||||
+----------------+
|
||||
| COUNT(cpu.foo) |
|
||||
+----------------+
|
||||
| 7 |
|
||||
+----------------+
|
||||
-- SQL: SELECT count(bar) from cpu;
|
||||
+----------------+
|
||||
| COUNT(cpu.bar) |
|
||||
+----------------+
|
||||
| 7 |
|
||||
+----------------+
|
||||
-- SQL: SELECT count(*) from cpu;
|
||||
+-----------------+
|
||||
| COUNT(UInt8(1)) |
|
||||
+-----------------+
|
||||
| 7 |
|
||||
+-----------------+
|
||||
-- SQL: SELECT min(bar) from cpu;
|
||||
+--------------+
|
||||
| MIN(cpu.bar) |
|
||||
+--------------+
|
||||
| 1 |
|
||||
+--------------+
|
||||
-- SQL: SELECT foo from cpu order by foo;
|
||||
+-----+
|
||||
| foo |
|
||||
+-----+
|
||||
| me |
|
||||
| me |
|
||||
| me |
|
||||
| me |
|
||||
| me |
|
||||
| me |
|
||||
| you |
|
||||
+-----+
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
-- Demonstrate that soft-deleted rows are not returned by queries
|
||||
-- IOX_SETUP: ThreeDeleteThreeChunks
|
||||
|
||||
-- select *
|
||||
SELECT * from cpu order by foo, bar, time;
|
||||
|
||||
SELECT time, bar from cpu order by bar, time;
|
||||
|
||||
SELECT bar from cpu order by bar;
|
||||
|
||||
SELECT count(time) as t, count(*) as c, count(bar) as b, min(bar) as mi, min(time) as mt, max(time) as mat from cpu order by t, c, b, mi, mt, mat;
|
||||
|
||||
SELECT count(time) from cpu;
|
||||
|
||||
SELECT count(foo) from cpu;
|
||||
|
||||
SELECT count(bar) from cpu;
|
||||
|
||||
SELECT count(*) from cpu;
|
||||
|
||||
SELECT min(bar) from cpu;
|
||||
|
||||
SELECT foo from cpu order by foo;
|
||||
|
|
@ -1,77 +0,0 @@
|
|||
-- Test Setup: ThreeDeleteThreeChunks
|
||||
-- SQL: SELECT min(foo) from cpu;
|
||||
+--------------+
|
||||
| MIN(cpu.foo) |
|
||||
+--------------+
|
||||
| me |
|
||||
+--------------+
|
||||
-- SQL: SELECT max(foo) from cpu;
|
||||
+--------------+
|
||||
| MAX(cpu.foo) |
|
||||
+--------------+
|
||||
| you |
|
||||
+--------------+
|
||||
-- SQL: SELECT min(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MAX(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000080Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT foo, min(time) from cpu group by foo;
|
||||
-- Results After Sorting
|
||||
+-----+--------------------------------+
|
||||
| foo | MIN(cpu.time) |
|
||||
+-----+--------------------------------+
|
||||
| me | 1970-01-01T00:00:00.000000040Z |
|
||||
| you | 1970-01-01T00:00:00.000000070Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT bar, max(time) as max_time from cpu group by bar order by bar, max_time;
|
||||
+-----+--------------------------------+
|
||||
| bar | max_time |
|
||||
+-----+--------------------------------+
|
||||
| 1 | 1970-01-01T00:00:00.000000062Z |
|
||||
| 3 | 1970-01-01T00:00:00.000000070Z |
|
||||
| 4 | 1970-01-01T00:00:00.000000050Z |
|
||||
| 5 | 1970-01-01T00:00:00.000000060Z |
|
||||
| 7 | 1970-01-01T00:00:00.000000080Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT max(time) as max_time from cpu group by bar order by max_time;
|
||||
+--------------------------------+
|
||||
| max_time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000050Z |
|
||||
| 1970-01-01T00:00:00.000000060Z |
|
||||
| 1970-01-01T00:00:00.000000062Z |
|
||||
| 1970-01-01T00:00:00.000000070Z |
|
||||
| 1970-01-01T00:00:00.000000080Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT time from cpu order by time;
|
||||
+--------------------------------+
|
||||
| time |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
| 1970-01-01T00:00:00.000000042Z |
|
||||
| 1970-01-01T00:00:00.000000050Z |
|
||||
| 1970-01-01T00:00:00.000000060Z |
|
||||
| 1970-01-01T00:00:00.000000062Z |
|
||||
| 1970-01-01T00:00:00.000000070Z |
|
||||
| 1970-01-01T00:00:00.000000080Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(bar) from cpu;
|
||||
+--------------+
|
||||
| MAX(cpu.bar) |
|
||||
+--------------+
|
||||
| 7 |
|
||||
+--------------+
|
||||
-- SQL: SELECT min(time), max(time) from cpu;
|
||||
+--------------------------------+--------------------------------+
|
||||
| MIN(cpu.time) | MAX(cpu.time) |
|
||||
+--------------------------------+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000040Z | 1970-01-01T00:00:00.000000080Z |
|
||||
+--------------------------------+--------------------------------+
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
-- Demonstrate that soft-deleted rows are not returned by queries
|
||||
-- IOX_SETUP: ThreeDeleteThreeChunks
|
||||
|
||||
SELECT min(foo) from cpu;
|
||||
SELECT max(foo) from cpu;
|
||||
|
||||
SELECT min(time) from cpu;
|
||||
SELECT max(time) from cpu;
|
||||
|
||||
-- IOX_COMPARE: sorted
|
||||
SELECT foo, min(time) from cpu group by foo;
|
||||
SELECT bar, max(time) as max_time from cpu group by bar order by bar, max_time;
|
||||
SELECT max(time) as max_time from cpu group by bar order by max_time;
|
||||
|
||||
SELECT time from cpu order by time;
|
||||
|
||||
SELECT max(bar) from cpu;
|
||||
|
||||
SELECT min(time), max(time) from cpu;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue