Merge branch 'main' into dom/always-requeue
commit d44b6d412f
@ -549,12 +549,6 @@ version = "0.13.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.21.0"
|
||||
|
@ -1003,6 +997,7 @@ dependencies = [
|
|||
"data_types",
|
||||
"datafusion",
|
||||
"futures",
|
||||
"insta",
|
||||
"iox_catalog",
|
||||
"iox_query",
|
||||
"iox_tests",
|
||||
|
@ -1405,7 +1400,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow",
|
||||
|
@ -1451,7 +1446,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-common"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
|
@ -1464,7 +1459,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-expr"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow",
|
||||
|
@ -1476,7 +1471,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-optimizer"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
@ -1492,7 +1487,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-physical-expr"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow",
|
||||
|
@ -1522,7 +1517,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-proto"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
|
@ -1539,7 +1534,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-row"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
|
@ -1550,7 +1545,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-sql"
|
||||
version = "17.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
|
||||
dependencies = [
|
||||
"arrow-schema",
|
||||
"datafusion-common",
|
||||
|
@ -1747,14 +1742,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "filetime"
|
||||
version = "0.2.19"
|
||||
version = "0.2.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9"
|
||||
checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"windows-sys 0.42.0",
|
||||
"windows-sys 0.45.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3793,12 +3788,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "object_store"
|
||||
version = "0.5.3"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4201837dc4c27a8670f0363b1255cd3845a4f0c521211cced1ed14c1d0cc6d2"
|
||||
checksum = "1f344e51ec9584d2f51199c0c29c6f73dddd04ade986497875bf8fa2f178caf0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.20.0",
|
||||
"base64 0.21.0",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"futures",
|
||||
|
@ -5965,9 +5960,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "772c1426ab886e7362aedf4abc9c0d1348a979517efedfc25862944d10137af0"
|
||||
checksum = "f7afcae9e3f0fe2c370fd4657108972cbb2fa9db1b9f84849cefd80741b01cb6"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
|
@ -5986,9 +5981,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.19.1"
|
||||
version = "0.19.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90a238ee2e6ede22fb95350acc78e21dc40da00bb66c0334bde83de4ed89424e"
|
||||
checksum = "5e6a7712b49e1775fb9a7b998de6635b299237f48b404dde71704f2e0e7f37e5"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"nom8",
|
||||
|
|
|
@ -116,8 +116,8 @@ license = "MIT OR Apache-2.0"
|
|||
[workspace.dependencies]
|
||||
arrow = { version = "32.0.0" }
|
||||
arrow-flight = { version = "32.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="a67ef9197a0d7242a8089be6324ba2e25e84f41e", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="a67ef9197a0d7242a8089be6324ba2e25e84f41e" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="e222bd627b6e7974133364fed4600d74b4da6811", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="e222bd627b6e7974133364fed4600d74b4da6811" }
|
||||
hashbrown = { version = "0.13.2" }
|
||||
parquet = { version = "32.0.0" }
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ humantime = "2.1.0"
|
|||
iox_catalog = { path = "../iox_catalog" }
|
||||
iox_time = { path = "../iox_time" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.2"
|
||||
object_store = "0.5.4"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.92"
|
||||
|
|
|
@ -236,4 +236,12 @@ pub struct Compactor2Config {
|
|||
action
|
||||
)]
|
||||
pub process_once: bool,
|
||||
|
||||
/// Compact all partitions found in the catalog, no matter if/when they received writes.
|
||||
#[clap(
|
||||
long = "compaction-process-all-partitions",
|
||||
env = "INFLUXDB_IOX_COMPACTION_PROCESS_ALL_PARTITIONS",
|
||||
action
|
||||
)]
|
||||
pub process_all_partitions: bool,
|
||||
}
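The switches above are plain booleans wired to CLI flags and environment variables through clap's derive API. As an illustration of how the fully-shown flag parses, here is a minimal, hypothetical stand-alone sketch (the struct, binary, and doc text are invented and assume the crate's clap version with the `derive` and `env` features enabled; only the attribute values visible in the hunk above are taken from the diff):

use clap::Parser;

/// Hypothetical, trimmed-down stand-in for `Compactor2Config`, carrying only the
/// flag whose attributes are visible in the hunk above.
#[derive(Debug, Parser)]
struct MiniCompactorConfig {
    /// Compact all partitions found in the catalog, no matter if/when they received writes.
    #[clap(
        long = "compaction-process-all-partitions",
        env = "INFLUXDB_IOX_COMPACTION_PROCESS_ALL_PARTITIONS",
        action
    )]
    process_all_partitions: bool,
}

fn main() {
    // Passing `--compaction-process-all-partitions` (or setting the env var) flips
    // the bool to true; omitting both leaves the default of false.
    let config = MiniCompactorConfig::parse();
    println!("{config:?}");
}

Invoking this (hypothetical) binary with `--compaction-process-all-partitions` prints `MiniCompactorConfig { process_all_partitions: true }`.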
|
||||
|
|
|
@ -18,7 +18,7 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
iox_query = { path = "../iox_query" }
|
||||
iox_time = { path = "../iox_time" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.2"
|
||||
object_store = "0.5.4"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
predicate = { path = "../predicate" }
|
||||
|
|
|
@ -16,7 +16,7 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
iox_query = { path = "../iox_query" }
|
||||
iox_time = { path = "../iox_time" }
|
||||
metric = { path = "../metric" }
|
||||
object_store = "0.5.2"
|
||||
object_store = "0.5.4"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
predicate = { path = "../predicate" }
|
||||
|
@ -34,3 +34,4 @@ workspace-hack = { path = "../workspace-hack"}
|
|||
arrow_util = { path = "../arrow_util" }
|
||||
iox_tests = { path = "../iox_tests" }
|
||||
test_helpers = { path = "../test_helpers"}
|
||||
insta = { version = "1.26.0", features = ["yaml"] }
|
||||
|
|
|
@ -58,10 +58,8 @@ impl Compactor2 {
|
|||
_ = async {
|
||||
compact(config.partition_concurrency, config.partition_timeout, Arc::clone(&job_semaphore), &components).await;
|
||||
|
||||
// the main entry point does not allow servers to shut down themselves, so we just wait forever
|
||||
info!("comapctor done");
|
||||
futures::future::pending::<()>().await;
|
||||
} => unreachable!(),
|
||||
} => {}
|
||||
}
|
||||
});
|
||||
let worker = shared_handle(worker);
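For orientation, the hunk above changes the compaction arm of the select so that it can actually complete: previously the arm ran `compact(...)`, logged, and then awaited a pending future forever (hence `=> unreachable!()`); now the arm simply ends when `compact(...)` returns (`=> {}`). A minimal, hypothetical sketch of that control-flow difference, with invented names and a stand-in shutdown signal rather than the crate's real types (assumes tokio with the `macros`, `rt-multi-thread`, `sync`, and `time` features):

use std::time::Duration;

/// Hypothetical stand-in for the real `compact(...)` call.
async fn compact_all() {
    tokio::time::sleep(Duration::from_millis(10)).await;
}

#[tokio::main]
async fn main() {
    let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();

    tokio::select! {
        // Old shape: the arm never finished, so reaching its body was a bug:
        //     _ = async { compact_all().await; futures::future::pending::<()>().await } => unreachable!(),
        // New shape: the arm completes as soon as compaction is done.
        _ = async { compact_all().await } => {}
        _ = shutdown_rx => {}
    }

    drop(shutdown_tx);
}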
|
||||
|
|
|
@ -1,445 +0,0 @@
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
|
||||
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use iox_query::exec::ExecutorType;
|
||||
use tracker::AsyncSemaphoreMetrics;
|
||||
|
||||
use crate::{
|
||||
components::{
|
||||
df_planner::panic::PanicDataFusionPlanner, hardcoded::hardcoded_components, Components,
|
||||
},
|
||||
config::AlgoVersion,
|
||||
driver::compact,
|
||||
test_util::{list_object_store, AssertFutureExt, TestSetup},
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_no_file() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// no files
|
||||
let setup = TestSetup::builder().build().await;
|
||||
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert!(files.is_empty());
|
||||
|
||||
// compact
|
||||
// This will wait for files forever.
|
||||
let fut = run_compact(&setup);
|
||||
tokio::pin!(fut);
|
||||
fut.assert_pending().await;
|
||||
|
||||
// verify catalog is still empty
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert!(files.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_all_at_once() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let mut setup = TestSetup::builder().with_files().build().await;
|
||||
setup.set_compact_version(AlgoVersion::AllAtOnce);
|
||||
|
||||
// verify 6 files
|
||||
// verify ID and compaction level of the files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
// verify ID and max_l0_created_at
|
||||
let times = setup.test_times();
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
vec![
|
||||
(1, times.time_1_minute_future),
|
||||
(2, times.time_2_minutes_future),
|
||||
(3, times.time_5_minutes_future),
|
||||
(4, times.time_3_minutes_future),
|
||||
(5, times.time_5_minutes_future),
|
||||
(6, times.time_2_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// compact
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify number of files: 6 files are compacted into 2 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(7, CompactionLevel::FileNonOverlapped),
|
||||
(8, CompactionLevel::FileNonOverlapped),
|
||||
],
|
||||
);
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
// both files have max_l0_created time_5_minutes_future
|
||||
// which is the max of all L0 input's max_l0_created_at
|
||||
vec![
|
||||
(7, times.time_5_minutes_future),
|
||||
(8, times.time_5_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// verify the content of files
|
||||
// Compacted smaller file with the later data
|
||||
let mut files = setup.list_by_table_not_to_delete().await;
|
||||
let file1 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file1).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
// Compacted larger file with the earlier data
|
||||
let file0 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file0).await;
|
||||
assert_batches_sorted_eq!(
|
||||
[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
"| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_target_level() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let mut setup = TestSetup::builder().with_files().build().await;
|
||||
setup.set_compact_version(AlgoVersion::TargetLevel);
|
||||
setup.set_min_num_l1_files_to_compact(2);
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
// verify ID and max_l0_created_at
|
||||
let times = setup.test_times();
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
vec![
|
||||
(1, times.time_1_minute_future),
|
||||
(2, times.time_2_minutes_future),
|
||||
(3, times.time_5_minutes_future),
|
||||
(4, times.time_3_minutes_future),
|
||||
(5, times.time_5_minutes_future),
|
||||
(6, times.time_2_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// compact
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify number of files: 6 files are compacted into 2 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 2);
|
||||
|
||||
assert_levels(
|
||||
&files,
|
||||
// This is the result of 2-round compaction from L0s -> L1s and then L1s -> L2s
|
||||
// The first round will create two L1 files IDs 7 and 8
|
||||
// The second round will create two L2 files, IDs 9 and 10
|
||||
vec![(9, CompactionLevel::Final), (10, CompactionLevel::Final)],
|
||||
);
|
||||
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
// both files have max_l0_created time_5_minutes_future
|
||||
// which is the max of all L0 input's max_l0_created_at
|
||||
vec![
|
||||
(9, times.time_5_minutes_future),
|
||||
(10, times.time_5_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// verify the content of files
|
||||
// Compacted smaller file with the later data
|
||||
let mut files = setup.list_by_table_not_to_delete().await;
|
||||
let file1 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file1).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
// Compacted larger file with the earlier data
|
||||
let file0 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file0).await;
|
||||
assert_batches_sorted_eq!(
|
||||
[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
"| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_skip_compact() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder().with_files().build().await;
|
||||
|
||||
let expected_files_and_levels = vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
];
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(&files, expected_files_and_levels.clone());
|
||||
|
||||
// add the partition into skipped compaction
|
||||
setup
|
||||
.catalog
|
||||
.add_to_skipped_compaction(setup.partition_info.partition_id, "test reason")
|
||||
.await;
|
||||
|
||||
// compact but nothing will be compacted because the partition is skipped
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify still 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(&files, expected_files_and_levels.clone());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_partition_fail() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder().with_files().build().await;
|
||||
|
||||
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
|
||||
assert!(!catalog_files_pre.is_empty());
|
||||
|
||||
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
|
||||
assert!(!object_store_files_pre.is_empty());
|
||||
|
||||
run_compact_failing(&setup).await;
|
||||
|
||||
let catalog_files_post = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(catalog_files_pre, catalog_files_post);
|
||||
|
||||
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
|
||||
assert_eq!(object_store_files_pre, object_store_files_post);
|
||||
|
||||
let skipped = setup
|
||||
.catalog
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.list_skipped_compactions()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(skipped.len(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shadow_mode() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.with_files()
|
||||
.with_shadow_mode()
|
||||
.build()
|
||||
.await;
|
||||
|
||||
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
|
||||
assert!(!catalog_files_pre.is_empty());
|
||||
|
||||
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
|
||||
assert!(!object_store_files_pre.is_empty());
|
||||
|
||||
run_compact(&setup).await;
|
||||
|
||||
let catalog_files_post = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(catalog_files_pre, catalog_files_post);
|
||||
|
||||
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
|
||||
assert_eq!(object_store_files_pre, object_store_files_post);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shadow_mode_partition_fail() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.with_files()
|
||||
.with_shadow_mode()
|
||||
.build()
|
||||
.await;
|
||||
|
||||
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
|
||||
assert!(!catalog_files_pre.is_empty());
|
||||
|
||||
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
|
||||
assert!(!object_store_files_pre.is_empty());
|
||||
|
||||
run_compact_failing(&setup).await;
|
||||
|
||||
let catalog_files_post = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(catalog_files_pre, catalog_files_post);
|
||||
|
||||
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
|
||||
assert_eq!(object_store_files_pre, object_store_files_post);
|
||||
|
||||
let skipped = setup
|
||||
.catalog
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.list_skipped_compactions()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(skipped, vec![]);
|
||||
}
|
||||
|
||||
async fn run_compact(setup: &TestSetup) {
|
||||
let components = hardcoded_components(&setup.config);
|
||||
run_compact_impl(setup, components).await;
|
||||
}
|
||||
|
||||
async fn run_compact_failing(setup: &TestSetup) {
|
||||
let components = hardcoded_components(&setup.config);
|
||||
let components = Arc::new(Components {
|
||||
df_planner: Arc::new(PanicDataFusionPlanner::new()),
|
||||
..components.as_ref().clone()
|
||||
});
|
||||
run_compact_impl(setup, components).await;
|
||||
}
|
||||
|
||||
async fn run_compact_impl(setup: &TestSetup, components: Arc<Components>) {
|
||||
let config = Arc::clone(&setup.config);
|
||||
let job_semaphore = Arc::new(
|
||||
Arc::new(AsyncSemaphoreMetrics::new(&config.metric_registry, [])).new_semaphore(10),
|
||||
);
|
||||
|
||||
// register scratchpad store
|
||||
setup
|
||||
.catalog
|
||||
.exec()
|
||||
.new_context(ExecutorType::Reorg)
|
||||
.inner()
|
||||
.runtime_env()
|
||||
.register_object_store(
|
||||
"iox",
|
||||
config.parquet_store_scratchpad.id(),
|
||||
Arc::clone(config.parquet_store_scratchpad.object_store()),
|
||||
);
|
||||
|
||||
compact(
|
||||
NonZeroUsize::new(10).unwrap(),
|
||||
Duration::from_secs(3_6000),
|
||||
job_semaphore,
|
||||
&components,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn assert_levels<'a>(
|
||||
files: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
expected_files_and_levels: impl IntoIterator<Item = (i64, CompactionLevel)>,
|
||||
) {
|
||||
let files_and_levels: Vec<_> = files
|
||||
.into_iter()
|
||||
.map(|f| (f.id.get(), f.compaction_level))
|
||||
.collect();
|
||||
|
||||
let expected_files_and_levels: Vec<_> = expected_files_and_levels.into_iter().collect();
|
||||
|
||||
assert_eq!(files_and_levels, expected_files_and_levels);
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
/// Asserts each parquet file has (id, max_l0_created_at)
|
||||
fn assert_max_l0_created_at<'a>(
|
||||
files: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
expected_files_and_max_l0_created_ats: impl IntoIterator<Item = (i64, i64)>,
|
||||
) {
|
||||
let files_and_max_l0_created_ats: Vec<_> = files
|
||||
.into_iter()
|
||||
.map(|f| (f.id.get(), f.max_l0_created_at.get()))
|
||||
.collect();
|
||||
|
||||
let expected_files_and_max_l0_created_ats: Vec<_> =
|
||||
expected_files_and_max_l0_created_ats.into_iter().collect();
|
||||
|
||||
assert_eq!(
|
||||
files_and_max_l0_created_ats,
|
||||
expected_files_and_max_l0_created_ats
|
||||
);
|
||||
}
|
||||
}
|
|
@ -33,7 +33,7 @@ use crate::components::{
|
|||
///
|
||||
/// | Step | Name | Type | Description |
|
||||
/// | ---- | --------------------- | ----------------------------------------------------------- | ----------- |
|
||||
/// | 1 | **Actual source** | `inner_source`/`T1`/[`PartitionsSource`], wrapped | This is the actual source, e.g. a [catalog](crate::components::partitions_source::catalog::CatalogPartitionsSource) |
|
||||
/// | 1 | **Actual source** | `inner_source`/`T1`/[`PartitionsSource`], wrapped | This is the actual source, e.g. a [catalog](crate::components::partitions_source::catalog_to_compact::CatalogToCompactPartitionsSource) |
|
||||
/// | 2 | **Unique IDs source** | [`UniquePartionsSourceWrapper`], wraps `inner_source`/`T1` | Outputs the [`PartitionId`]s from the `inner_source` but filters out partitions that have not yet reached the uniqueness sink (step 4) |
|
||||
/// | 3 | **Critical section** | -- | Here it is always ensured that a single [`PartitionId`] does NOT occur more than once. |
|
||||
/// | 4 | **Unique IDs sink** | [`UniquePartitionDoneSinkWrapper`], wraps `inner_sink`/`T2` | Observes incoming IDs and removes them from the filter applied in step 2. |
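To make the table above concrete, here is a minimal, hypothetical sketch of the layering it describes, using simplified synchronous traits and invented names (these are not the crate's actual `PartitionsSource`/`PartitionDoneSink` APIs): a shared in-flight set is filled by the source wrapper (step 2) and drained by the sink wrapper (step 4), so an ID cannot be handed out a second time while it is still inside the critical section (step 3).

use std::collections::HashSet;
use std::sync::{Arc, Mutex};

// Simplified, synchronous stand-ins for the real async traits.
trait IdSource {
    fn fetch(&self) -> Vec<i64>;
}
trait IdDoneSink {
    fn record(&self, id: i64);
}

/// Step 2: wraps the actual source and filters out IDs that are still in flight.
struct UniqueSource<T: IdSource> {
    inner: T,
    in_flight: Arc<Mutex<HashSet<i64>>>,
}
impl<T: IdSource> IdSource for UniqueSource<T> {
    fn fetch(&self) -> Vec<i64> {
        let mut in_flight = self.in_flight.lock().unwrap();
        self.inner
            .fetch()
            .into_iter()
            .filter(|id| in_flight.insert(*id))
            .collect()
    }
}

/// Step 4: wraps the actual sink and releases IDs once they are done.
struct UniqueSink<S: IdDoneSink> {
    inner: S,
    in_flight: Arc<Mutex<HashSet<i64>>>,
}
impl<S: IdDoneSink> IdDoneSink for UniqueSink<S> {
    fn record(&self, id: i64) {
        self.in_flight.lock().unwrap().remove(&id);
        self.inner.record(id);
    }
}

struct VecSource(Vec<i64>);
impl IdSource for VecSource {
    fn fetch(&self) -> Vec<i64> {
        self.0.clone()
    }
}
struct NoopSink;
impl IdDoneSink for NoopSink {
    fn record(&self, _id: i64) {}
}

fn main() {
    let in_flight = Arc::new(Mutex::new(HashSet::new()));
    let source = UniqueSource { inner: VecSource(vec![1, 2, 3]), in_flight: Arc::clone(&in_flight) };
    let sink = UniqueSink { inner: NoopSink, in_flight };

    assert_eq!(source.fetch(), vec![1, 2, 3]); // first fetch hands everything out
    assert_eq!(source.fetch(), Vec::<i64>::new()); // still in flight, so filtered out
    sink.record(2); // the critical section finished with partition 2
    assert_eq!(source.fetch(), vec![2]); // 2 may now be emitted again
}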
|
||||
|
|
|
@ -6,6 +6,7 @@ use std::{
|
|||
use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream};
|
||||
|
||||
pub mod dedicated;
|
||||
pub mod noop;
|
||||
|
||||
pub trait DataFusionPlanExec: Debug + Display + Send + Sync {
|
||||
/// Convert DataFusion [`ExecutionPlan`] to multiple output streams.
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
use std::{fmt::Display, sync::Arc};
|
||||
|
||||
use datafusion::physical_plan::{
|
||||
stream::RecordBatchStreamAdapter, ExecutionPlan, SendableRecordBatchStream,
|
||||
};
|
||||
|
||||
use super::DataFusionPlanExec;
|
||||
|
||||
/// Creates a DataFusion plan that does nothing (for use in testing)
|
||||
#[derive(Debug, Default)]
|
||||
pub struct NoopDataFusionPlanExec;
|
||||
|
||||
impl NoopDataFusionPlanExec {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for NoopDataFusionPlanExec {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "noop")
|
||||
}
|
||||
}
|
||||
|
||||
impl DataFusionPlanExec for NoopDataFusionPlanExec {
|
||||
fn exec(&self, plan: Arc<dyn ExecutionPlan>) -> Vec<SendableRecordBatchStream> {
|
||||
let stream_count = plan.output_partitioning().partition_count();
|
||||
let schema = plan.schema();
|
||||
|
||||
(0..stream_count)
|
||||
.map(|_| {
|
||||
let stream = futures::stream::empty();
|
||||
let stream = RecordBatchStreamAdapter::new(Arc::clone(&schema), stream);
|
||||
Box::pin(stream) as SendableRecordBatchStream
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
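A hypothetical usage note (not part of the diff): since the noop executor only looks at a plan's schema and partition count, it can be constructed directly in tests; for instance, its `Display` output can be checked without building any DataFusion plan at all.

// Illustrative only; this test module is not part of the diff above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn display_is_noop() {
        let exec = NoopDataFusionPlanExec::new();
        assert_eq!(exec.to_string(), "noop");
    }
}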
|
|
@ -130,7 +130,8 @@ mod tests {
|
|||
|
||||
use crate::test_util::{
|
||||
create_l1_files, create_overlapped_files, create_overlapped_files_2,
|
||||
create_overlapped_l0_l1_files, create_overlapped_l1_l2_files,
|
||||
create_overlapped_l0_l1_files, create_overlapped_l1_l2_files, format_files,
|
||||
format_files_split,
|
||||
};
|
||||
|
||||
use super::*;
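The hunks below migrate these tests from the hand-maintained `assert_parquet_files*` expectations to insta inline YAML snapshots (`insta::assert_yaml_snapshot!(value, @r###"..."###)`). As a reference for the mechanism, here is a minimal, hypothetical example that does not depend on the compactor's `format_files` helper (which appears to serialize to a YAML list of strings): the inline snapshot can start out empty and is then filled in or updated with `cargo insta review`, assuming the `cargo-insta` tool is installed.

// Illustrative only; not part of the diff above.
#[test]
fn inline_snapshot_example() {
    // Any serde-serializable value works; a Vec of strings is a close stand-in
    // for what `format_files` produces in the tests below.
    let rendered = vec!["initial", "one line per file"];

    // On the first run insta records the YAML rendering of `rendered`; running
    // `cargo insta review` (or `cargo insta test --review`) writes the accepted
    // value back into the `@""` literal.
    insta::assert_yaml_snapshot!(rendered, @"");
}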
|
||||
|
@ -186,7 +187,17 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_one_level_empty() {
|
||||
let files = create_l1_files(1);
|
||||
assert_eq!(files.len(), 3);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |-----L1.13-----| "
|
||||
- "L1.12[400,500] |-----L1.12-----| "
|
||||
- "L1.11[250,350] |-----L1.11-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelNonOverlapSplit::new();
|
||||
|
||||
|
@ -204,29 +215,41 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_mix_1() {
|
||||
let files = create_overlapped_l0_l1_files(1);
|
||||
assert_eq!(files.len(), 6);
|
||||
|
||||
// Input files:
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
// Output files: (overlap, non_overlap) = ( [L0.1, L0.2, L0.3, L1.2, L1.3] , [L1.1] )
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[650,750] |---L0.2---| "
|
||||
- "L0.1[450,620] |-------L0.1-------| "
|
||||
- "L0.3[800,900] |---L0.3---| "
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |--L1.13---| "
|
||||
- "L1.12[400,500] |--L1.12---| "
|
||||
- "L1.11[250,350] |--L1.11---| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelNonOverlapSplit::new();
|
||||
let (overlap, non_overlap) = split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
assert_eq!(overlap.len(), 5);
|
||||
assert_eq!(non_overlap.len(), 1);
|
||||
|
||||
// Verify overlapping files
|
||||
// sort by id
|
||||
let mut overlap = overlap;
|
||||
overlap.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(overlap[0].id.get(), 1);
|
||||
assert_eq!(overlap[1].id.get(), 2);
|
||||
assert_eq!(overlap[2].id.get(), 3);
|
||||
assert_eq!(overlap[3].id.get(), 12);
|
||||
assert_eq!(overlap[4].id.get(), 13);
|
||||
// verify non-overlapping files
|
||||
assert_eq!(non_overlap[0].id.get(), 11);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("overlap", &overlap, "non_overlap", &non_overlap),
|
||||
@r###"
|
||||
---
|
||||
- overlap
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.1[450,620] |----------L0.1-----------| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 1b "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
- non_overlap
|
||||
- "L1, all files 1b "
|
||||
- "L1.11[250,350] |------------------------------------L1.11-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
// |--L2.1--| |--L2.2--|
|
||||
|
@ -236,28 +259,39 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_mix_2() {
|
||||
let files = create_overlapped_l1_l2_files(1);
|
||||
assert_eq!(files.len(), 5);
|
||||
|
||||
// Input files:
|
||||
// |--L2.1--| |--L2.2--|
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
// Output files: (overlap, non_overlap) = ( [L1.1, L1.2, L1.3, L2.2] , [L2.1] )
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |--L1.13--| "
|
||||
- "L1.12[400,500] |--L1.12--| "
|
||||
- "L1.11[250,350] |--L1.11--| "
|
||||
- "L2, all files 1b "
|
||||
- "L2.21[0,100] |--L2.21--| "
|
||||
- "L2.22[200,300] |--L2.22--| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelNonOverlapSplit::new();
|
||||
let (overlap, non_overlap) = split.apply(files, CompactionLevel::Final);
|
||||
assert_eq!(overlap.len(), 4);
|
||||
assert_eq!(non_overlap.len(), 1);
|
||||
|
||||
// Verify overlapping files
|
||||
// sort by id
|
||||
let mut overlap = overlap;
|
||||
overlap.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(overlap[0].id.get(), 11);
|
||||
assert_eq!(overlap[1].id.get(), 12);
|
||||
assert_eq!(overlap[2].id.get(), 13);
|
||||
assert_eq!(overlap[3].id.get(), 22);
|
||||
// verify non-overlapping files
|
||||
assert_eq!(non_overlap[0].id.get(), 21);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("overlap", &overlap, "non_overlap", &non_overlap),
|
||||
@r###"
|
||||
---
|
||||
- overlap
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |----L1.13-----|"
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.11[250,350] |----L1.11-----| "
|
||||
- "L2, all files 1b "
|
||||
- "L2.22[200,300] |----L2.22-----| "
|
||||
- non_overlap
|
||||
- "L2, all files 1b "
|
||||
- "L2.21[0,100] |------------------------------------L2.21-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -269,26 +303,40 @@ mod tests {
|
|||
//
|
||||
// . Output: (overlap, non_overlap) = ( [L0.1, L0.2, L1.2, L1.3] , [L1.1, L1.4] )
|
||||
let files = create_overlapped_files_2(1);
|
||||
assert_eq!(files.len(), 6);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[520,550] |L0.2| "
|
||||
- "L0.1[250,350] |--L0.1---| "
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[400,500] |--L1.13--| "
|
||||
- "L1.12[200,300] |--L1.12--| "
|
||||
- "L1.11[0,100] |--L1.11--| "
|
||||
- "L1.14[600,700] |--L1.14--| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelNonOverlapSplit::new();
|
||||
let (overlap, non_overlap) = split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
assert_eq!(overlap.len(), 4);
|
||||
assert_eq!(non_overlap.len(), 2);
|
||||
|
||||
// Verify overlapping files
|
||||
// sort by id
|
||||
let mut overlap = overlap;
|
||||
overlap.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(overlap[0].id.get(), 1);
|
||||
assert_eq!(overlap[1].id.get(), 2);
|
||||
assert_eq!(overlap[2].id.get(), 12);
|
||||
assert_eq!(overlap[3].id.get(), 13);
|
||||
// verify non-overlapping files
|
||||
// sort by id
|
||||
let mut non_overlap = non_overlap;
|
||||
non_overlap.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(non_overlap[0].id.get(), 11);
|
||||
assert_eq!(non_overlap[1].id.get(), 14);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("overlap", &overlap, "non_overlap", &non_overlap),
|
||||
@r###"
|
||||
---
|
||||
- overlap
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[520,550] |L0.2| "
|
||||
- "L0.1[250,350] |--------L0.1--------| "
|
||||
- "L1, all files 1b "
|
||||
- "L1.12[200,300] |-------L1.12--------| "
|
||||
- "L1.13[400,500] |-------L1.13--------| "
|
||||
- non_overlap
|
||||
- "L1, all files 1b "
|
||||
- "L1.11[0,100] |--L1.11--| "
|
||||
- "L1.14[600,700] |--L1.14--| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,8 +36,8 @@ impl FilesSplit for TargetLevelTargetLevelSplit {
|
|||
mod tests {
|
||||
|
||||
use crate::test_util::{
|
||||
assert_parquet_files, assert_parquet_files_split, create_l0_files, create_l1_files,
|
||||
create_l2_files, create_overlapped_files,
|
||||
create_l0_files, create_l1_files, create_l2_files, create_overlapped_files, format_files,
|
||||
format_files_split,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
@ -63,13 +63,17 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_partial_empty_files_l0() {
|
||||
let files = create_l0_files(1);
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.2[650,750] |-----L0.2------| ",
|
||||
"L0.1[450,620] |------------L0.1------------| ",
|
||||
"L0.3[800,900] |-----L0.3------| ",
|
||||
];
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[650,750] |-----L0.2------| "
|
||||
- "L0.1[450,620] |------------L0.1------------| "
|
||||
- "L0.3[800,900] |-----L0.3------| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelTargetLevelSplit::new();
|
||||
let (lower, higher) = split.apply(files.clone(), CompactionLevel::Initial);
|
||||
|
@ -88,13 +92,17 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_partial_empty_files_l1() {
|
||||
let files = create_l1_files(1);
|
||||
let expected = vec![
|
||||
"L1 ",
|
||||
"L1.13[600,700] |-----L1.13-----| ",
|
||||
"L1.12[400,500] |-----L1.12-----| ",
|
||||
"L1.11[250,350] |-----L1.11-----| ",
|
||||
];
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |-----L1.13-----| "
|
||||
- "L1.12[400,500] |-----L1.12-----| "
|
||||
- "L1.11[250,350] |-----L1.11-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelTargetLevelSplit::new();
|
||||
let (lower, higher) = split.apply(files.clone(), CompactionLevel::Initial);
|
||||
|
@ -113,12 +121,16 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_partial_empty_files_l2() {
|
||||
let files = create_l2_files();
|
||||
let expected = vec![
|
||||
"L2 ",
|
||||
"L2.21[0,100] |---------L2.21----------| ",
|
||||
"L2.22[200,300] |---------L2.22----------| ",
|
||||
];
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L2, all files 1b "
|
||||
- "L2.21[0,100] |---------L2.21----------| "
|
||||
- "L2.22[200,300] |---------L2.22----------| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelTargetLevelSplit::new();
|
||||
let (lower, higher) = split.apply(files.clone(), CompactionLevel::Initial);
|
||||
|
@ -138,40 +150,47 @@ mod tests {
|
|||
fn test_apply_target_level_0() {
|
||||
// Test target level Initial
|
||||
let files = create_overlapped_files();
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.2[650,750]@1 |-L0.2-| ",
|
||||
"L0.1[450,620]@1 |----L0.1-----| ",
|
||||
"L0.3[800,900]@100 |-L0.3-| ",
|
||||
"L1 ",
|
||||
"L1.13[600,700]@100 |L1.13-| ",
|
||||
"L1.12[400,500]@1 |L1.12-| ",
|
||||
"L1.11[250,350]@1 |L1.11-| ",
|
||||
"L2 ",
|
||||
"L2.21[0,100]@1 |L2.21-| ",
|
||||
"L2.22[200,300]@1 |L2.22-| ",
|
||||
];
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0 "
|
||||
- "L0.2[650,750] 1b |-L0.2-| "
|
||||
- "L0.1[450,620] 1b |----L0.1-----| "
|
||||
- "L0.3[800,900] 100b |-L0.3-| "
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |L1.13-| "
|
||||
- "L1.12[400,500] 1b |L1.12-| "
|
||||
- "L1.11[250,350] 1b |L1.11-| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 1b |L2.21-| "
|
||||
- "L2.22[200,300] 1b |L2.22-| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelTargetLevelSplit::new();
|
||||
let (lower, higher) = split.apply(files, CompactionLevel::Initial);
|
||||
|
||||
let expected = vec![
|
||||
"left",
|
||||
"L0 ",
|
||||
"L0.2[650,750]@1 |-----L0.2------| ",
|
||||
"L0.1[450,620]@1 |------------L0.1------------| ",
|
||||
"L0.3[800,900]@100 |-----L0.3------| ",
|
||||
"right",
|
||||
"L1 ",
|
||||
"L1.13[600,700]@100 |--L1.13--| ",
|
||||
"L1.12[400,500]@1 |--L1.12--| ",
|
||||
"L1.11[250,350]@1 |--L1.11--| ",
|
||||
"L2 ",
|
||||
"L2.21[0,100]@1 |--L2.21--| ",
|
||||
"L2.22[200,300]@1 |--L2.22--| ",
|
||||
];
|
||||
assert_parquet_files_split(expected, &lower, &higher);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("lower", &lower, "higher", &higher),
|
||||
@r###"
|
||||
---
|
||||
- lower
|
||||
- "L0 "
|
||||
- "L0.2[650,750] 1b |-----L0.2------| "
|
||||
- "L0.1[450,620] 1b |------------L0.1------------| "
|
||||
- "L0.3[800,900] 100b |-----L0.3------| "
|
||||
- higher
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |--L1.13--| "
|
||||
- "L1.12[400,500] 1b |--L1.12--| "
|
||||
- "L1.11[250,350] 1b |--L1.11--| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 1b |--L2.21--| "
|
||||
- "L2.22[200,300] 1b |--L2.22--| "
|
||||
"###
|
||||
);
|
||||
|
||||
// verify number of files
|
||||
assert_eq!(lower.len(), 3);
|
||||
|
@ -190,40 +209,47 @@ mod tests {
|
|||
fn test_apply_target_level_l1() {
|
||||
// Test target level is FileNonOverlapped
|
||||
let files = create_overlapped_files();
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.2[650,750]@1 |-L0.2-| ",
|
||||
"L0.1[450,620]@1 |----L0.1-----| ",
|
||||
"L0.3[800,900]@100 |-L0.3-| ",
|
||||
"L1 ",
|
||||
"L1.13[600,700]@100 |L1.13-| ",
|
||||
"L1.12[400,500]@1 |L1.12-| ",
|
||||
"L1.11[250,350]@1 |L1.11-| ",
|
||||
"L2 ",
|
||||
"L2.21[0,100]@1 |L2.21-| ",
|
||||
"L2.22[200,300]@1 |L2.22-| ",
|
||||
];
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0 "
|
||||
- "L0.2[650,750] 1b |-L0.2-| "
|
||||
- "L0.1[450,620] 1b |----L0.1-----| "
|
||||
- "L0.3[800,900] 100b |-L0.3-| "
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |L1.13-| "
|
||||
- "L1.12[400,500] 1b |L1.12-| "
|
||||
- "L1.11[250,350] 1b |L1.11-| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 1b |L2.21-| "
|
||||
- "L2.22[200,300] 1b |L2.22-| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelTargetLevelSplit::new();
|
||||
let (lower, higher) = split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
let expected = vec![
|
||||
"left",
|
||||
"L0 ",
|
||||
"L0.2[650,750]@1 |---L0.2---| ",
|
||||
"L0.1[450,620]@1 |-------L0.1-------| ",
|
||||
"L0.3[800,900]@100 |---L0.3---| ",
|
||||
"L1 ",
|
||||
"L1.13[600,700]@100 |--L1.13---| ",
|
||||
"L1.12[400,500]@1 |--L1.12---| ",
|
||||
"L1.11[250,350]@1 |--L1.11---| ",
|
||||
"right",
|
||||
"L2 ",
|
||||
"L2.21[0,100] |---------L2.21----------| ",
|
||||
"L2.22[200,300] |---------L2.22----------| ",
|
||||
];
|
||||
assert_parquet_files_split(expected, &lower, &higher);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("lower", &lower, "higher", &higher),
|
||||
@r###"
|
||||
---
|
||||
- lower
|
||||
- "L0 "
|
||||
- "L0.2[650,750] 1b |---L0.2---| "
|
||||
- "L0.1[450,620] 1b |-------L0.1-------| "
|
||||
- "L0.3[800,900] 100b |---L0.3---| "
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |--L1.13---| "
|
||||
- "L1.12[400,500] 1b |--L1.12---| "
|
||||
- "L1.11[250,350] 1b |--L1.11---| "
|
||||
- higher
|
||||
- "L2, all files 1b "
|
||||
- "L2.21[0,100] |---------L2.21----------| "
|
||||
- "L2.22[200,300] |---------L2.22----------| "
|
||||
"###
|
||||
);
|
||||
|
||||
// verify number of files
|
||||
assert_eq!(lower.len(), 6);
|
||||
|
@ -242,20 +268,24 @@ mod tests {
|
|||
fn test_apply_taget_level_l2() {
|
||||
// Test target level is Final
|
||||
let files = create_overlapped_files();
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.2[650,750]@1 |-L0.2-| ",
|
||||
"L0.1[450,620]@1 |----L0.1-----| ",
|
||||
"L0.3[800,900]@100 |-L0.3-| ",
|
||||
"L1 ",
|
||||
"L1.13[600,700]@100 |L1.13-| ",
|
||||
"L1.12[400,500]@1 |L1.12-| ",
|
||||
"L1.11[250,350]@1 |L1.11-| ",
|
||||
"L2 ",
|
||||
"L2.21[0,100]@1 |L2.21-| ",
|
||||
"L2.22[200,300]@1 |L2.22-| ",
|
||||
];
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0 "
|
||||
- "L0.2[650,750] 1b |-L0.2-| "
|
||||
- "L0.1[450,620] 1b |----L0.1-----| "
|
||||
- "L0.3[800,900] 100b |-L0.3-| "
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |L1.13-| "
|
||||
- "L1.12[400,500] 1b |L1.12-| "
|
||||
- "L1.11[250,350] 1b |L1.11-| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 1b |L2.21-| "
|
||||
- "L2.22[200,300] 1b |L2.22-| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelTargetLevelSplit::new();
|
||||
let (lower, higher) = split.apply(files, CompactionLevel::Final);
|
||||
|
|
|
@ -145,7 +145,7 @@ mod tests {
|
|||
create_overlapped_files_2, create_overlapped_files_3, create_overlapped_files_3_mix_size,
|
||||
create_overlapped_l0_l1_files, create_overlapped_l1_l2_files,
|
||||
create_overlapped_l1_l2_files_mix_size, create_overlapped_l1_l2_files_mix_size_2,
|
||||
create_overlapping_l0_files,
|
||||
create_overlapping_l0_files, format_files, format_files_split,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
@ -202,24 +202,54 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1-----|
|
||||
// |--L0.2--| |--L0.3--|
|
||||
fn test_apply_one_level_overlap_small_l0() {
|
||||
let files = create_overlapping_l0_files((MAX_SIZE - 1) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 99b "
|
||||
- "L0.2[150,180] |L0.2| "
|
||||
- "L0.1[100,200] |--L0.1--| "
|
||||
- "L0.3[800,900] |--L0.3--|"
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// All files are small --> nothing to upgrade
|
||||
assert_eq!(files_to_compact.len(), 3);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 99b "
|
||||
- "L0.3[800,900] |--L0.3--|"
|
||||
- "L0.1[100,200] |--L0.1--| "
|
||||
- "L0.2[150,180] |L0.2| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1-----|
|
||||
// |--L0.2--| |--L0.3--|
|
||||
fn test_apply_one_level_overlap_large_l0() {
|
||||
let files = create_overlapping_l0_files((MAX_SIZE + 1) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 101b "
|
||||
- "L0.2[150,180] |L0.2| "
|
||||
- "L0.1[100,200] |--L0.1--| "
|
||||
- "L0.3[800,900] |--L0.3--|"
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
@ -227,275 +257,599 @@ mod tests {
|
|||
// All files are large but only one eligible for upgrade
|
||||
// files_to_compact = [L0.1, L0.2]
|
||||
// files_to_upgrade = [L0.3]
|
||||
assert_eq!(files_to_compact.len(), 2);
|
||||
assert_eq!(files_to_upgrade.len(), 1);
|
||||
|
||||
// verify the files by sorting by id
|
||||
let mut files_to_compact = files_to_compact;
|
||||
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_compact[0].id.get(), 1);
|
||||
assert_eq!(files_to_compact[1].id.get(), 2);
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 3);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 101b "
|
||||
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[150,180] |---------L0.2---------| "
|
||||
- files_to_upgrade
|
||||
- "L0, all files 101b "
|
||||
- "L0.3[800,900] |-------------------------------------L0.3-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
fn test_apply_one_level_small_l0() {
|
||||
let files = create_l0_files((MAX_SIZE - 1) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 99b "
|
||||
- "L0.2[650,750] |-----L0.2------| "
|
||||
- "L0.1[450,620] |------------L0.1------------| "
|
||||
- "L0.3[800,900] |-----L0.3------| "
|
||||
"###
|
||||
);
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// All files are small --> nothing to upgrade
|
||||
assert_eq!(files_to_compact.len(), 3);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 99b "
|
||||
- "L0.3[800,900] |-----L0.3------| "
|
||||
- "L0.1[450,620] |------------L0.1------------| "
|
||||
- "L0.2[650,750] |-----L0.2------| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
fn test_apply_one_level_large_l0() {
|
||||
let files = create_l0_files((MAX_SIZE + 1) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 101b "
|
||||
- "L0.2[650,750] |-----L0.2------| "
|
||||
- "L0.1[450,620] |------------L0.1------------| "
|
||||
- "L0.3[800,900] |-----L0.3------| "
|
||||
"###
|
||||
);
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// All files are large and eligible for upgrade
|
||||
assert_eq!(files_to_compact.len(), 0);
|
||||
assert_eq!(files_to_upgrade.len(), 3);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- files_to_upgrade
|
||||
- "L0, all files 101b "
|
||||
- "L0.2[650,750] |-----L0.2------| "
|
||||
- "L0.1[450,620] |------------L0.1------------| "
|
||||
- "L0.3[800,900] |-----L0.3------| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
fn test_apply_one_level_small_l1() {
|
||||
let files = create_l1_files((MAX_SIZE - 1) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 99b "
|
||||
- "L1.13[600,700] |-----L1.13-----| "
|
||||
- "L1.12[400,500] |-----L1.12-----| "
|
||||
- "L1.11[250,350] |-----L1.11-----| "
|
||||
"###
|
||||
);
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// All files are small --> nothing to upgrade
|
||||
assert_eq!(files_to_compact.len(), 3);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L1, all files 99b "
|
||||
- "L1.13[600,700] |-----L1.13-----| "
|
||||
- "L1.12[400,500] |-----L1.12-----| "
|
||||
- "L1.11[250,350] |-----L1.11-----| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
fn test_apply_one_level_large_l1() {
|
||||
let files = create_l1_files((MAX_SIZE + 1) as i64);
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
|
||||
|
||||
// All files are large and eligible for upgrade
|
||||
assert_eq!(files_to_compact.len(), 0);
|
||||
assert_eq!(files_to_upgrade.len(), 3);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- files_to_upgrade
|
||||
- "L1, all files 101b "
|
||||
- "L1.13[600,700] |-----L1.13-----| "
|
||||
- "L1.12[400,500] |-----L1.12-----| "
|
||||
- "L1.11[250,350] |-----L1.11-----| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--| |--L1.4--| |--L1.5--|
|
||||
// . small files (< size ): L1.1, L1.3
|
||||
// . Large files (>= size): L1.2, L1.4, L1.5
|
||||
//
|
||||
// . files_to_compact = [L1.1, L1.2, L1.3]
|
||||
// . files_to_upgrade = [L1.4, L1.5]
|
||||
fn test_apply_one_level_l1_mix_size() {
|
||||
let files = create_l1_files_mix_size(MAX_SIZE as i64);
|
||||
|
||||
// . small files (< size ): L1.1, L1.3
|
||||
// . Large files (>= size): L1.2, L1.4, L1.5
|
||||
//
|
||||
// . files_to_compact = [L1.1, L1.2, L1.3]
|
||||
// . files_to_upgrade = [L1.4, L1.5]
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1 "
|
||||
- "L1.15[1000,1100] 200b |-L1.15-| "
|
||||
- "L1.13[600,700] 90b |-L1.13-| "
|
||||
- "L1.12[400,500] 101b |-L1.12-| "
|
||||
- "L1.11[250,350] 99b |-L1.11-| "
|
||||
- "L1.14[800,900] 100b |-L1.14-| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
|
||||
|
||||
// All files are large and eligible for upgrade
|
||||
assert_eq!(files_to_compact.len(), 3);
|
||||
assert_eq!(files_to_upgrade.len(), 2);
|
||||
// verify IDs
|
||||
let mut files_to_compact = files_to_compact;
|
||||
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_compact[0].id.get(), 11);
|
||||
assert_eq!(files_to_compact[1].id.get(), 12);
|
||||
assert_eq!(files_to_compact[2].id.get(), 13);
|
||||
let mut files_to_upgrade = files_to_upgrade;
|
||||
files_to_upgrade.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 14);
|
||||
assert_eq!(files_to_upgrade[1].id.get(), 15);
|
||||
// Some files are large and eligible for upgrade
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L1 "
|
||||
- "L1.11[250,350] 99b |-----L1.11-----| "
|
||||
- "L1.13[600,700] 90b |-----L1.13-----| "
|
||||
- "L1.12[400,500] 101b |-----L1.12-----| "
|
||||
- files_to_upgrade
|
||||
- "L1 "
|
||||
- "L1.15[1000,1100] 200b |---------L1.15----------| "
|
||||
- "L1.14[800,900] 100b |---------L1.14----------| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
fn test_apply_all_small_target_l1() {
|
||||
let files = create_overlapped_l0_l1_files((MAX_SIZE - 1) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 99b "
|
||||
- "L0.2[650,750] |---L0.2---| "
|
||||
- "L0.1[450,620] |-------L0.1-------| "
|
||||
- "L0.3[800,900] |---L0.3---| "
|
||||
- "L1, all files 99b "
|
||||
- "L1.13[600,700] |--L1.13---| "
|
||||
- "L1.12[400,500] |--L1.12---| "
|
||||
- "L1.11[250,350] |--L1.11---| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// All files are small --> nothing to upgrade
|
||||
assert_eq!(files_to_compact.len(), 6);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 99b "
|
||||
- "L0.3[800,900] |---L0.3---| "
|
||||
- "L0.1[450,620] |-------L0.1-------| "
|
||||
- "L0.2[650,750] |---L0.2---| "
|
||||
- "L1, all files 99b "
|
||||
- "L1.13[600,700] |--L1.13---| "
|
||||
- "L1.12[400,500] |--L1.12---| "
|
||||
- "L1.11[250,350] |--L1.11---| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
fn test_apply_all_large_target_l1() {
|
||||
let files = create_overlapped_l0_l1_files((MAX_SIZE) as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 100b "
|
||||
- "L0.2[650,750] |---L0.2---| "
|
||||
- "L0.1[450,620] |-------L0.1-------| "
|
||||
- "L0.3[800,900] |---L0.3---| "
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |--L1.13---| "
|
||||
- "L1.12[400,500] |--L1.12---| "
|
||||
- "L1.11[250,350] |--L1.11---| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// All files are large --> L0.3 is eligible for upgrade
|
||||
assert_eq!(files_to_compact.len(), 5);
|
||||
assert_eq!(files_to_upgrade.len(), 1);
|
||||
|
||||
// verify IDs
|
||||
let mut files_to_compact = files_to_compact;
|
||||
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_compact[0].id.get(), 1);
|
||||
assert_eq!(files_to_compact[1].id.get(), 2);
|
||||
assert_eq!(files_to_compact[2].id.get(), 11);
|
||||
assert_eq!(files_to_compact[3].id.get(), 12);
|
||||
assert_eq!(files_to_compact[4].id.get(), 13);
|
||||
//
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 3);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 100b "
|
||||
- "L0.1[450,620] |----------L0.1-----------| "
|
||||
- "L0.2[650,750] |-----L0.2-----|"
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.11[250,350] |----L1.11-----| "
|
||||
- files_to_upgrade
|
||||
- "L0, all files 100b "
|
||||
- "L0.3[800,900] |-------------------------------------L0.3-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L2.1--| |--L2.2--|
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
fn test_apply_all_small_target_l2() {
|
||||
let files = create_overlapped_l1_l2_files((MAX_SIZE - 1) as i64);
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
|
||||
|
||||
// All files are small --> nothing to upgrade
|
||||
assert_eq!(files_to_compact.len(), 5);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L1, all files 99b "
|
||||
- "L1.11[250,350] |--L1.11--| "
|
||||
- "L1.12[400,500] |--L1.12--| "
|
||||
- "L1.13[600,700] |--L1.13--| "
|
||||
- "L2, all files 99b "
|
||||
- "L2.21[0,100] |--L2.21--| "
|
||||
- "L2.22[200,300] |--L2.22--| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L2.1--| |--L2.2--|
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
fn test_apply_all_large_target_l2() {
|
||||
let files = create_overlapped_l1_l2_files(MAX_SIZE as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |--L1.13--| "
|
||||
- "L1.12[400,500] |--L1.12--| "
|
||||
- "L1.11[250,350] |--L1.11--| "
|
||||
- "L2, all files 100b "
|
||||
- "L2.21[0,100] |--L2.21--| "
|
||||
- "L2.22[200,300] |--L2.22--| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
|
||||
|
||||
// All files are large --> L1.2 and L1.3 are eligible for upgrade
|
||||
assert_eq!(files_to_compact.len(), 3);
|
||||
assert_eq!(files_to_upgrade.len(), 2);
|
||||
|
||||
// verify IDs
|
||||
let mut files_to_compact = files_to_compact;
|
||||
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_compact[0].id.get(), 11);
|
||||
assert_eq!(files_to_compact[1].id.get(), 21);
|
||||
assert_eq!(files_to_compact[2].id.get(), 22);
|
||||
let mut files_to_upgrade = files_to_upgrade;
|
||||
files_to_upgrade.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 12);
|
||||
assert_eq!(files_to_upgrade[1].id.get(), 13);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L1, all files 100b "
|
||||
- "L1.11[250,350] |-------L1.11--------| "
|
||||
- "L2, all files 100b "
|
||||
- "L2.21[0,100] |-------L2.21--------| "
|
||||
- "L2.22[200,300] |-------L2.22--------| "
|
||||
- files_to_upgrade
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |---------L1.13----------| "
|
||||
- "L1.12[400,500] |---------L1.12----------| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L2.1--| |--L2.2--|
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
// Small files (< size): [L1.3]
|
||||
// Large files: [L2.1, L2.2, L1.1, L1.2]
|
||||
// ==> nothing to upgrade
|
||||
fn test_apply_all_small_target_l2_mix_size() {
|
||||
let files = create_overlapped_l1_l2_files_mix_size(MAX_SIZE as i64);
|
||||
// Small files (< size): [L1.3]
|
||||
// Large files: [L2.1, L2.2, L1.1, L1.2]
|
||||
// ==> nothing to upgrade
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 99b |--L1.13--| "
|
||||
- "L1.12[400,500] 100b |--L1.12--| "
|
||||
- "L1.11[250,350] 100b |--L1.11--| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 100b |--L2.21--| "
|
||||
- "L2.22[200,300] 100b |--L2.22--| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
|
||||
|
||||
assert_eq!(files_to_compact.len(), 5);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L1 "
|
||||
- "L1.11[250,350] 100b |--L1.11--| "
|
||||
- "L1.13[600,700] 99b |--L1.13--| "
|
||||
- "L1.12[400,500] 100b |--L1.12--| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 100b |--L2.21--| "
|
||||
- "L2.22[200,300] 100b |--L2.22--| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L2.1--| |--L2.2--|
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--|
|
||||
// Small files (< size): [L1.2]
|
||||
// Large files: [L2.1, L2.2, L1.1, L1.3]
|
||||
// ==> L1.3 is eligible for upgrade
|
||||
fn test_apply_all_small_target_l2_mix_size_2() {
|
||||
let files = create_overlapped_l1_l2_files_mix_size_2(MAX_SIZE as i64);
|
||||
// Small files (< size): [L1.2]
|
||||
// Large files: [L2.1, L2.2, L1.1, L1.3]
|
||||
// ==> L1.3 is eligible for upgrade
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |--L1.13--| "
|
||||
- "L1.12[400,500] 99b |--L1.12--| "
|
||||
- "L1.11[250,350] 100b |--L1.11--| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 100b |--L2.21--| "
|
||||
- "L2.22[200,300] 100b |--L2.22--| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
|
||||
|
||||
assert_eq!(files_to_compact.len(), 4);
|
||||
assert_eq!(files_to_upgrade.len(), 1);
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 13);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L1 "
|
||||
- "L1.11[250,350] 100b |----L1.11-----| "
|
||||
- "L1.12[400,500] 99b |----L1.12-----|"
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 100b |----L2.21-----| "
|
||||
- "L2.22[200,300] 100b |----L2.22-----| "
|
||||
- files_to_upgrade
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |------------------------------------L1.13-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1--| |--L0.2--|
|
||||
// |--L1.1--| |--L1.2--| |--L1.3--| |--L1.4--|
|
||||
// L0s in the time range of L1 ==> nothing to upgrade
|
||||
fn test_apply_all_large_but_no_upgrade() {
|
||||
let files = create_overlapped_files_2(MAX_SIZE as i64);
|
||||
// L0s in the time range of L1 ==> nothing to upgrade
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 100b "
|
||||
- "L0.2[520,550] |L0.2| "
|
||||
- "L0.1[250,350] |--L0.1---| "
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[400,500] |--L1.13--| "
|
||||
- "L1.12[200,300] |--L1.12--| "
|
||||
- "L1.11[0,100] |--L1.11--| "
|
||||
- "L1.14[600,700] |--L1.14--| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
assert_eq!(files_to_compact.len(), 6);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 100b "
|
||||
- "L0.1[250,350] |--L0.1---| "
|
||||
- "L0.2[520,550] |L0.2| "
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[400,500] |--L1.13--| "
|
||||
- "L1.12[200,300] |--L1.12--| "
|
||||
- "L1.11[0,100] |--L1.11--| "
|
||||
- "L1.14[600,700] |--L1.14--| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
// |--L0.4--| |--L0.5--| |--L0.6--|
|
||||
// |--L1.1--| |--L1.2--|
|
||||
// All small ==> nothing to upgrade
|
||||
fn test_apply_all_small_target_l1_2() {
|
||||
let files = create_overlapped_files_3((MAX_SIZE - 1) as i64);
|
||||
// All small ==> nothing to upgrade
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 99b "
|
||||
- "L0.3[400,500] |L0.3-| "
|
||||
- "L0.2[200,300] |L0.2-| "
|
||||
- "L0.1[0,100] |L0.1-| "
|
||||
- "L0.4[600,700] |L0.4-| "
|
||||
- "L0.5[800,900] |L0.5-| "
|
||||
- "L0.6[1000,1100] |L0.6-| "
|
||||
- "L1, all files 99b "
|
||||
- "L1.11[250,350] |L1.11| "
|
||||
- "L1.12[650,750] |L1.12| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
assert_eq!(files_to_compact.len(), 8);
|
||||
assert_eq!(files_to_upgrade.len(), 0);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 99b "
|
||||
- "L0.6[1000,1100] |L0.6-| "
|
||||
- "L0.5[800,900] |L0.5-| "
|
||||
- "L0.4[600,700] |L0.4-| "
|
||||
- "L0.1[0,100] |L0.1-| "
|
||||
- "L0.2[200,300] |L0.2-| "
|
||||
- "L0.3[400,500] |L0.3-| "
|
||||
- "L1, all files 99b "
|
||||
- "L1.11[250,350] |L1.11| "
|
||||
- "L1.12[650,750] |L1.12| "
|
||||
- files_to_upgrade
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
// |--L0.4--| |--L0.5--| |--L0.6--|
|
||||
// |--L1.1--| |--L1.2--|
|
||||
// All large ==> L0.1, L0.5, L0.6 are eligible for upgrade
|
||||
// files_to_compact: [L0.2, L0.3, L0.4, L1.1, L1.2]
|
||||
// files_to_upgrade: [L0.1, L0.5, L0.6]
|
||||
fn test_apply_all_large_target_l1_2() {
|
||||
let files = create_overlapped_files_3((MAX_SIZE + 10) as i64);
|
||||
// All large ==> L0.1, L0.5, L0.6 are eligible for upgrade
|
||||
// files_to_compact: [L0.2, L0.3, L0.4, L1.1, L1.2]
|
||||
// files_to_upgrade: [L0.1, L0.5, L0.6]
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 110b "
|
||||
- "L0.3[400,500] |L0.3-| "
|
||||
- "L0.2[200,300] |L0.2-| "
|
||||
- "L0.1[0,100] |L0.1-| "
|
||||
- "L0.4[600,700] |L0.4-| "
|
||||
- "L0.5[800,900] |L0.5-| "
|
||||
- "L0.6[1000,1100] |L0.6-| "
|
||||
- "L1, all files 110b "
|
||||
- "L1.11[250,350] |L1.11| "
|
||||
- "L1.12[650,750] |L1.12| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
assert_eq!(files_to_compact.len(), 5);
|
||||
assert_eq!(files_to_upgrade.len(), 3);
|
||||
let mut files_to_compact = files_to_compact;
|
||||
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_compact[0].id.get(), 2);
|
||||
assert_eq!(files_to_compact[1].id.get(), 3);
|
||||
assert_eq!(files_to_compact[2].id.get(), 4);
|
||||
assert_eq!(files_to_compact[3].id.get(), 11);
|
||||
assert_eq!(files_to_compact[4].id.get(), 12);
|
||||
let mut files_to_upgrade = files_to_upgrade;
|
||||
files_to_upgrade.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 1);
|
||||
assert_eq!(files_to_upgrade[1].id.get(), 5);
|
||||
assert_eq!(files_to_upgrade[2].id.get(), 6);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0, all files 110b "
|
||||
- "L0.4[600,700] |----L0.4----| "
|
||||
- "L0.2[200,300] |----L0.2----| "
|
||||
- "L0.3[400,500] |----L0.3----| "
|
||||
- "L1, all files 110b "
|
||||
- "L1.11[250,350] |---L1.11----| "
|
||||
- "L1.12[650,750] |---L1.12----| "
|
||||
- files_to_upgrade
|
||||
- "L0, all files 110b "
|
||||
- "L0.1[0,100] |L0.1-| "
|
||||
- "L0.5[800,900] |L0.5-| "
|
||||
- "L0.6[1000,1100] |L0.6-| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// |--L0.1--| |--L0.2--| |--L0.3--|
|
||||
// |--L0.4--| |--L0.5--| |--L0.6--|
|
||||
// |--L1.1--| |--L1.2--|
|
||||
// Small files (< size): L0.6
|
||||
// Large files: the rest
|
||||
// ==> only L0.1 is eligible for upgrade
|
||||
fn test_apply_mix_size_target_l1_2() {
|
||||
let files = create_overlapped_files_3_mix_size(MAX_SIZE as i64);
|
||||
// Small files (< size): L0.6
|
||||
// Large files: the rest
|
||||
// ==> only L0.1 is eligible for upgrade
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0 "
|
||||
- "L0.3[400,500] 100b |L0.3-| "
|
||||
- "L0.2[200,300] 100b |L0.2-| "
|
||||
- "L0.1[0,100] 100b |L0.1-| "
|
||||
- "L0.4[600,700] 100b |L0.4-| "
|
||||
- "L0.5[800,900] 100b |L0.5-| "
|
||||
- "L0.6[1000,1100] 99b |L0.6-| "
|
||||
- "L1 "
|
||||
- "L1.11[250,350] 100b |L1.11| "
|
||||
- "L1.12[650,750] 100b |L1.12| "
|
||||
"###
|
||||
);
|
||||
|
||||
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
|
||||
let (files_to_compact, files_to_upgrade) =
|
||||
split.apply(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
assert_eq!(files_to_compact.len(), 7);
|
||||
assert_eq!(files_to_upgrade.len(), 1);
|
||||
assert_eq!(files_to_upgrade[0].id.get(), 1);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
|
||||
@r###"
|
||||
---
|
||||
- files_to_compact
|
||||
- "L0 "
|
||||
- "L0.6[1000,1100] 99b |-L0.6-| "
|
||||
- "L0.4[600,700] 100b |-L0.4-| "
|
||||
- "L0.2[200,300] 100b |-L0.2-| "
|
||||
- "L0.3[400,500] 100b |-L0.3-| "
|
||||
- "L0.5[800,900] 100b |-L0.5-| "
|
||||
- "L1 "
|
||||
- "L1.11[250,350] 100b |L1.11-| "
|
||||
- "L1.12[650,750] 100b |L1.12-| "
|
||||
- files_to_upgrade
|
||||
- "L0, all files 100b "
|
||||
- "L0.1[0,100] |-------------------------------------L0.1-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@ use crate::{
|
|||
namespaces_source::catalog::CatalogNamespacesSource,
|
||||
tables_source::catalog::CatalogTablesSource,
|
||||
},
|
||||
config::{AlgoVersion, Config},
|
||||
config::{AlgoVersion, Config, PartitionsSourceConfig},
|
||||
error::ErrorKind,
|
||||
};
|
||||
|
||||
|
@ -22,7 +22,9 @@ use super::{
|
|||
catalog::CatalogCommit, logging::LoggingCommitWrapper, metrics::MetricsCommitWrapper,
|
||||
mock::MockCommit, Commit,
|
||||
},
|
||||
df_plan_exec::dedicated::DedicatedDataFusionPlanExec,
|
||||
df_plan_exec::{
|
||||
dedicated::DedicatedDataFusionPlanExec, noop::NoopDataFusionPlanExec, DataFusionPlanExec,
|
||||
},
|
||||
df_planner::{logging::LoggingDataFusionPlannerWrapper, planner_v1::V1DataFusionPlanner},
|
||||
divide_initial::single_branch::SingleBranchDivideInitial,
|
||||
file_filter::{and::AndFileFilter, level_range::LevelRangeFileFilter},
|
||||
|
@ -36,13 +38,12 @@ use super::{
|
|||
target_level_upgrade_split::TargetLevelUpgradeSplit, FilesSplit,
|
||||
},
|
||||
id_only_partition_filter::{
|
||||
and::AndIdOnlyPartitionFilter, by_id::ByIdPartitionFilter, shard::ShardPartitionFilter,
|
||||
IdOnlyPartitionFilter,
|
||||
and::AndIdOnlyPartitionFilter, shard::ShardPartitionFilter, IdOnlyPartitionFilter,
|
||||
},
|
||||
level_exist::one_level::OneLevelExist,
|
||||
parquet_file_sink::{
|
||||
dedicated::DedicatedExecParquetFileSinkWrapper, logging::LoggingParquetFileSinkWrapper,
|
||||
object_store::ObjectStoreParquetFileSink,
|
||||
mock::MockParquetFileSink, object_store::ObjectStoreParquetFileSink, ParquetFileSink,
|
||||
},
|
||||
partition_done_sink::{
|
||||
catalog::CatalogPartitionDoneSink, error_kind::ErrorKindPartitionDoneSinkWrapper,
|
||||
|
@ -52,6 +53,7 @@ use super::{
|
|||
partition_files_source::catalog::CatalogPartitionFilesSource,
|
||||
partition_filter::{
|
||||
and::AndPartitionFilter, greater_matching_files::GreaterMatchingFilesPartitionFilter,
|
||||
greater_size_matching_files::GreaterSizeMatchingFilesPartitionFilter,
|
||||
has_files::HasFilesPartitionFilter, has_matching_file::HasMatchingFilePartitionFilter,
|
||||
logging::LoggingPartitionFilterWrapper, max_files::MaxFilesPartitionFilter,
|
||||
max_parquet_bytes::MaxParquetBytesPartitionFilter, metrics::MetricsPartitionFilterWrapper,
|
||||
|
@ -65,13 +67,18 @@ use super::{
|
|||
endless::EndlessPartititionStream, once::OncePartititionStream, PartitionStream,
|
||||
},
|
||||
partitions_source::{
|
||||
catalog::CatalogPartitionsSource, filter::FilterPartitionsSourceWrapper,
|
||||
logging::LoggingPartitionsSourceWrapper, metrics::MetricsPartitionsSourceWrapper,
|
||||
mock::MockPartitionsSource, not_empty::NotEmptyPartitionsSourceWrapper,
|
||||
catalog_all::CatalogAllPartitionsSource,
|
||||
catalog_to_compact::CatalogToCompactPartitionsSource,
|
||||
filter::FilterPartitionsSourceWrapper, logging::LoggingPartitionsSourceWrapper,
|
||||
metrics::MetricsPartitionsSourceWrapper, mock::MockPartitionsSource,
|
||||
not_empty::NotEmptyPartitionsSourceWrapper,
|
||||
randomize_order::RandomizeOrderPartitionsSourcesWrapper, PartitionsSource,
|
||||
},
|
||||
round_split::all_now::AllNowRoundSplit,
|
||||
scratchpad::{ignore_writes_object_store::IgnoreWrites, prod::ProdScratchpadGen},
|
||||
scratchpad::{
|
||||
ignore_writes_object_store::IgnoreWrites, noop::NoopScratchpadGen, prod::ProdScratchpadGen,
|
||||
ScratchpadGen,
|
||||
},
|
||||
skipped_compactions_source::catalog::CatalogSkippedCompactionsSource,
|
||||
target_level_chooser::{
|
||||
all_at_once::AllAtOnceTargetLevelChooser, target_level::TargetLevelTargetLevelChooser,
|
||||
|
@ -85,22 +92,25 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
|
|||
// TODO: partitions source: implement ID-based sharding / hash-partitioning so we can run multiple compactors in
// parallel. This should be a wrapper around the existing partitions source.
|
||||
|
||||
let partitions_source: Arc<dyn PartitionsSource> = if let Some(ids) = &config.partition_filter {
|
||||
Arc::new(MockPartitionsSource::new(ids.iter().cloned().collect()))
|
||||
} else {
|
||||
Arc::new(CatalogPartitionsSource::new(
|
||||
let partitions_source: Arc<dyn PartitionsSource> = match &config.partitions_source {
|
||||
PartitionsSourceConfig::CatalogRecentWrites => {
|
||||
Arc::new(CatalogToCompactPartitionsSource::new(
|
||||
config.backoff_config.clone(),
|
||||
Arc::clone(&config.catalog),
|
||||
config.partition_threshold,
|
||||
Arc::clone(&config.time_provider),
|
||||
))
|
||||
}
|
||||
PartitionsSourceConfig::CatalogAll => Arc::new(CatalogAllPartitionsSource::new(
|
||||
config.backoff_config.clone(),
|
||||
Arc::clone(&config.catalog),
|
||||
config.partition_threshold,
|
||||
Arc::clone(&config.time_provider),
|
||||
))
|
||||
)),
|
||||
PartitionsSourceConfig::Fixed(ids) => {
|
||||
Arc::new(MockPartitionsSource::new(ids.iter().cloned().collect()))
|
||||
}
|
||||
};
|
||||
|
||||
let mut id_only_partition_filters: Vec<Arc<dyn IdOnlyPartitionFilter>> = vec![];
|
||||
if let Some(ids) = &config.partition_filter {
|
||||
// filter as early as possible, so we don't need any catalog lookups for the filtered partitions
|
||||
id_only_partition_filters.push(Arc::new(ByIdPartitionFilter::new(ids.clone())));
|
||||
}
|
||||
if let Some(shard_config) = &config.shard_config {
|
||||
// add shard filter before performing any catalog IO
|
||||
id_only_partition_filters.push(Arc::new(ShardPartitionFilter::new(
|
||||
|
@ -125,6 +135,15 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
|
|||
}
|
||||
partition_filters.append(&mut version_specific_partition_filters(config));
|
||||
|
||||
let partition_resource_limit_filters: Vec<Arc<dyn PartitionFilter>> = vec![
|
||||
Arc::new(MaxFilesPartitionFilter::new(
|
||||
config.max_input_files_per_partition,
|
||||
)),
|
||||
Arc::new(MaxParquetBytesPartitionFilter::new(
|
||||
config.max_input_parquet_bytes_per_partition,
|
||||
)),
|
||||
];
|
||||
|
||||
let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.shadow_mode {
|
||||
Arc::new(MockPartitionDoneSink::new())
|
||||
} else {
|
||||
|
@ -159,23 +178,81 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
|
|||
Duration::from_secs(60),
|
||||
1,
|
||||
);
|
||||
let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.all_errors_are_fatal {
|
||||
Arc::new(partition_done_sink)
|
||||
} else {
|
||||
Arc::new(ErrorKindPartitionDoneSinkWrapper::new(
|
||||
partition_done_sink,
|
||||
ErrorKind::variants()
|
||||
.iter()
|
||||
.filter(|kind| {
|
||||
// use explicit match statement so we never forget to add new variants
|
||||
match kind {
|
||||
ErrorKind::OutOfMemory | ErrorKind::Timeout | ErrorKind::Unknown => true,
|
||||
ErrorKind::ObjectStore => false,
|
||||
}
|
||||
})
|
||||
.copied()
|
||||
.collect(),
|
||||
))
|
||||
};
|
||||
|
||||
// Note: Place the "not empty" wrapper at the very end so that the logging and metrics wrappers work even when there
// is no data.
|
||||
let partitions_source = NotEmptyPartitionsSourceWrapper::new(
|
||||
let partitions_source =
|
||||
LoggingPartitionsSourceWrapper::new(MetricsPartitionsSourceWrapper::new(
|
||||
RandomizeOrderPartitionsSourcesWrapper::new(partitions_source, 1234),
|
||||
&config.metric_registry,
|
||||
)),
|
||||
Duration::from_secs(5),
|
||||
Arc::clone(&config.time_provider),
|
||||
);
|
||||
));
|
||||
let partitions_source: Arc<dyn PartitionsSource> = if config.process_once {
|
||||
// do not wrap into the "not empty" filter because we do NOT want to throttle in this case but just exit early
|
||||
Arc::new(partitions_source)
|
||||
} else {
|
||||
Arc::new(NotEmptyPartitionsSourceWrapper::new(
|
||||
partitions_source,
|
||||
Duration::from_secs(5),
|
||||
Arc::clone(&config.time_provider),
|
||||
))
|
||||
};
|
||||
|
||||
let partition_stream: Arc<dyn PartitionStream> = if config.process_once {
|
||||
Arc::new(OncePartititionStream::new(partitions_source))
|
||||
} else {
|
||||
Arc::new(EndlessPartititionStream::new(partitions_source))
|
||||
};
|
||||
let partition_continue_conditions = "continue_conditions";
|
||||
let partition_resource_limit_conditions = "resource_limit_conditions";
|
||||
|
||||
let scratchpad_gen: Arc<dyn ScratchpadGen> = if config.simulate_without_object_store {
|
||||
Arc::new(NoopScratchpadGen::new())
|
||||
} else {
|
||||
Arc::new(ProdScratchpadGen::new(
|
||||
config.partition_scratchpad_concurrency,
|
||||
config.backoff_config.clone(),
|
||||
Arc::clone(config.parquet_store_real.object_store()),
|
||||
Arc::clone(config.parquet_store_scratchpad.object_store()),
|
||||
scratchpad_store_output,
|
||||
))
|
||||
};
|
||||
let df_plan_exec: Arc<dyn DataFusionPlanExec> = if config.simulate_without_object_store {
|
||||
Arc::new(NoopDataFusionPlanExec::new())
|
||||
} else {
|
||||
Arc::new(DedicatedDataFusionPlanExec::new(Arc::clone(&config.exec)))
|
||||
};
|
||||
let parquet_file_sink: Arc<dyn ParquetFileSink> = if config.simulate_without_object_store {
|
||||
Arc::new(MockParquetFileSink::new(false))
|
||||
} else {
|
||||
Arc::new(LoggingParquetFileSinkWrapper::new(
|
||||
DedicatedExecParquetFileSinkWrapper::new(
|
||||
ObjectStoreParquetFileSink::new(
|
||||
config.shard_id,
|
||||
config.parquet_store_scratchpad.clone(),
|
||||
Arc::clone(&config.time_provider),
|
||||
),
|
||||
Arc::clone(&config.exec),
|
||||
),
|
||||
))
|
||||
};
|
||||
|
||||
Arc::new(Components {
|
||||
partition_stream,
|
||||
|
@ -197,28 +274,12 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
|
|||
MetricsPartitionFilterWrapper::new(
|
||||
AndPartitionFilter::new(partition_filters),
|
||||
&config.metric_registry,
|
||||
partition_continue_conditions,
|
||||
),
|
||||
partition_continue_conditions,
|
||||
)),
|
||||
partition_done_sink: Arc::new(LoggingPartitionDoneSinkWrapper::new(
|
||||
MetricsPartitionDoneSinkWrapper::new(
|
||||
ErrorKindPartitionDoneSinkWrapper::new(
|
||||
partition_done_sink,
|
||||
ErrorKind::variants()
|
||||
.iter()
|
||||
.filter(|kind| {
|
||||
// use explicit match statement so we never forget to add new variants
|
||||
match kind {
|
||||
ErrorKind::OutOfMemory
|
||||
| ErrorKind::Timeout
|
||||
| ErrorKind::Unknown => true,
|
||||
ErrorKind::ObjectStore => false,
|
||||
}
|
||||
})
|
||||
.copied()
|
||||
.collect(),
|
||||
),
|
||||
&config.metric_registry,
|
||||
),
|
||||
MetricsPartitionDoneSinkWrapper::new(partition_done_sink, &config.metric_registry),
|
||||
)),
|
||||
commit: Arc::new(LoggingCommitWrapper::new(MetricsCommitWrapper::new(
|
||||
commit,
|
||||
|
@ -241,76 +302,56 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
|
|||
config.split_percentage,
|
||||
),
|
||||
)),
|
||||
df_plan_exec: Arc::new(DedicatedDataFusionPlanExec::new(Arc::clone(&config.exec))),
|
||||
parquet_file_sink: Arc::new(LoggingParquetFileSinkWrapper::new(
|
||||
DedicatedExecParquetFileSinkWrapper::new(
|
||||
ObjectStoreParquetFileSink::new(
|
||||
config.shard_id,
|
||||
config.parquet_store_scratchpad.clone(),
|
||||
Arc::clone(&config.time_provider),
|
||||
),
|
||||
Arc::clone(&config.exec),
|
||||
),
|
||||
)),
|
||||
df_plan_exec,
|
||||
parquet_file_sink,
|
||||
round_split: Arc::new(AllNowRoundSplit::new()),
|
||||
divide_initial: Arc::new(SingleBranchDivideInitial::new()),
|
||||
scratchpad_gen: Arc::new(ProdScratchpadGen::new(
|
||||
config.partition_scratchpad_concurrency,
|
||||
config.backoff_config.clone(),
|
||||
Arc::clone(config.parquet_store_real.object_store()),
|
||||
Arc::clone(config.parquet_store_scratchpad.object_store()),
|
||||
scratchpad_store_output,
|
||||
)),
|
||||
scratchpad_gen,
|
||||
target_level_chooser: version_specific_target_level_chooser(config),
|
||||
target_level_split: version_specific_target_level_split(config),
|
||||
non_overlap_split: version_specific_non_ovverlapping_split(config),
|
||||
upgrade_split: version_specific_upgrade_split(config),
|
||||
partition_resource_limit_filter: Arc::new(LoggingPartitionFilterWrapper::new(
|
||||
MetricsPartitionFilterWrapper::new(
|
||||
AndPartitionFilter::new(partition_resource_limit_filters),
|
||||
&config.metric_registry,
|
||||
partition_resource_limit_conditions,
|
||||
),
|
||||
partition_resource_limit_conditions,
|
||||
)),
|
||||
})
|
||||
}
|
||||
|
||||
// Conditions to compact this partition
// Same for all versions to protect the system from OOMs
// . Number of files < max_input_files_per_partition
// . Total size of files < max_input_parquet_bytes_per_partition
|
||||
fn version_specific_partition_filters(config: &Config) -> Vec<Arc<dyn PartitionFilter>> {
|
||||
match config.compact_version {
|
||||
// Must have L0
|
||||
AlgoVersion::AllAtOnce => {
|
||||
vec![
|
||||
vec![Arc::new(HasMatchingFilePartitionFilter::new(
|
||||
LevelRangeFileFilter::new(CompactionLevel::Initial..=CompactionLevel::Initial),
|
||||
))]
|
||||
}
|
||||
// (Has-L0) OR -- to avoid overlapped files
// (num(L1) > N) OR -- to avoid many files
// (total_size(L1) > max_desired_file_size) -- to avoid compacting and then splitting
|
||||
AlgoVersion::TargetLevel => {
|
||||
vec![Arc::new(OrPartitionFilter::new(vec![
|
||||
Arc::new(HasMatchingFilePartitionFilter::new(
|
||||
LevelRangeFileFilter::new(CompactionLevel::Initial..=CompactionLevel::Initial),
|
||||
)),
|
||||
Arc::new(MaxFilesPartitionFilter::new(
|
||||
config.max_input_files_per_partition,
|
||||
Arc::new(GreaterMatchingFilesPartitionFilter::new(
|
||||
LevelRangeFileFilter::new(
|
||||
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
|
||||
),
|
||||
config.min_num_l1_files_to_compact,
|
||||
)),
|
||||
Arc::new(MaxParquetBytesPartitionFilter::new(
|
||||
config.max_input_parquet_bytes_per_partition,
|
||||
Arc::new(GreaterSizeMatchingFilesPartitionFilter::new(
|
||||
LevelRangeFileFilter::new(
|
||||
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
|
||||
),
|
||||
config.max_desired_file_size_bytes,
|
||||
)),
|
||||
]
|
||||
}
|
||||
// (Has-L0) OR (num(L1) > N)
|
||||
AlgoVersion::TargetLevel => {
|
||||
vec![
|
||||
Arc::new(OrPartitionFilter::new(vec![
|
||||
Arc::new(HasMatchingFilePartitionFilter::new(
|
||||
LevelRangeFileFilter::new(
|
||||
CompactionLevel::Initial..=CompactionLevel::Initial,
|
||||
),
|
||||
)),
|
||||
Arc::new(GreaterMatchingFilesPartitionFilter::new(
|
||||
LevelRangeFileFilter::new(
|
||||
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
|
||||
),
|
||||
config.min_num_l1_files_to_compact,
|
||||
)),
|
||||
])),
|
||||
Arc::new(MaxFilesPartitionFilter::new(
|
||||
config.max_input_files_per_partition,
|
||||
)),
|
||||
Arc::new(MaxParquetBytesPartitionFilter::new(
|
||||
config.max_input_parquet_bytes_per_partition,
|
||||
)),
|
||||
]
|
||||
]))]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ pub struct ByIdPartitionFilter {
|
|||
}
|
||||
|
||||
impl ByIdPartitionFilter {
|
||||
#[allow(dead_code)] // not used anywhere
|
||||
pub fn new(ids: HashSet<PartitionId>) -> Self {
|
||||
Self { ids }
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ pub struct Components {
|
|||
pub partition_files_source: Arc<dyn PartitionFilesSource>,
|
||||
pub files_filter: Arc<dyn FilesFilter>,
|
||||
pub partition_filter: Arc<dyn PartitionFilter>,
|
||||
pub partition_resource_limit_filter: Arc<dyn PartitionFilter>,
|
||||
pub partition_done_sink: Arc<dyn PartitionDoneSink>,
|
||||
pub commit: Arc<dyn Commit>,
|
||||
pub namespaces_source: Arc<dyn NamespacesSource>,
|
||||
|
|
|
@ -79,7 +79,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_display() {
|
||||
let sink = DedicatedExecParquetFileSinkWrapper::new(
|
||||
MockParquetFileSink::new(),
|
||||
MockParquetFileSink::new(true),
|
||||
Arc::new(Executor::new_testing()),
|
||||
);
|
||||
assert_eq!(sink.to_string(), "dedicated_exec(mock)",)
|
||||
|
@ -88,7 +88,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_panic() {
|
||||
let sink = DedicatedExecParquetFileSinkWrapper::new(
|
||||
MockParquetFileSink::new(),
|
||||
MockParquetFileSink::new(true),
|
||||
Arc::new(Executor::new_testing()),
|
||||
);
|
||||
let schema = SchemaBuilder::new().build().unwrap().as_arrow();
|
||||
|
|
|
@ -28,15 +28,19 @@ pub struct StoredFile {
|
|||
pub schema: SchemaRef,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug)]
|
||||
pub struct MockParquetFileSink {
|
||||
filter_empty_files: bool,
|
||||
records: Mutex<Vec<StoredFile>>,
|
||||
}
|
||||
|
||||
impl MockParquetFileSink {
|
||||
#[allow(dead_code)] // not used anywhere
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
/// If `filter_empty_files` is true, parquet files with zero rows will not be written as `ParquetFile`s to the catalog.
|
||||
pub fn new(filter_empty_files: bool) -> Self {
|
||||
Self {
|
||||
filter_empty_files,
|
||||
records: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // not used anywhere
|
||||
|
@ -64,7 +68,7 @@ impl ParquetFileSink for MockParquetFileSink {
|
|||
let batches: Vec<_> = stream.try_collect().await?;
|
||||
let row_count = batches.iter().map(|b| b.num_rows()).sum::<usize>();
|
||||
let mut guard = self.records.lock().expect("not poisoned");
|
||||
let out = (row_count > 0).then(|| ParquetFileParams {
|
||||
let out = ((row_count > 0) || !self.filter_empty_files).then(|| ParquetFileParams {
|
||||
shard_id: ShardId::new(1),
|
||||
namespace_id: partition.namespace_id,
|
||||
table_id: partition.table.id,
|
||||
|
@ -106,12 +110,12 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
assert_eq!(MockParquetFileSink::new().to_string(), "mock");
|
||||
assert_eq!(MockParquetFileSink::new(false).to_string(), "mock");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_store() {
|
||||
let sink = MockParquetFileSink::new();
|
||||
async fn test_store_filter_empty() {
|
||||
let sink = MockParquetFileSink::new(true);
|
||||
|
||||
let schema = SchemaBuilder::new()
|
||||
.field("f", DataType::Int64)
|
||||
|
@ -202,4 +206,53 @@ mod tests {
|
|||
assert_eq!(records[2].level, level);
|
||||
assert_eq!(records[2].partition, partition);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_store_keep_empty() {
|
||||
let sink = MockParquetFileSink::new(false);
|
||||
|
||||
let schema = SchemaBuilder::new()
|
||||
.field("f", DataType::Int64)
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap()
|
||||
.as_arrow();
|
||||
let partition = partition_info();
|
||||
let level = CompactionLevel::FileNonOverlapped;
|
||||
let max_l0_created_at = Time::from_timestamp_nanos(1);
|
||||
|
||||
let stream = Box::pin(RecordBatchStreamAdapter::new(
|
||||
Arc::clone(&schema),
|
||||
futures::stream::empty(),
|
||||
));
|
||||
assert_eq!(
|
||||
sink.store(stream, Arc::clone(&partition), level, max_l0_created_at)
|
||||
.await
|
||||
.unwrap(),
|
||||
Some(ParquetFileParams {
|
||||
shard_id: ShardId::new(1),
|
||||
namespace_id: NamespaceId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
partition_id: PartitionId::new(1),
|
||||
object_store_id: Uuid::from_u128(0),
|
||||
max_sequence_number: SequenceNumber::new(0),
|
||||
min_time: Timestamp::new(0),
|
||||
max_time: Timestamp::new(0),
|
||||
file_size_bytes: 1,
|
||||
row_count: 1,
|
||||
compaction_level: CompactionLevel::FileNonOverlapped,
|
||||
created_at: Timestamp::new(1),
|
||||
column_set: ColumnSet::new([]),
|
||||
max_l0_created_at: max_l0_created_at.into(),
|
||||
}),
|
||||
);
|
||||
|
||||
let records = sink.records();
|
||||
assert_eq!(records.len(), 1);
|
||||
|
||||
assert_eq!(records[0].batches.len(), 0);
|
||||
assert_eq!(records[0].schema, schema);
|
||||
assert_eq!(records[0].level, level);
|
||||
assert_eq!(records[0].partition, partition);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{ParquetFile, PartitionId};
|
||||
|
||||
use crate::{components::file_filter::FileFilter, error::DynError};
|
||||
|
||||
use super::PartitionFilter;
|
||||
|
||||
/// A partition filter that matches partitions whose files
/// match the given file filter and whose total size is > max_desired_file_bytes.
/// The idea behind this:
/// 1. Do not compact a large input size, to avoid hitting OOM/crashes.
/// 2. Do not compact a too-large input size that leads to unnecessary splits into many files.
///    - Because we limit the size of a file, a compacted result that is too large will be split into many files.
///    - Because Level-1 files do not overlap, it is a waste to compact a too-large size and then split it.
|
||||
#[derive(Debug)]
|
||||
pub struct GreaterSizeMatchingFilesPartitionFilter<T>
|
||||
where
|
||||
T: FileFilter,
|
||||
{
|
||||
filter: T,
|
||||
max_desired_file_bytes: u64,
|
||||
}
|
||||
|
||||
impl<T> GreaterSizeMatchingFilesPartitionFilter<T>
|
||||
where
|
||||
T: FileFilter,
|
||||
{
|
||||
pub fn new(filter: T, max_desired_file_bytes: u64) -> Self {
|
||||
Self {
|
||||
filter,
|
||||
max_desired_file_bytes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Display for GreaterSizeMatchingFilesPartitionFilter<T>
|
||||
where
|
||||
T: FileFilter,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"greater_size_matching_file({}, {})",
|
||||
self.filter, self.max_desired_file_bytes
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T> PartitionFilter for GreaterSizeMatchingFilesPartitionFilter<T>
|
||||
where
|
||||
T: FileFilter,
|
||||
{
|
||||
async fn apply(
|
||||
&self,
|
||||
_partition_id: PartitionId,
|
||||
files: &[ParquetFile],
|
||||
) -> Result<bool, DynError> {
|
||||
// Matching files
|
||||
let matching_files: Vec<&ParquetFile> = files
|
||||
.iter()
|
||||
.filter(|file| self.filter.apply(file))
|
||||
.collect();
|
||||
|
||||
// Sum of file_size_bytes matching files
|
||||
let sum: i64 = matching_files.iter().map(|file| file.file_size_bytes).sum();
|
||||
Ok(sum >= self.max_desired_file_bytes as i64)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use data_types::CompactionLevel;
|
||||
|
||||
use crate::{
|
||||
components::file_filter::level_range::LevelRangeFileFilter, test_util::ParquetFileBuilder,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
let filter = GreaterSizeMatchingFilesPartitionFilter::new(
|
||||
LevelRangeFileFilter::new(
|
||||
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
|
||||
),
|
||||
1,
|
||||
);
|
||||
assert_eq!(
|
||||
filter.to_string(),
|
||||
"greater_size_matching_file(level_range(1..=1), 1)"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply() {
|
||||
let filter = GreaterSizeMatchingFilesPartitionFilter::new(
|
||||
LevelRangeFileFilter::new(
|
||||
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
|
||||
),
|
||||
15,
|
||||
);
|
||||
let f1 = ParquetFileBuilder::new(0)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped)
|
||||
.with_file_size_bytes(10)
|
||||
.build();
|
||||
let f2 = ParquetFileBuilder::new(1)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped)
|
||||
.with_file_size_bytes(14)
|
||||
.build();
|
||||
let f3 = ParquetFileBuilder::new(2)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped)
|
||||
.with_file_size_bytes(15)
|
||||
.build();
|
||||
|
||||
let p_id = PartitionId::new(1);
|
||||
|
||||
// empty, not large enough
|
||||
assert!(!filter.apply(p_id, &[]).await.unwrap());
|
||||
|
||||
// Not large enough
|
||||
assert!(!filter.apply(p_id, &[f1.clone()]).await.unwrap());
|
||||
assert!(!filter.apply(p_id, &[f2.clone()]).await.unwrap());
|
||||
|
||||
// large enough
|
||||
assert!(filter.apply(p_id, &[f1.clone(), f2.clone()]).await.unwrap());
|
||||
assert!(filter.apply(p_id, &[f3.clone()]).await.unwrap());
|
||||
assert!(filter.apply(p_id, &[f1, f2, f3]).await.unwrap());
|
||||
}
|
||||
}
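
// A minimal standalone sketch (not part of this diff) of the size check that
// `GreaterSizeMatchingFilesPartitionFilter` performs above. The real filter first narrows the
// files through a `FileFilter` and sums `ParquetFile::file_size_bytes`; the free function and
// `main` below are hypothetical and only illustrate the threshold arithmetic from the tests.
fn exceeds_desired_size(file_sizes: &[i64], max_desired_file_bytes: u64) -> bool {
    // Same comparison as `apply`: sum of the (already filtered) sizes >= threshold (cast to i64).
    let sum: i64 = file_sizes.iter().sum();
    sum >= max_desired_file_bytes as i64
}

fn main() {
    // Mirrors the unit test above: 10b or 14b alone stay below a 15b threshold,
    // but together (or a single 15b file) they reach it.
    assert!(!exceeds_desired_size(&[10], 15));
    assert!(!exceeds_desired_size(&[14], 15));
    assert!(exceeds_desired_size(&[10, 14], 15));
    assert!(exceeds_desired_size(&[15], 15));
}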
|
|
@ -14,14 +14,15 @@ where
|
|||
T: PartitionFilter,
|
||||
{
|
||||
inner: T,
|
||||
filter_type: &'static str,
|
||||
}
|
||||
|
||||
impl<T> LoggingPartitionFilterWrapper<T>
|
||||
where
|
||||
T: PartitionFilter,
|
||||
{
|
||||
pub fn new(inner: T) -> Self {
|
||||
Self { inner }
|
||||
pub fn new(inner: T, filter_type: &'static str) -> Self {
|
||||
Self { inner, filter_type }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -30,7 +31,7 @@ where
|
|||
T: PartitionFilter,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "logging({})", self.inner)
|
||||
write!(f, "logging({}, {})", self.inner, self.filter_type)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,13 +48,21 @@ where
|
|||
let res = self.inner.apply(partition_id, files).await;
|
||||
match &res {
|
||||
Ok(true) => {
|
||||
debug!(partition_id = partition_id.get(), "NOT filtered partition");
|
||||
debug!(
|
||||
partition_id = partition_id.get(),
|
||||
filter_type = self.filter_type,
|
||||
"NOT filtered partition"
|
||||
);
|
||||
}
|
||||
Ok(false) => {
|
||||
info!(partition_id = partition_id.get(), "filtered partition");
|
||||
info!(
|
||||
partition_id = partition_id.get(),
|
||||
filter_type = self.filter_type,
|
||||
"filtered partition"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(partition_id = partition_id.get(), %e, "error filtering filtered partition");
|
||||
error!(partition_id = partition_id.get(), filter_type = self.filter_type, %e, "error filtering filtered partition");
|
||||
}
|
||||
}
|
||||
res
|
||||
|
@ -73,13 +82,13 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new());
|
||||
assert_eq!(filter.to_string(), "logging(has_files)");
|
||||
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), "test");
|
||||
assert_eq!(filter.to_string(), "logging(has_files, test)");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply() {
|
||||
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new());
|
||||
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), "test");
|
||||
let f = ParquetFileBuilder::new(0).build();
|
||||
let p_id1 = PartitionId::new(1);
|
||||
let p_id2 = PartitionId::new(2);
|
||||
|
@ -91,8 +100,8 @@ mod tests {
|
|||
|
||||
assert_eq!(
|
||||
capture.to_string(),
|
||||
"level = INFO; message = filtered partition; partition_id = 1; \n\
|
||||
level = DEBUG; message = NOT filtered partition; partition_id = 2; ",
|
||||
"level = INFO; message = filtered partition; partition_id = 1; filter_type = \"test\";
|
||||
level = DEBUG; message = NOT filtered partition; partition_id = 2; filter_type = \"test\"; ",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,27 +17,29 @@ where
|
|||
filter_counter: U64Counter,
|
||||
error_counter: U64Counter,
|
||||
inner: T,
|
||||
filter_type: &'static str,
|
||||
}
|
||||
|
||||
impl<T> MetricsPartitionFilterWrapper<T>
|
||||
where
|
||||
T: PartitionFilter,
|
||||
{
|
||||
pub fn new(inner: T, registry: &Registry) -> Self {
|
||||
pub fn new(inner: T, registry: &Registry, filter_type: &'static str) -> Self {
|
||||
let metric = registry.register_metric::<U64Counter>(
|
||||
"iox_compactor_partition_filter_count",
|
||||
"Number of times the compactor fetched fresh partitions",
|
||||
);
|
||||
|
||||
let pass_counter = metric.recorder(&[("result", "pass")]);
|
||||
let filter_counter = metric.recorder(&[("result", "filter")]);
|
||||
let error_counter = metric.recorder(&[("result", "error")]);
|
||||
let pass_counter = metric.recorder(&[("result", "pass"), ("filter_type", filter_type)]);
|
||||
let filter_counter = metric.recorder(&[("result", "filter"), ("filter_type", filter_type)]);
|
||||
let error_counter = metric.recorder(&[("result", "error"), ("filter_type", filter_type)]);
|
||||
|
||||
Self {
|
||||
pass_counter,
|
||||
filter_counter,
|
||||
error_counter,
|
||||
inner,
|
||||
filter_type,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -47,7 +49,7 @@ where
|
|||
T: PartitionFilter,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "metrics({})", self.inner)
|
||||
write!(f, "metrics({}, {})", self.inner, self.filter_type)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,14 +93,16 @@ mod tests {
|
|||
#[test]
|
||||
fn test_display() {
|
||||
let registry = Registry::new();
|
||||
let filter = MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), ®istry);
|
||||
assert_eq!(filter.to_string(), "metrics(has_files)",);
|
||||
let filter =
|
||||
MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), ®istry, "test");
|
||||
assert_eq!(filter.to_string(), "metrics(has_files, test)",);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply() {
|
||||
let registry = Registry::new();
|
||||
let filter = MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), ®istry);
|
||||
let filter =
|
||||
MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), ®istry, "test");
|
||||
let p_id = PartitionId::new(1);
|
||||
let f = ParquetFileBuilder::new(0).build();
|
||||
|
||||
|
@ -119,7 +123,10 @@ mod tests {
|
|||
registry
|
||||
.get_instrument::<Metric<U64Counter>>("iox_compactor_partition_filter_count")
|
||||
.expect("instrument not found")
|
||||
.get_observer(&Attributes::from(&[("result", "pass")]))
|
||||
.get_observer(&Attributes::from(&[
|
||||
("result", "pass"),
|
||||
("filter_type", "test"),
|
||||
]))
|
||||
.expect("observer not found")
|
||||
.fetch()
|
||||
}
|
||||
|
@ -128,7 +135,10 @@ mod tests {
|
|||
registry
|
||||
.get_instrument::<Metric<U64Counter>>("iox_compactor_partition_filter_count")
|
||||
.expect("instrument not found")
|
||||
.get_observer(&Attributes::from(&[("result", "filter")]))
|
||||
.get_observer(&Attributes::from(&[
|
||||
("result", "filter"),
|
||||
("filter_type", "test"),
|
||||
]))
|
||||
.expect("observer not found")
|
||||
.fetch()
|
||||
}
|
||||
|
@ -137,7 +147,10 @@ mod tests {
|
|||
registry
|
||||
.get_instrument::<Metric<U64Counter>>("iox_compactor_partition_filter_count")
|
||||
.expect("instrument not found")
|
||||
.get_observer(&Attributes::from(&[("result", "error")]))
|
||||
.get_observer(&Attributes::from(&[
|
||||
("result", "error"),
|
||||
("filter_type", "test"),
|
||||
]))
|
||||
.expect("observer not found")
|
||||
.fetch()
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ use crate::error::DynError;
|
|||
|
||||
pub mod and;
|
||||
pub mod greater_matching_files;
|
||||
pub mod greater_size_matching_files;
|
||||
pub mod has_files;
|
||||
pub mod has_matching_file;
|
||||
pub mod logging;
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
use std::{fmt::Display, sync::Arc};

use async_trait::async_trait;
use backoff::{Backoff, BackoffConfig};
use data_types::PartitionId;
use iox_catalog::interface::Catalog;

use super::PartitionsSource;

#[derive(Debug)]
/// Returns all partitions in the catalog, regardless of any other condition
pub struct CatalogAllPartitionsSource {
    backoff_config: BackoffConfig,
    catalog: Arc<dyn Catalog>,
}

impl CatalogAllPartitionsSource {
    pub fn new(backoff_config: BackoffConfig, catalog: Arc<dyn Catalog>) -> Self {
        Self {
            backoff_config,
            catalog,
        }
    }
}

impl Display for CatalogAllPartitionsSource {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "catalog_all")
    }
}

#[async_trait]
impl PartitionsSource for CatalogAllPartitionsSource {
    async fn fetch(&self) -> Vec<PartitionId> {
        Backoff::new(&self.backoff_config)
            .retry_all_errors("list_ids", || async {
                self.catalog
                    .repositories()
                    .await
                    .partitions()
                    .list_ids()
                    .await
            })
            .await
            .expect("retry forever")
    }
}
|
|
@ -9,14 +9,15 @@ use iox_time::TimeProvider;
|
|||
use super::PartitionsSource;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CatalogPartitionsSource {
|
||||
/// Returns all partitions that had a new parquet file written more than `threshold` ago.
|
||||
pub struct CatalogToCompactPartitionsSource {
|
||||
backoff_config: BackoffConfig,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
threshold: Duration,
|
||||
time_provider: Arc<dyn TimeProvider>,
|
||||
}
|
||||
|
||||
impl CatalogPartitionsSource {
|
||||
impl CatalogToCompactPartitionsSource {
|
||||
pub fn new(
|
||||
backoff_config: BackoffConfig,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
|
@ -32,14 +33,14 @@ impl CatalogPartitionsSource {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for CatalogPartitionsSource {
|
||||
impl Display for CatalogToCompactPartitionsSource {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "catalog")
|
||||
write!(f, "catalog_to_compact")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PartitionsSource for CatalogPartitionsSource {
|
||||
impl PartitionsSource for CatalogToCompactPartitionsSource {
|
||||
async fn fetch(&self) -> Vec<PartitionId> {
|
||||
let cutoff = self.time_provider.now() - self.threshold;
|
||||
|
|
@ -6,7 +6,8 @@ use std::{
|
|||
use async_trait::async_trait;
|
||||
use data_types::PartitionId;
|
||||
|
||||
pub mod catalog;
|
||||
pub mod catalog_all;
|
||||
pub mod catalog_to_compact;
|
||||
pub mod filter;
|
||||
pub mod logging;
|
||||
pub mod metrics;
|
||||
|
|
|
@ -26,7 +26,7 @@ pub fn log_config(config: &Config) {
|
|||
percentage_max_file_size,
|
||||
split_percentage,
|
||||
partition_timeout,
|
||||
partition_filter,
|
||||
partitions_source,
|
||||
shadow_mode,
|
||||
ignore_partition_skip_marker,
|
||||
max_input_files_per_partition,
|
||||
|
@ -35,6 +35,8 @@ pub fn log_config(config: &Config) {
|
|||
compact_version,
|
||||
min_num_l1_files_to_compact,
|
||||
process_once,
|
||||
simulate_without_object_store,
|
||||
all_errors_are_fatal,
|
||||
} = &config;
|
||||
|
||||
let (shard_cfg_n_shards, shard_cfg_shard_id) = match shard_config {
|
||||
|
@ -63,7 +65,7 @@ pub fn log_config(config: &Config) {
|
|||
percentage_max_file_size,
|
||||
split_percentage,
|
||||
partition_timeout_secs=partition_timeout.as_secs_f32(),
|
||||
partition_filter=?partition_filter.as_ref().map(|ids| ids.iter().map(|id| id.get()).collect::<Vec<_>>()),
|
||||
%partitions_source,
|
||||
shadow_mode,
|
||||
ignore_partition_skip_marker,
|
||||
max_input_files_per_partition,
|
||||
|
@ -73,6 +75,8 @@ pub fn log_config(config: &Config) {
|
|||
?compact_version,
|
||||
min_num_l1_files_to_compact,
|
||||
process_once,
|
||||
simulate_without_object_store,
|
||||
all_errors_are_fatal,
|
||||
"config",
|
||||
);
|
||||
}
|
||||
|
@ -86,6 +90,7 @@ pub fn log_components(components: &Components) {
|
|||
partition_files_source,
|
||||
files_filter,
|
||||
partition_filter,
|
||||
partition_resource_limit_filter,
|
||||
partition_done_sink,
|
||||
commit,
|
||||
tables_source,
|
||||
|
@ -108,6 +113,7 @@ pub fn log_components(components: &Components) {
|
|||
%partition_files_source,
|
||||
%files_filter,
|
||||
%partition_filter,
|
||||
%partition_resource_limit_filter,
|
||||
%partition_done_sink,
|
||||
%commit,
|
||||
%tables_source,
|
||||
|
|
|
@ -5,6 +5,7 @@ use parquet_file::ParquetFilePath;
|
|||
use uuid::Uuid;
|
||||
|
||||
pub mod ignore_writes_object_store;
|
||||
pub mod noop;
|
||||
pub mod prod;
|
||||
mod util;
|
||||
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
use std::fmt::Display;

use async_trait::async_trait;
use parquet_file::ParquetFilePath;
use uuid::Uuid;

use super::{Scratchpad, ScratchpadGen};

/// A scratchpad that ignores all inputs and outputs, for use in testing
#[derive(Debug, Default)]
pub struct NoopScratchpadGen;

impl NoopScratchpadGen {
    pub fn new() -> Self {
        Self::default()
    }
}

impl Display for NoopScratchpadGen {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "noop")
    }
}

impl ScratchpadGen for NoopScratchpadGen {
    fn pad(&self) -> Box<dyn Scratchpad> {
        Box::new(NoopScratchpad)
    }
}

#[derive(Debug)]
struct NoopScratchpad;

#[async_trait]
impl Scratchpad for NoopScratchpad {
    async fn load_to_scratchpad(&mut self, files: &[ParquetFilePath]) -> Vec<Uuid> {
        files.iter().map(|f| f.objest_store_id()).collect()
    }

    async fn make_public(&mut self, files: &[ParquetFilePath]) -> Vec<Uuid> {
        files.iter().map(|f| f.objest_store_id()).collect()
    }

    async fn clean_from_scratchpad(&mut self, _files: &[ParquetFilePath]) {}

    async fn clean(&mut self) {}
}
|
|
@ -1,5 +1,5 @@
|
|||
//! Config-related stuff.
|
||||
use std::{collections::HashSet, num::NonZeroUsize, sync::Arc, time::Duration};
|
||||
use std::{collections::HashSet, fmt::Display, num::NonZeroUsize, sync::Arc, time::Duration};
|
||||
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::{PartitionId, ShardId, ShardIndex};
|
||||
|
@ -74,10 +74,8 @@ pub struct Config {
|
|||
/// Maximum duration of the per-partition compaction task.
|
||||
pub partition_timeout: Duration,
|
||||
|
||||
/// Filter partitions to the given set of IDs.
|
||||
///
|
||||
/// This is mostly useful for debugging.
|
||||
pub partition_filter: Option<HashSet<PartitionId>>,
|
||||
/// Source of partitions to consider for compaction.
|
||||
pub partitions_source: PartitionsSourceConfig,
|
||||
|
||||
/// Shadow mode.
|
||||
///
|
||||
|
@ -111,6 +109,19 @@ pub struct Config {
|
|||
|
||||
/// Only process all discovered partitions once.
|
||||
pub process_once: bool,
|
||||
|
||||
/// Simulate compactor w/o any object store interaction. No parquet
|
||||
/// files will be read or written.
|
||||
///
|
||||
/// This will still use the catalog
|
||||
///
|
||||
/// This is mostly useful for testing.
|
||||
pub simulate_without_object_store: bool,
|
||||
|
||||
/// Ensure that ALL errors (including object store errors) result in "skipped" partitions.
|
||||
///
|
||||
/// This is mostly useful for testing.
|
||||
pub all_errors_are_fatal: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
|
@ -191,3 +202,34 @@ pub enum AlgoVersion {
|
|||
/// NOT yet ready for production.
|
||||
TargetLevel,
|
||||
}
|
||||
|
||||
/// Partitions source config.
#[derive(Debug, Clone)]
pub enum PartitionsSourceConfig {
    /// Use the catalog to determine which partitions have recently received writes.
    CatalogRecentWrites,

    /// Use all partitions from the catalog.
    ///
    /// This does NOT consider if/when a partition received any writes.
    CatalogAll,

    /// Use a fixed set of partitions.
    ///
    /// This is mostly useful for debugging.
    Fixed(HashSet<PartitionId>),
}

impl Display for PartitionsSourceConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::CatalogRecentWrites => write!(f, "catalog_recent_writes"),
            Self::CatalogAll => write!(f, "catalog_all"),
            Self::Fixed(p_ids) => {
                let mut p_ids = p_ids.iter().copied().collect::<Vec<_>>();
                p_ids.sort();
                write!(f, "fixed({p_ids:?})")
            }
        }
    }
}
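
// A hedged aside (not part of this diff): the `Fixed` arm above sorts the IDs before printing,
// presumably because `HashSet` iteration order is unspecified and sorting keeps the `Display`
// output (and any log line built from it) deterministic. The stand-alone stand-in below uses a
// plain i64 instead of the real `PartitionId`, purely for illustration.
use std::collections::HashSet;

fn render_fixed(p_ids: &HashSet<i64>) -> String {
    // Same shape as the `Display` impl above: collect, sort, then format with Debug.
    let mut p_ids = p_ids.iter().copied().collect::<Vec<_>>();
    p_ids.sort();
    format!("fixed({p_ids:?})")
}

fn main() {
    let ids: HashSet<i64> = [3, 1, 2].into_iter().collect();
    // Always produces the same string regardless of hash order.
    assert_eq!(render_fixed(&ids), "fixed([1, 2, 3])");
}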
|
||||
|
|
|
@ -230,6 +230,21 @@ async fn try_compact_partition(
|
|||
// compaction
|
||||
let compaction_plan = build_compaction_plan(branch, Arc::clone(&components))?;
|
||||
|
||||
// We cannot run this plan and must skip this partition if the number or total size of input files
// is over the limit. The partition_resource_limit_filter will throw an error if one of the limits
// is hit, which leads to the partition being added to the `skipped_compactions` catalog table so
// we do not bother compacting it again.
// TODO: After https://github.com/influxdata/idpe/issues/17090 is implemented (aka V3), we will
// split files into smaller branches and also compact L0s into fewer L0s to deal with all kinds
// of conditions even with limited resources. Then we will remove this resource limit check.
|
||||
if !components
|
||||
.partition_resource_limit_filter
|
||||
.apply(partition_id, &compaction_plan.files_to_compact)
|
||||
.await?
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Compact
|
||||
let created_file_params = run_compaction_plan(
|
||||
&compaction_plan.files_to_compact,
|
||||
|
|
|
@ -178,7 +178,7 @@ mod error;
|
|||
mod partition_info;
|
||||
|
||||
#[cfg(test)]
|
||||
mod compactor_tests;
|
||||
mod tests;
|
||||
|
||||
pub mod file_group;
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
mod display;
|
||||
pub(crate) use display::{assert_parquet_files, assert_parquet_files_split};
|
||||
pub(crate) use display::{format_files, format_files_split};
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, HashSet},
|
||||
|
@ -19,7 +19,9 @@ use data_types::{
|
|||
};
|
||||
use datafusion::arrow::record_batch::RecordBatch;
|
||||
use futures::TryStreamExt;
|
||||
use iox_tests::util::{TestCatalog, TestParquetFileBuilder, TestTable};
|
||||
use iox_tests::util::{
|
||||
TestCatalog, TestNamespace, TestParquetFileBuilder, TestPartition, TestShard, TestTable,
|
||||
};
|
||||
use iox_time::TimeProvider;
|
||||
use object_store::{path::Path, DynObjectStore};
|
||||
use parquet_file::storage::{ParquetStorage, StorageId};
|
||||
|
@ -28,7 +30,7 @@ use uuid::Uuid;
|
|||
|
||||
use crate::{
|
||||
components::namespaces_source::mock::NamespaceWrapper,
|
||||
config::{AlgoVersion, Config},
|
||||
config::{AlgoVersion, Config, PartitionsSourceConfig},
|
||||
partition_info::PartitionInfo,
|
||||
};
|
||||
|
||||
|
@ -303,38 +305,18 @@ const SPLIT_PERCENTAGE: u16 = 80;
|
|||
const MIN_NUM_L1_FILES_TO_COMPACT: usize = 2;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TestSetupBuilder {
|
||||
with_files: bool,
|
||||
shadow_mode: bool,
|
||||
compact_version: AlgoVersion,
|
||||
pub struct TestSetupBuilder<const WITH_FILES: bool> {
|
||||
config: Config,
|
||||
catalog: Arc<TestCatalog>,
|
||||
ns: Arc<TestNamespace>,
|
||||
shard: Arc<TestShard>,
|
||||
table: Arc<TestTable>,
|
||||
partition: Arc<TestPartition>,
|
||||
files: Vec<ParquetFile>,
|
||||
}
|
||||
|
||||
impl Default for TestSetupBuilder {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
with_files: false,
|
||||
shadow_mode: false,
|
||||
compact_version: AlgoVersion::AllAtOnce,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TestSetupBuilder {
|
||||
pub fn with_files(self) -> Self {
|
||||
Self {
|
||||
with_files: true,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_shadow_mode(self) -> Self {
|
||||
Self {
|
||||
shadow_mode: true,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn build(self) -> TestSetup {
|
||||
impl TestSetupBuilder<false> {
|
||||
pub async fn new() -> Self {
|
||||
let catalog = TestCatalog::new();
|
||||
let ns = catalog.create_namespace_1hr_retention("ns").await;
|
||||
let shard = ns.create_shard(SHARD_INDEX).await;
|
||||
|
@ -344,7 +326,6 @@ impl TestSetupBuilder {
|
|||
table.create_column("tag2", ColumnType::Tag).await;
|
||||
table.create_column("tag3", ColumnType::Tag).await;
|
||||
table.create_column("time", ColumnType::Time).await;
|
||||
let table_schema = table.catalog_schema().await;
|
||||
|
||||
let partition = table
|
||||
.with_shard(&shard)
|
||||
|
@ -356,126 +337,7 @@ impl TestSetupBuilder {
|
|||
let sort_key = SortKey::from_columns(["tag1", "tag2", "tag3", "time"]);
|
||||
let partition = partition.update_sort_key(sort_key.clone()).await;
|
||||
|
||||
let candidate_partition = Arc::new(PartitionInfo {
|
||||
partition_id: partition.partition.id,
|
||||
namespace_id: ns.namespace.id,
|
||||
namespace_name: ns.namespace.name.clone(),
|
||||
table: Arc::new(table.table.clone()),
|
||||
table_schema: Arc::new(table_schema),
|
||||
sort_key: partition.partition.sort_key(),
|
||||
partition_key: partition.partition.partition_key.clone(),
|
||||
});
|
||||
|
||||
let time_provider = Arc::<iox_time::MockProvider>::clone(&catalog.time_provider);
|
||||
let mut parquet_files = vec![];
|
||||
if self.with_files {
|
||||
let time_1_minute_future = time_provider.minutes_into_future(1);
|
||||
let time_2_minutes_future = time_provider.minutes_into_future(2);
|
||||
let time_3_minutes_future = time_provider.minutes_into_future(3);
|
||||
let time_5_minutes_future = time_provider.minutes_into_future(5);
|
||||
|
||||
// L1 file
|
||||
let lp = vec![
|
||||
"table,tag2=PA,tag3=15 field_int=1601i 30000",
|
||||
"table,tag2=OH,tag3=21 field_int=21i 36000", // will be eliminated due to duplicate
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_3_minutes_future)
|
||||
.with_max_l0_created_at(time_1_minute_future)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
|
||||
let level_1_file_1_minute_ago = partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec![
|
||||
"table,tag1=WA field_int=1000i 8000", // will be eliminated due to duplicate
|
||||
"table,tag1=VT field_int=10i 10000", // latest L0 compared with duplicate in level_1_file_1_minute_ago_with_duplicates
|
||||
// keep it
|
||||
"table,tag1=UT field_int=70i 20000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_2_minutes_future)
|
||||
.with_max_l0_created_at(time_2_minutes_future)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let level_0_file_16_minutes_ago = partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec![
|
||||
"table,tag1=WA field_int=1500i 8000", // latest duplicate and kept
|
||||
"table,tag1=VT field_int=10i 6000",
|
||||
"table,tag1=UT field_int=270i 25000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_5_minutes_future)
|
||||
.with_max_l0_created_at(time_5_minutes_future)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let level_0_file_5_minutes_ago = partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L1 file
|
||||
let lp = vec![
|
||||
"table,tag1=VT field_int=88i 10000", // will be eliminated due to duplicate.
|
||||
// Note: created time more recent than level_0_file_16_minutes_ago
|
||||
// but always considered older ingested data
|
||||
"table,tag1=OR field_int=99i 12000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_5_minutes_future)
|
||||
.with_max_l0_created_at(time_3_minutes_future)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
|
||||
let level_1_file_1_minute_ago_with_duplicates: ParquetFile =
|
||||
partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec!["table,tag2=OH,tag3=21 field_int=22i 36000"].join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_min_time(0)
|
||||
.with_max_time(36000)
|
||||
.with_creation_time(time_5_minutes_future)
|
||||
.with_max_l0_created_at(time_5_minutes_future)
|
||||
// Will put the group size between "small" and "large"
|
||||
.with_size_override(50 * 1024 * 1024)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let medium_level_0_file_time_now = partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec![
|
||||
"table,tag1=VT field_int=10i 68000",
|
||||
"table,tag2=OH,tag3=21 field_int=210i 136000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_min_time(36001)
|
||||
.with_max_time(136000)
|
||||
.with_creation_time(time_2_minutes_future)
|
||||
.with_max_l0_created_at(time_2_minutes_future)
|
||||
// Will put the group size two multiples over "large"
|
||||
.with_size_override(180 * 1024 * 1024)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let large_level_0_file_2_2_minutes_ago =
|
||||
partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// Order here isn't relevant; the chunk order should ensure the level 1 files are ordered
|
||||
// first, then the other files by max seq num.
|
||||
parquet_files = vec![
|
||||
level_1_file_1_minute_ago,
|
||||
level_0_file_16_minutes_ago,
|
||||
level_0_file_5_minutes_ago,
|
||||
level_1_file_1_minute_ago_with_duplicates,
|
||||
medium_level_0_file_time_now,
|
||||
large_level_0_file_2_2_minutes_ago,
|
||||
];
|
||||
}
|
||||
|
||||
let config = Arc::new(Config {
|
||||
let config = Config {
|
||||
shard_id: shard.shard.id,
|
||||
metric_registry: catalog.metric_registry(),
|
||||
catalog: catalog.catalog(),
|
||||
|
@ -484,7 +346,7 @@ impl TestSetupBuilder {
|
|||
Arc::new(object_store::memory::InMemory::new()),
|
||||
StorageId::from("scratchpad"),
|
||||
),
|
||||
time_provider,
|
||||
time_provider: catalog.time_provider(),
|
||||
exec: Arc::clone(&catalog.exec),
|
||||
backoff_config: BackoffConfig::default(),
|
||||
partition_concurrency: NonZeroUsize::new(1).unwrap(),
|
||||
|
@ -495,23 +357,248 @@ impl TestSetupBuilder {
|
|||
percentage_max_file_size: PERCENTAGE_MAX_FILE_SIZE,
|
||||
split_percentage: SPLIT_PERCENTAGE,
|
||||
partition_timeout: Duration::from_secs(3_600),
|
||||
partition_filter: None,
|
||||
shadow_mode: self.shadow_mode,
|
||||
partitions_source: PartitionsSourceConfig::CatalogRecentWrites,
|
||||
shadow_mode: false,
|
||||
ignore_partition_skip_marker: false,
|
||||
max_input_files_per_partition: usize::MAX,
|
||||
max_input_parquet_bytes_per_partition: usize::MAX,
|
||||
shard_config: None,
|
||||
compact_version: self.compact_version,
|
||||
compact_version: AlgoVersion::AllAtOnce,
|
||||
min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,
|
||||
process_once: true,
|
||||
simulate_without_object_store: false,
|
||||
all_errors_are_fatal: true,
|
||||
};
|
||||
|
||||
Self {
|
||||
config,
|
||||
catalog,
|
||||
ns,
|
||||
shard,
|
||||
table,
|
||||
partition,
|
||||
files: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn with_files(self) -> TestSetupBuilder<true> {
|
||||
let time_provider = self.catalog.time_provider();
|
||||
let time_1_minute_future = time_provider.minutes_into_future(1);
|
||||
let time_2_minutes_future = time_provider.minutes_into_future(2);
|
||||
let time_3_minutes_future = time_provider.minutes_into_future(3);
|
||||
let time_5_minutes_future = time_provider.minutes_into_future(5);
|
||||
|
||||
// L1 file
|
||||
let lp = vec![
|
||||
"table,tag2=PA,tag3=15 field_int=1601i 30000",
|
||||
"table,tag2=OH,tag3=21 field_int=21i 36000", // will be eliminated due to duplicate
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_3_minutes_future)
|
||||
.with_max_l0_created_at(time_1_minute_future)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
|
||||
let level_1_file_1_minute_ago = self.partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec![
|
||||
"table,tag1=WA field_int=1000i 8000", // will be eliminated due to duplicate
|
||||
"table,tag1=VT field_int=10i 10000", // latest L0 compared with duplicate in level_1_file_1_minute_ago_with_duplicates
|
||||
// keep it
|
||||
"table,tag1=UT field_int=70i 20000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_2_minutes_future)
|
||||
.with_max_l0_created_at(time_2_minutes_future)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let level_0_file_16_minutes_ago = self.partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec![
|
||||
"table,tag1=WA field_int=1500i 8000", // latest duplicate and kept
|
||||
"table,tag1=VT field_int=10i 6000",
|
||||
"table,tag1=UT field_int=270i 25000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_5_minutes_future)
|
||||
.with_max_l0_created_at(time_5_minutes_future)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let level_0_file_5_minutes_ago = self.partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L1 file
|
||||
let lp = vec![
|
||||
"table,tag1=VT field_int=88i 10000", // will be eliminated due to duplicate.
|
||||
// Note: created time more recent than level_0_file_16_minutes_ago
|
||||
// but always considered older ingested data
|
||||
"table,tag1=OR field_int=99i 12000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_creation_time(time_5_minutes_future)
|
||||
.with_max_l0_created_at(time_3_minutes_future)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
|
||||
let level_1_file_1_minute_ago_with_duplicates: ParquetFile =
|
||||
self.partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec!["table,tag2=OH,tag3=21 field_int=22i 36000"].join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_min_time(0)
|
||||
.with_max_time(36000)
|
||||
.with_creation_time(time_5_minutes_future)
|
||||
.with_max_l0_created_at(time_5_minutes_future)
|
||||
// Will put the group size between "small" and "large"
|
||||
.with_size_override(50 * 1024 * 1024)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let medium_level_0_file_time_now = self.partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// L0 file
|
||||
let lp = vec![
|
||||
"table,tag1=VT field_int=10i 68000",
|
||||
"table,tag2=OH,tag3=21 field_int=210i 136000",
|
||||
]
|
||||
.join("\n");
|
||||
let builder = TestParquetFileBuilder::default()
|
||||
.with_line_protocol(&lp)
|
||||
.with_min_time(36001)
|
||||
.with_max_time(136000)
|
||||
.with_creation_time(time_2_minutes_future)
|
||||
.with_max_l0_created_at(time_2_minutes_future)
|
||||
// Will put the group size two multiples over "large"
|
||||
.with_size_override(180 * 1024 * 1024)
|
||||
.with_compaction_level(CompactionLevel::Initial);
|
||||
let large_level_0_file_2_2_minutes_ago =
|
||||
self.partition.create_parquet_file(builder).await.into();
|
||||
|
||||
// Order here isn't relevant; the chunk order should ensure the level 1 files are ordered
|
||||
// first, then the other files by max seq num.
|
||||
let files = vec![
|
||||
level_1_file_1_minute_ago,
|
||||
level_0_file_16_minutes_ago,
|
||||
level_0_file_5_minutes_ago,
|
||||
level_1_file_1_minute_ago_with_duplicates,
|
||||
medium_level_0_file_time_now,
|
||||
large_level_0_file_2_2_minutes_ago,
|
||||
];
|
||||
|
||||
TestSetupBuilder::<true> {
|
||||
config: self.config,
|
||||
catalog: self.catalog,
|
||||
ns: self.ns,
|
||||
shard: self.shard,
|
||||
table: self.table,
|
||||
partition: self.partition,
|
||||
files,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TestSetupBuilder<true> {
|
||||
pub fn with_max_input_files_per_partition_relative_to_n_files(self, delta: isize) -> Self {
|
||||
Self {
|
||||
config: Config {
|
||||
max_input_files_per_partition: (self.files.len() as isize + delta) as usize,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Set max_input_parquet_bytes_per_partition
|
||||
pub fn with_max_input_parquet_bytes_per_partition_relative_to_total_size(
|
||||
self,
|
||||
delta: isize,
|
||||
) -> Self {
|
||||
let total_size = self.files.iter().map(|f| f.file_size_bytes).sum::<i64>();
|
||||
Self {
|
||||
config: Config {
|
||||
max_input_parquet_bytes_per_partition: (total_size as isize + delta) as usize,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const WITH_FILES: bool> TestSetupBuilder<WITH_FILES> {
|
||||
pub fn with_shadow_mode(self) -> Self {
|
||||
Self {
|
||||
config: Config {
|
||||
shadow_mode: true,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Set compact version
|
||||
pub fn with_compact_version(self, compact_version: AlgoVersion) -> Self {
|
||||
Self {
|
||||
config: Config {
|
||||
compact_version,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// set min_num_l1_files_to_compact
|
||||
pub fn with_min_num_l1_files_to_compact(self, min_num_l1_files_to_compact: usize) -> Self {
|
||||
Self {
|
||||
config: Config {
|
||||
min_num_l1_files_to_compact,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Set max_input_files_per_partition
|
||||
pub fn with_max_input_files_per_partition(self, max_input_files_per_partition: usize) -> Self {
|
||||
Self {
|
||||
config: Config {
|
||||
max_input_files_per_partition,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn simulate_without_object_store(self) -> Self {
|
||||
Self {
|
||||
config: Config {
|
||||
simulate_without_object_store: true,
|
||||
..self.config
|
||||
},
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn build(self) -> TestSetup {
|
||||
let candidate_partition = Arc::new(PartitionInfo {
|
||||
partition_id: self.partition.partition.id,
|
||||
namespace_id: self.ns.namespace.id,
|
||||
namespace_name: self.ns.namespace.name.clone(),
|
||||
table: Arc::new(self.table.table.clone()),
|
||||
table_schema: Arc::new(self.table.catalog_schema().await),
|
||||
sort_key: self.partition.partition.sort_key(),
|
||||
partition_key: self.partition.partition.partition_key.clone(),
|
||||
});
|
||||
|
||||
TestSetup {
|
||||
files: Arc::new(parquet_files),
|
||||
files: Arc::new(self.files),
|
||||
partition_info: candidate_partition,
|
||||
catalog,
|
||||
table,
|
||||
config,
|
||||
catalog: self.catalog,
|
||||
table: self.table,
|
||||
partition: self.partition,
|
||||
config: Arc::new(self.config),
|
||||
}
|
||||
}
|
||||
}
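The builder above replaces the old runtime `with_files`/`shadow_mode` flags with a const-generic typestate: methods that need the created files only exist on `TestSetupBuilder<true>`. A minimal, self-contained sketch of that pattern (all names here are illustrative, not taken from the diff):

```rust
// Illustrative typestate-builder sketch; the state is tracked at compile time via a const generic.
#[derive(Debug)]
struct Builder<const WITH_FILES: bool> {
    files: Vec<String>,
}

impl Builder<false> {
    fn new() -> Self {
        Self { files: vec![] }
    }

    // Consuming `self` and returning `Builder<true>` is the state transition.
    fn with_files(self) -> Builder<true> {
        Builder {
            files: vec!["file_1".to_string(), "file_2".to_string()],
        }
    }
}

impl Builder<true> {
    // Only callable once files exist, because the limit is derived from their count.
    fn limit_relative_to_n_files(self, delta: isize) -> Self {
        let limit = (self.files.len() as isize + delta) as usize;
        println!("derived limit: {limit}");
        self
    }
}

impl<const WITH_FILES: bool> Builder<WITH_FILES> {
    // Available in either state, like `with_shadow_mode` above.
    fn build(self) -> Vec<String> {
        self.files
    }
}

fn main() {
    let files = Builder::<false>::new()
        .with_files()
        .limit_relative_to_n_files(1)
        .build();
    println!("{files:?}");
}
```

Calling `limit_relative_to_n_files` before `with_files` then fails to compile instead of panicking at test time.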
|
||||
|
@ -521,12 +608,13 @@ pub struct TestSetup {
|
|||
pub partition_info: Arc<PartitionInfo>,
|
||||
pub catalog: Arc<TestCatalog>,
|
||||
pub table: Arc<TestTable>,
|
||||
pub partition: Arc<TestPartition>,
|
||||
pub config: Arc<Config>,
|
||||
}
|
||||
|
||||
impl TestSetup {
|
||||
pub fn builder() -> TestSetupBuilder {
|
||||
TestSetupBuilder::default()
|
||||
pub async fn builder() -> TestSetupBuilder<false> {
|
||||
TestSetupBuilder::new().await
|
||||
}
|
||||
|
||||
/// Get the catalog files stored in the catalog
|
||||
|
@ -542,18 +630,6 @@ impl TestSetup {
|
|||
self.table.read_parquet_file(file).await
|
||||
}
|
||||
|
||||
/// Set compact version
|
||||
pub fn set_compact_version(&mut self, compact_version: AlgoVersion) {
|
||||
let mut config = Arc::get_mut(&mut self.config).unwrap();
|
||||
config.compact_version = compact_version;
|
||||
}
|
||||
|
||||
/// set min_num_l1_files_to_compact
|
||||
pub fn set_min_num_l1_files_to_compact(&mut self, min_num_l1_files_to_compact: usize) {
|
||||
let mut config = Arc::get_mut(&mut self.config).unwrap();
|
||||
config.min_num_l1_files_to_compact = min_num_l1_files_to_compact;
|
||||
}
|
||||
|
||||
/// return a set of times relative to config.time_provider.now()
|
||||
pub fn test_times(&self) -> TestTimes {
|
||||
TestTimes::new(self.config.time_provider.as_ref())
|
||||
|
|
|
@ -2,61 +2,29 @@ use std::collections::BTreeMap;
|
|||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
|
||||
/// Compares the a vec of strs with the output of a set of parquet
|
||||
/// files. See docs on [`ParquetFileFormatter`] for example
|
||||
/// expected output.
|
||||
///
|
||||
/// Designed so that failure output can be directly copy/pasted
|
||||
/// into the test code as expected results.
|
||||
///
|
||||
/// Expects to be called about like this:
|
||||
/// assert_parquet_files!(expected_lines: &[&str], &files)
|
||||
#[track_caller]
|
||||
pub fn assert_parquet_files<'a>(
|
||||
expected_lines: impl IntoIterator<Item = &'a str>,
|
||||
files: &[ParquetFile],
|
||||
) {
|
||||
let expected_lines: Vec<String> = expected_lines.into_iter().map(|s| s.to_string()).collect();
|
||||
|
||||
let actual_lines = readable_list_of_files(None, files);
|
||||
|
||||
assert_eq!(
|
||||
expected_lines, actual_lines,
|
||||
"\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n",
|
||||
);
|
||||
/// Formats the list of files in the manner described on
|
||||
/// [`ParquetFileFormatter`] into strings suitable for comparison with
|
||||
/// `insta`.
|
||||
pub fn format_files<'a>(
|
||||
title: impl Into<String>,
|
||||
files: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
) -> Vec<String> {
|
||||
readable_list_of_files(Some(title.into()), files)
|
||||
}
|
||||
|
||||
/// Compares the a vec of strs with the output of a set of parquet
|
||||
/// files. This is used to compare the results of splitting files into
|
||||
/// two groups. See docs on [`ParquetFileFormatter`] for example
|
||||
/// expected output.
|
||||
///
|
||||
/// Designed so that failure output can be directly copy/pasted
|
||||
/// into the test code as expected results.
|
||||
///
|
||||
/// Expects to be called about like this:
|
||||
/// assert_parquet_files_split!(expected_lines: &[&str], &files1, &files2)
|
||||
#[track_caller]
|
||||
pub fn assert_parquet_files_split<'a>(
|
||||
expected_lines: impl IntoIterator<Item = &'a str>,
|
||||
files1: &[ParquetFile],
|
||||
files2: &[ParquetFile],
|
||||
) {
|
||||
let expected_lines: Vec<String> = expected_lines.into_iter().map(|s| s.to_string()).collect();
|
||||
/// Formats two lists of files in the manner described on
|
||||
/// [`ParquetFileFormatter`] into strings suitable for comparison with
|
||||
/// `insta`.
|
||||
pub fn format_files_split<'a>(
|
||||
title1: impl Into<String>,
|
||||
files1: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
title2: impl Into<String>,
|
||||
files2: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
) -> Vec<String> {
|
||||
let strings1 = readable_list_of_files(Some(title1.into()), files1);
|
||||
let strings2 = readable_list_of_files(Some(title2.into()), files2);
|
||||
|
||||
let actual_lines_one = readable_list_of_files(Some("left".into()), files1);
|
||||
|
||||
let actual_lines_two = readable_list_of_files(Some("right".into()), files2);
|
||||
|
||||
let actual_lines: Vec<_> = actual_lines_one
|
||||
.into_iter()
|
||||
.chain(actual_lines_two.into_iter())
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
expected_lines, actual_lines,
|
||||
"\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n",
|
||||
);
|
||||
strings1.into_iter().chain(strings2.into_iter()).collect()
|
||||
}
|
||||
|
||||
/// default width for printing
|
||||
|
@ -69,9 +37,8 @@ const DEFAULT_HEADING_WIDTH: usize = 20;
|
|||
/// parquet files arranged so they are lined up horizontally based on
|
||||
/// their relative time range.
|
||||
///
|
||||
/// See docs on [`ParquetFileFormatter`]
|
||||
/// for examples.
|
||||
pub fn readable_list_of_files<'a>(
|
||||
/// See docs on [`ParquetFileFormatter`] for examples.
|
||||
fn readable_list_of_files<'a>(
|
||||
title: Option<String>,
|
||||
files: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
) -> Vec<String> {
|
||||
|
@ -127,7 +94,7 @@ pub fn readable_list_of_files<'a>(
|
|||
#[derive(Debug, Default)]
|
||||
struct ParquetFileFormatter {
|
||||
/// should the size of the files be shown (if they are different)
|
||||
show_size: bool,
|
||||
file_size_seen: FileSizeSeen,
|
||||
/// width in characters
|
||||
row_heading_chars: usize,
|
||||
/// width, in characters, of the entire min/max timerange
|
||||
|
@ -140,9 +107,10 @@ struct ParquetFileFormatter {
|
|||
max_time: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Default)]
|
||||
/// helper to track if there are multiple file sizes in a set of parquet files
|
||||
enum FileSizeSeen {
|
||||
#[default]
|
||||
None,
|
||||
One(i64),
|
||||
Many,
|
||||
|
@ -182,15 +150,12 @@ impl ParquetFileFormatter {
|
|||
file_size_seen.observe(file.file_size_bytes)
|
||||
});
|
||||
|
||||
// show the size if there are multiple sizes
|
||||
let show_size = matches!(file_size_seen, FileSizeSeen::Many);
|
||||
|
||||
let time_range = max_time - min_time;
|
||||
|
||||
let ns_per_char = (time_range as f64) / (width_chars as f64);
|
||||
|
||||
Self {
|
||||
show_size,
|
||||
file_size_seen,
|
||||
width_chars,
|
||||
ns_per_char,
|
||||
min_time,
|
||||
|
@ -212,9 +177,14 @@ impl ParquetFileFormatter {
|
|||
}
|
||||
|
||||
fn format_level(&self, level: &CompactionLevel) -> String {
|
||||
let level_heading = display_level(level);
|
||||
let level_heading = match self.file_size_seen {
|
||||
FileSizeSeen::One(sz) => format!("{level_heading}, all files {sz}b"),
|
||||
_ => level_heading.into(),
|
||||
};
|
||||
|
||||
format!(
|
||||
"{:width$}",
|
||||
display_level(level),
|
||||
"{level_heading:width$}",
|
||||
width = self.width_chars + self.row_heading_chars
|
||||
)
|
||||
}
|
||||
|
@ -239,7 +209,9 @@ impl ParquetFileFormatter {
|
|||
// Get compact display of the file, like 'L0.1'
|
||||
// add |--- ---| formatting (based on field width)
|
||||
let file_string = format!("|{:-^width$}|", display_file_id(file), width = field_width);
|
||||
let row_heading = display_format(file, self.show_size);
|
||||
// show individual file sizes if they are different
|
||||
let show_size = matches!(self.file_size_seen, FileSizeSeen::Many);
|
||||
let row_heading = display_format(file, show_size);
|
||||
|
||||
// special case "zero" width times
|
||||
if self.min_time == self.max_time {
|
||||
|
@ -298,7 +270,7 @@ fn display_format(file: &ParquetFile, show_size: bool) -> String {
|
|||
let max_time = file.max_time.get(); // display as i64
|
||||
let sz = file.file_size_bytes;
|
||||
if show_size {
|
||||
format!("{file_id}[{min_time},{max_time}]@{sz}")
|
||||
format!("{file_id}[{min_time},{max_time}] {sz}b")
|
||||
} else {
|
||||
format!("{file_id}[{min_time},{max_time}]")
|
||||
}
|
||||
|
@ -321,13 +293,16 @@ mod test {
|
|||
.build(),
|
||||
];
|
||||
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.1[0,0] |-------------------------------------L0.1-------------------------------------|",
|
||||
"L0.2[0,0] |-------------------------------------L0.2-------------------------------------|",
|
||||
];
|
||||
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("display", &files),
|
||||
@r###"
|
||||
---
|
||||
- display
|
||||
- "L0, all files 1b "
|
||||
- "L0.1[0,0] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[0,0] |-------------------------------------L0.2-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -345,15 +320,18 @@ mod test {
|
|||
.build(),
|
||||
];
|
||||
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.1[0,0]@1 |-------------------------------------L0.1-------------------------------------|",
|
||||
"L0.2[0,0]@1 |-------------------------------------L0.2-------------------------------------|",
|
||||
"L2 ",
|
||||
"L2.3[0,0]@42 |-------------------------------------L2.3-------------------------------------|",
|
||||
];
|
||||
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("display", &files),
|
||||
@r###"
|
||||
---
|
||||
- display
|
||||
- "L0 "
|
||||
- "L0.1[0,0] 1b |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[0,0] 1b |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L2 "
|
||||
- "L2.3[0,0] 42b |-------------------------------------L2.3-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -375,13 +353,16 @@ mod test {
|
|||
.build(),
|
||||
];
|
||||
|
||||
let expected = vec![
|
||||
"L0 ",
|
||||
"L0.1[100,200]@1 |----------L0.1----------| ",
|
||||
"L0.2[300,400]@1 |----------L0.2----------| ",
|
||||
"L0.11[150,350]@44 |-----------------------L0.11-----------------------| ",
|
||||
];
|
||||
|
||||
assert_parquet_files(expected, &files);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("display", &files),
|
||||
@r###"
|
||||
---
|
||||
- display
|
||||
- "L0 "
|
||||
- "L0.1[100,200] 1b |----------L0.1----------| "
|
||||
- "L0.2[300,400] 1b |----------L0.2----------| "
|
||||
- "L0.11[150,350] 44b |-----------------------L0.11-----------------------| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,692 @@
|
|||
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
|
||||
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
use data_types::{CompactionLevel, ParquetFile, PartitionId};
|
||||
use iox_query::exec::ExecutorType;
|
||||
use iox_tests::util::TestParquetFileBuilder;
|
||||
use tracker::AsyncSemaphoreMetrics;
|
||||
|
||||
use crate::{
|
||||
components::{
|
||||
df_planner::panic::PanicDataFusionPlanner, hardcoded::hardcoded_components, Components,
|
||||
},
|
||||
config::AlgoVersion,
|
||||
driver::compact,
|
||||
test_util::{format_files, list_object_store, TestSetup},
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_no_file() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// no files
|
||||
let setup = TestSetup::builder().await.build().await;
|
||||
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert!(files.is_empty());
|
||||
|
||||
// compact
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify catalog is still empty
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert!(files.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_num_files_over_limit() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
for version in [AlgoVersion::AllAtOnce, AlgoVersion::TargetLevel] {
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
.with_compact_version(version)
|
||||
// Set max num files to 4 (< num files) --> it won't get compacted
|
||||
.with_max_input_files_per_partition(4)
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 6);
|
||||
// verify ID and compaction level of the files
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
run_compact(&setup).await;
|
||||
//
|
||||
// read files and verify they are not compacted
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 6);
|
||||
//
|
||||
// verify ID and compaction level of the files
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_total_file_size_over_limit() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
for version in [AlgoVersion::AllAtOnce, AlgoVersion::TargetLevel] {
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
// Set max size < the input file size --> it won't get compacted
|
||||
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
|
||||
.with_compact_version(version)
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 6);
|
||||
|
||||
// verify ID and compaction level of the files
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
run_compact(&setup).await;
|
||||
|
||||
// read files and verify they are not compacted
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 6);
|
||||
|
||||
// verify ID and compaction level of the files
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_all_at_once() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
// Ensure we have enough resource to compact the files
|
||||
.with_max_input_files_per_partition_relative_to_n_files(10)
|
||||
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(1000)
|
||||
.with_compact_version(AlgoVersion::AllAtOnce)
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// verify 6 files
|
||||
// verify ID and compaction level of the files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
// verify ID and max_l0_created_at
|
||||
let times = setup.test_times();
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
vec![
|
||||
(1, times.time_1_minute_future),
|
||||
(2, times.time_2_minutes_future),
|
||||
(3, times.time_5_minutes_future),
|
||||
(4, times.time_3_minutes_future),
|
||||
(5, times.time_5_minutes_future),
|
||||
(6, times.time_2_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// compact
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify number of files: 6 files are compacted into 2 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(7, CompactionLevel::FileNonOverlapped),
|
||||
(8, CompactionLevel::FileNonOverlapped),
|
||||
],
|
||||
);
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
// both files have max_l0_created time_5_minutes_future
|
||||
// which is the max of all L0 input's max_l0_created_at
|
||||
vec![
|
||||
(7, times.time_5_minutes_future),
|
||||
(8, times.time_5_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// verify the content of files
|
||||
// Compacted smaller file with the later data
|
||||
let mut files = setup.list_by_table_not_to_delete().await;
|
||||
let file1 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file1).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
// Compacted larger file with the earlier data
|
||||
let file0 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file0).await;
|
||||
assert_batches_sorted_eq!(
|
||||
[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
"| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_target_level() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
// Ensure we have enough resource to compact the files
|
||||
.with_max_input_files_per_partition_relative_to_n_files(10)
|
||||
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(1000)
|
||||
.with_compact_version(AlgoVersion::TargetLevel)
|
||||
.with_min_num_l1_files_to_compact(2)
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
// verify ID and max_l0_created_at
|
||||
let times = setup.test_times();
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
vec![
|
||||
(1, times.time_1_minute_future),
|
||||
(2, times.time_2_minutes_future),
|
||||
(3, times.time_5_minutes_future),
|
||||
(4, times.time_3_minutes_future),
|
||||
(5, times.time_5_minutes_future),
|
||||
(6, times.time_2_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// compact
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify number of files: 6 files are compacted into 2 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 2);
|
||||
|
||||
assert_levels(
|
||||
&files,
|
||||
// This is the result of 2-round compaction from L0s -> L1s and then L1s -> L2s
// The first round will create two L1 files with IDs 7 and 8
// The second round will create two L2 files with IDs 9 and 10
|
||||
vec![(9, CompactionLevel::Final), (10, CompactionLevel::Final)],
|
||||
);
|
||||
|
||||
assert_max_l0_created_at(
|
||||
&files,
|
||||
// both files have max_l0_created time_5_minutes_future
|
||||
// which is the max of all L0 input's max_l0_created_at
|
||||
vec![
|
||||
(9, times.time_5_minutes_future),
|
||||
(10, times.time_5_minutes_future),
|
||||
],
|
||||
);
|
||||
|
||||
// verify the content of files
|
||||
// Compacted smaller file with the later data
|
||||
let mut files = setup.list_by_table_not_to_delete().await;
|
||||
let file1 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file1).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
// Compacted larger file with the earlier data
|
||||
let file0 = files.pop().unwrap();
|
||||
let batches = setup.read_parquet_file(file0).await;
|
||||
assert_batches_sorted_eq!(
|
||||
[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
"| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_skip_compact() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder().await.with_files().await.build().await;
|
||||
|
||||
let expected_files_and_levels = vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
];
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(&files, expected_files_and_levels.clone());
|
||||
|
||||
// add the partition into skipped compaction
|
||||
setup
|
||||
.catalog
|
||||
.add_to_skipped_compaction(setup.partition_info.partition_id, "test reason")
|
||||
.await;
|
||||
|
||||
// compact but nothing will be compacted because the partition is skipped
|
||||
run_compact(&setup).await;
|
||||
|
||||
// verify still 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_levels(&files, expected_files_and_levels.clone());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_partition_fail() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder().await.with_files().await.build().await;
|
||||
|
||||
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
|
||||
assert!(!catalog_files_pre.is_empty());
|
||||
|
||||
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
|
||||
assert!(!object_store_files_pre.is_empty());
|
||||
|
||||
run_compact_failing(&setup).await;
|
||||
|
||||
let catalog_files_post = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(catalog_files_pre, catalog_files_post);
|
||||
|
||||
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
|
||||
assert_eq!(object_store_files_pre, object_store_files_post);
|
||||
|
||||
assert_skipped_compactions(
|
||||
&setup,
|
||||
[(
|
||||
setup.partition_info.partition_id,
|
||||
"serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: foo",
|
||||
)],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shadow_mode() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
.with_shadow_mode()
|
||||
.build()
|
||||
.await;
|
||||
|
||||
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
|
||||
assert!(!catalog_files_pre.is_empty());
|
||||
|
||||
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
|
||||
assert!(!object_store_files_pre.is_empty());
|
||||
|
||||
run_compact(&setup).await;
|
||||
|
||||
let catalog_files_post = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(catalog_files_pre, catalog_files_post);
|
||||
|
||||
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
|
||||
assert_eq!(object_store_files_pre, object_store_files_post);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shadow_mode_partition_fail() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
.with_shadow_mode()
|
||||
.build()
|
||||
.await;
|
||||
|
||||
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
|
||||
assert!(!catalog_files_pre.is_empty());
|
||||
|
||||
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
|
||||
assert!(!object_store_files_pre.is_empty());
|
||||
|
||||
run_compact_failing(&setup).await;
|
||||
|
||||
let catalog_files_post = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(catalog_files_pre, catalog_files_post);
|
||||
|
||||
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
|
||||
assert_eq!(object_store_files_pre, object_store_files_post);
|
||||
|
||||
assert_skipped_compactions(&setup, []).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pr6890() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.simulate_without_object_store()
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// create virtual files
|
||||
let mut input_files = vec![];
|
||||
for _ in 0..10 {
|
||||
let file = setup
|
||||
.partition
|
||||
.create_parquet_file(
|
||||
TestParquetFileBuilder::default()
|
||||
.with_min_time(100)
|
||||
.with_max_time(200)
|
||||
.with_file_size_bytes(1_000_000) // 1MB
|
||||
.with_compaction_level(CompactionLevel::Initial)
|
||||
// need some LP to generate the schema
|
||||
.with_line_protocol("table,tag1=A,tag2=B,tag3=C field_int=1i 100"),
|
||||
)
|
||||
.await
|
||||
.parquet_file;
|
||||
input_files.push(file);
|
||||
}
|
||||
|
||||
setup.catalog.time_provider.inc(Duration::from_nanos(200));
|
||||
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("input", &input_files),
|
||||
@r###"
|
||||
---
|
||||
- input
|
||||
- "L0, all files 1000000b "
|
||||
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,200] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,200] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,200] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,200] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,200] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,200] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,200] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,200] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,200] |------------------------------------L0.10-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
|
||||
run_compact(&setup).await;
|
||||
assert_skipped_compactions(&setup, []).await;
|
||||
|
||||
let output_files = setup.list_by_table_not_to_delete().await;
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("input", &output_files),
|
||||
@r###"
|
||||
---
|
||||
- input
|
||||
- "L1, all files 1b "
|
||||
- "L1.11[0,0] |------------------------------------L1.11-------------------------------------|"
|
||||
- "L1.12[0,0] |------------------------------------L1.12-------------------------------------|"
|
||||
- "L1.13[0,0] |------------------------------------L1.13-------------------------------------|"
|
||||
- "L1.14[0,0] |------------------------------------L1.14-------------------------------------|"
|
||||
- "L1.15[0,0] |------------------------------------L1.15-------------------------------------|"
|
||||
- "L1.16[0,0] |------------------------------------L1.16-------------------------------------|"
|
||||
- "L1.17[0,0] |------------------------------------L1.17-------------------------------------|"
|
||||
- "L1.18[0,0] |------------------------------------L1.18-------------------------------------|"
|
||||
- "L1.19[0,0] |------------------------------------L1.19-------------------------------------|"
|
||||
- "L1.20[0,0] |------------------------------------L1.20-------------------------------------|"
|
||||
- "L1.21[0,0] |------------------------------------L1.21-------------------------------------|"
|
||||
- "L1.22[0,0] |------------------------------------L1.22-------------------------------------|"
|
||||
- "L1.23[0,0] |------------------------------------L1.23-------------------------------------|"
|
||||
- "L1.24[0,0] |------------------------------------L1.24-------------------------------------|"
|
||||
- "L1.25[0,0] |------------------------------------L1.25-------------------------------------|"
|
||||
- "L1.26[0,0] |------------------------------------L1.26-------------------------------------|"
|
||||
- "L1.27[0,0] |------------------------------------L1.27-------------------------------------|"
|
||||
- "L1.28[0,0] |------------------------------------L1.28-------------------------------------|"
|
||||
- "L1.29[0,0] |------------------------------------L1.29-------------------------------------|"
|
||||
- "L1.30[0,0] |------------------------------------L1.30-------------------------------------|"
|
||||
- "L1.31[0,0] |------------------------------------L1.31-------------------------------------|"
|
||||
- "L1.32[0,0] |------------------------------------L1.32-------------------------------------|"
|
||||
- "L1.33[0,0] |------------------------------------L1.33-------------------------------------|"
|
||||
- "L1.34[0,0] |------------------------------------L1.34-------------------------------------|"
|
||||
- "L1.35[0,0] |------------------------------------L1.35-------------------------------------|"
|
||||
- "L1.36[0,0] |------------------------------------L1.36-------------------------------------|"
|
||||
- "L1.37[0,0] |------------------------------------L1.37-------------------------------------|"
|
||||
- "L1.38[0,0] |------------------------------------L1.38-------------------------------------|"
|
||||
- "L1.39[0,0] |------------------------------------L1.39-------------------------------------|"
|
||||
- "L1.40[0,0] |------------------------------------L1.40-------------------------------------|"
|
||||
- "L1.41[0,0] |------------------------------------L1.41-------------------------------------|"
|
||||
- "L1.42[0,0] |------------------------------------L1.42-------------------------------------|"
|
||||
- "L1.43[0,0] |------------------------------------L1.43-------------------------------------|"
|
||||
- "L1.44[0,0] |------------------------------------L1.44-------------------------------------|"
|
||||
- "L1.45[0,0] |------------------------------------L1.45-------------------------------------|"
|
||||
- "L1.46[0,0] |------------------------------------L1.46-------------------------------------|"
|
||||
- "L1.47[0,0] |------------------------------------L1.47-------------------------------------|"
|
||||
- "L1.48[0,0] |------------------------------------L1.48-------------------------------------|"
|
||||
- "L1.49[0,0] |------------------------------------L1.49-------------------------------------|"
|
||||
- "L1.50[0,0] |------------------------------------L1.50-------------------------------------|"
|
||||
- "L1.51[0,0] |------------------------------------L1.51-------------------------------------|"
|
||||
- "L1.52[0,0] |------------------------------------L1.52-------------------------------------|"
|
||||
- "L1.53[0,0] |------------------------------------L1.53-------------------------------------|"
|
||||
- "L1.54[0,0] |------------------------------------L1.54-------------------------------------|"
|
||||
- "L1.55[0,0] |------------------------------------L1.55-------------------------------------|"
|
||||
- "L1.56[0,0] |------------------------------------L1.56-------------------------------------|"
|
||||
- "L1.57[0,0] |------------------------------------L1.57-------------------------------------|"
|
||||
- "L1.58[0,0] |------------------------------------L1.58-------------------------------------|"
|
||||
- "L1.59[0,0] |------------------------------------L1.59-------------------------------------|"
|
||||
- "L1.60[0,0] |------------------------------------L1.60-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
async fn run_compact(setup: &TestSetup) {
|
||||
let components = hardcoded_components(&setup.config);
|
||||
run_compact_impl(setup, components).await;
|
||||
}
|
||||
|
||||
async fn run_compact_failing(setup: &TestSetup) {
|
||||
let components = hardcoded_components(&setup.config);
|
||||
let components = Arc::new(Components {
|
||||
df_planner: Arc::new(PanicDataFusionPlanner::new()),
|
||||
..components.as_ref().clone()
|
||||
});
|
||||
run_compact_impl(setup, components).await;
|
||||
}
|
||||
|
||||
async fn run_compact_impl(setup: &TestSetup, components: Arc<Components>) {
|
||||
let config = Arc::clone(&setup.config);
|
||||
let job_semaphore = Arc::new(
|
||||
Arc::new(AsyncSemaphoreMetrics::new(&config.metric_registry, [])).new_semaphore(10),
|
||||
);
|
||||
|
||||
// register scratchpad store
|
||||
setup
|
||||
.catalog
|
||||
.exec()
|
||||
.new_context(ExecutorType::Reorg)
|
||||
.inner()
|
||||
.runtime_env()
|
||||
.register_object_store(
|
||||
"iox",
|
||||
config.parquet_store_scratchpad.id(),
|
||||
Arc::clone(config.parquet_store_scratchpad.object_store()),
|
||||
);
|
||||
|
||||
compact(
|
||||
NonZeroUsize::new(10).unwrap(),
|
||||
Duration::from_secs(3_6000),
|
||||
job_semaphore,
|
||||
&components,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn assert_levels<'a>(
|
||||
files: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
expected_files_and_levels: impl IntoIterator<Item = (i64, CompactionLevel)>,
|
||||
) {
|
||||
let files_and_levels: Vec<_> = files
|
||||
.into_iter()
|
||||
.map(|f| (f.id.get(), f.compaction_level))
|
||||
.collect();
|
||||
|
||||
let expected_files_and_levels: Vec<_> = expected_files_and_levels.into_iter().collect();
|
||||
|
||||
assert_eq!(files_and_levels, expected_files_and_levels);
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
/// Asserts each parquet file has (id, max_l0_created_at)
|
||||
fn assert_max_l0_created_at<'a>(
|
||||
files: impl IntoIterator<Item = &'a ParquetFile>,
|
||||
expected_files_and_max_l0_created_ats: impl IntoIterator<Item = (i64, i64)>,
|
||||
) {
|
||||
let files_and_max_l0_created_ats: Vec<_> = files
|
||||
.into_iter()
|
||||
.map(|f| (f.id.get(), f.max_l0_created_at.get()))
|
||||
.collect();
|
||||
|
||||
let expected_files_and_max_l0_created_ats: Vec<_> =
|
||||
expected_files_and_max_l0_created_ats.into_iter().collect();
|
||||
|
||||
assert_eq!(
|
||||
files_and_max_l0_created_ats,
|
||||
expected_files_and_max_l0_created_ats
|
||||
);
|
||||
}
|
||||
|
||||
async fn assert_skipped_compactions<const N: usize>(
|
||||
setup: &TestSetup,
|
||||
expected: [(PartitionId, &'static str); N],
|
||||
) {
|
||||
let skipped = setup
|
||||
.catalog
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.list_skipped_compactions()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let actual = skipped
|
||||
.iter()
|
||||
.map(|skipped| (skipped.partition_id, skipped.reason.as_str()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(actual, expected);
|
||||
}
|
|
@ -14,7 +14,7 @@ data_types = { path = "../data_types" }
|
|||
futures = "0.3"
|
||||
humantime = "2.1.0"
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
object_store = { version = "0.5.2" }
|
||||
object_store = { version = "0.5.4" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1", features = ["macros", "rt", "sync"] }
|
||||
|
|
|
@ -13,7 +13,7 @@ futures = "0.3"
|
|||
generated_types = { path = "../generated_types" }
|
||||
influxdb_iox_client = { path = "../influxdb_iox_client" }
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
object_store = { version = "0.5.2", features = ["aws"] }
|
||||
object_store = { version = "0.5.4", features = ["aws"] }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
schema = { path = "../schema" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
|
|
@ -15,7 +15,7 @@ pub struct Dialect {
    pub delimiter: Option<String>,
    /// <https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#columns>
    #[serde(skip_serializing_if = "Option::is_none")]
    pub annotations: Option<Annotations>,
    pub annotations: Option<Vec<Annotations>>,
    /// Character prefixed to comment strings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub comment_prefix: Option<String>,
@ -3,6 +3,7 @@
use crate::models::ast::Package;
use crate::models::File;
use serde::{Deserialize, Serialize};
use serde_json::Number;
use std::collections::HashMap;

/// Query influx using the Flux language

@ -23,6 +24,21 @@ pub struct Query {
    /// Default is the server's now time.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub now: Option<String>,

    /// Params for use in query via params.param_name
    #[serde(skip_serializing_if = "Option::is_none")]
    pub params: Option<HashMap<String, Param>>,
}

/// Query Param Enum for Flux
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[serde(untagged)]
pub enum Param {
    /// A number param
    Number(Number),
    /// A string param
    String(String),
}

impl Query {
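Because `Param` is an untagged serde enum, JSON numbers and strings deserialize straight into the matching variant. A self-contained sketch with stand-in types (mirroring, not reusing, the models above):

use serde::{Deserialize, Serialize};
use serde_json::Number;
use std::collections::HashMap;

// Stand-in for the Param enum above, for illustration only.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
enum Param {
    Number(Number),
    String(String),
}

fn main() {
    // Each JSON value picks the first variant it can deserialize into.
    let json = r#"{"bucket":"b0","threshold":42}"#;
    let params: HashMap<String, Param> = serde_json::from_str(json).unwrap();
    assert_eq!(params["bucket"], Param::String("b0".to_string()));
    assert_eq!(params["threshold"], Param::Number(Number::from(42)));
}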
@ -30,7 +30,7 @@ ioxd_querier = { path = "../ioxd_querier"}
ioxd_router = { path = "../ioxd_router"}
ioxd_test = { path = "../ioxd_test"}
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
object_store_metrics = { path = "../object_store_metrics" }
observability_deps = { path = "../observability_deps" }
panic_logging = { path = "../panic_logging" }
@ -433,6 +433,7 @@ impl Config {
            compact_version: CompactorAlgoVersion::AllAtOnce,
            min_num_l1_files_to_compact: 1,
            process_once: false,
            process_all_partitions: false,
        };

        let querier_config = QuerierConfig {
@ -112,6 +112,7 @@ pub async fn command(config: Config) -> Result<(), Error> {
    }));
    let time_provider = Arc::new(SystemProvider::new());

    let process_once = config.compactor_config.process_once;
    let server_type = create_compactor2_server_type(
        &common_state,
        Arc::clone(&metric_registry),

@ -127,5 +128,14 @@ pub async fn command(config: Config) -> Result<(), Error> {
    info!("starting compactor");

    let services = vec![Service::create(server_type, common_state.run_config())];
    Ok(main::main(common_state, services, metric_registry).await?)

    let res = main::main(common_state, services, metric_registry).await;
    match res {
        Ok(()) => Ok(()),
        // compactor2 is allowed to shut itself down
        Err(main::Error::Wrapper {
            source: _source @ ioxd_common::Error::LostServer,
        }) if process_once => Ok(()),
        Err(e) => Err(e.into()),
    }
}
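The shutdown handling above relies on an `@` binding plus a match guard so that only a self-initiated server shutdown in single-pass mode is treated as success. A tiny self-contained sketch of the same shape, with stand-in error types rather than the real `main`/`ioxd_common` ones:

// Stand-in error types for illustration; not the real ioxd errors.
#[derive(Debug)]
enum InnerError {
    LostServer,
    Other(String),
}

#[derive(Debug)]
enum RunError {
    Wrapper { source: InnerError },
}

fn finish(res: Result<(), RunError>, process_once: bool) -> Result<(), RunError> {
    match res {
        Ok(()) => Ok(()),
        // A self-initiated shutdown is fine when running a single pass.
        Err(RunError::Wrapper {
            source: _source @ InnerError::LostServer,
        }) if process_once => Ok(()),
        Err(e) => Err(e),
    }
}

fn main() {
    let shutdown = Err(RunError::Wrapper {
        source: InnerError::LostServer,
    });
    assert!(finish(shutdown, true).is_ok());

    let failure = Err(RunError::Wrapper {
        source: InnerError::Other("boom".to_string()),
    });
    assert!(finish(failure, true).is_err());
}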
@ -310,7 +310,7 @@ mod influxql {

        TestCase {
            input: "cases/in/issue_6112.influxql",
            chunk_stage: ChunkStage::All,
            chunk_stage: ChunkStage::Ingester,
        }
        .run()
        .await;
@ -5,6 +5,7 @@
-- Single measurement queries
--

-- Validates expected data is returned
-- Projection wildcard, all tags and fields
-- IOX_COMPARE: sorted
SELECT * FROM m0;

@ -29,21 +30,6 @@ SELECT f64, tag0 FROM m0;
-- IOX_COMPARE: sorted
SELECT f64, tag0, time FROM m0;

-- Validate some math functions
-- IOX_COMPARE: sorted
SELECT f64, floor(f64), ceil(f64) FROM m0;

-- Validate all scalar functions
-- -- IOX_COMPARE: sorted
-- TODO(sgc): log expects two arguments
-- TODO(sgc): asin and acos should cast NaN to NULL
-- SELECT f64, abs(f64), sin(f64), cos(f64), tan(f64),
--        asin(f64), acos(f64), atan(f64), atan2(f64, 1),
--        exp(f64), log(f64), ln(f64), log2(f64),
--        log10(f64), sqrt(f64), pow(f64, 2), floor(f64),
--        ceil(f64), round(f64)
-- FROM m0 LIMIT 1;

-- arithmetic operators
-- IOX_COMPARE: sorted
SELECT f64, f64 * 2, i64, i64 + i64 FROM m0;

@ -121,3 +107,105 @@ SELECT tag1, f64 FROM m0 WHERE tag1 != '';
-- TODO(sgc): Not working, as expected
-- -- IOX_COMPARE: sorted
-- SELECT tag1, f64 FROM m0 WHERE tag1 = '';

--
-- LIMIT and OFFSET clauses
-- NOTE: these are working, but due to incorrect default ordering
-- some tests fail
--

SELECT tag0, f64 FROM m0 LIMIT 1;

SELECT tag0, f64 FROM m0 WHERE tag0 = 'val00' LIMIT 2 OFFSET 1;

SELECT tag0, f64 FROM m0 LIMIT 1 OFFSET 1;

-- OFFSET clause, no LIMIT clause
-- TODO(sgc): Fails due to a bug in InfluxQL that utilises the following optimisation
-- https://github.com/influxdata/influxdb/blob/dee8977d2c6598cb2d17e9334ea997c99853640a/tsdb/engine/tsm1/iterator.gen.go#L344-L347
-- which breaks after returning the first point after the offset, because itr.opt.Limit == 0
-- SELECT tag0, f64 FROM m0 OFFSET 1;

--
-- Sort ordering
--

-- No GROUP BY clause

-- Default sort: expected output should default to ORDER BY TIME asc
SELECT * FROM m0;

-- Sort time in descending order
SELECT * FROM m0 ORDER BY time DESC;

--
-- Scalar functions in projection
--

-- Validate all scalar functions with a float field
SELECT
  f64,
  abs(f64 * -1),
  sin(f64),
  cos(f64),
  tan(f64),
  asin(1/f64),
  acos(1/f64),
  atan(f64),
  atan2(f64, 2),
  exp(f64),
  -- TODO(sgc): Dependent on https://github.com/apache/arrow-datafusion/issues/5206
  -- log(f64, 8),
  ln(f64),
  log2(f64),
  log10(f64),
  sqrt(f64),
  pow(f64, 2),
  floor(f64),
  ceil(f64),
  round(f64)
FROM m0 LIMIT 1;

-- Validate all scalar functions with an integer field
SELECT
  i64,
  abs(i64 * -1),
  sin(i64),
  cos(i64),
  tan(i64),
  -- TODO(sgc): Not coerced to float, so returns incorrect result
  -- asin(1/i64),
  acos(1/i64),
  atan(i64),
  atan2(i64, 2),
  exp(i64),
  -- TODO(sgc): Dependent on https://github.com/apache/arrow-datafusion/issues/5206
  -- log(i64, 8),
  ln(i64),
  log2(i64),
  log10(i64),
  sqrt(i64),
  pow(i64, 2),
  floor(i64),
  ceil(i64),
  round(i64)
FROM m0 LIMIT 1;

-- Deviation from InfluxQL is that NaNs are not coalesced to NULL
-- The InfluxQL compatibility later will be responsible for this translation
SELECT f64, asin(f64), acos(f64) FROM m0 LIMIT 1;

-- INF support
SELECT f64, pow(f64, pow(2, 10)) FROM m0 LIMIT 1;

--
-- TZ clause support
--

-- Interpret date/time (%Y-%M-%D %h:%m:%s) in timezone specified by TZ clause
-- TODO(sgc): condition is correct, but `time` column is not display in local timezone
-- as DataFusion does not support timestamp with timezone data types, and displaying
-- the values in the local timezone

---- SELECT f64 FROM m0 WHERE time = '2022-10-31 13:00:00' TZ('Australia/Hobart');
---- SELECT f64 FROM m0 WHERE time = '2022-10-31T13:00:00Z' TZ('Australia/Hobart');
@ -77,19 +77,6 @@
|
|||
| 19.2 | val00 | 2022-10-31T02:00:30Z |
|
||||
| 21.2 | val00 | 2022-10-31T02:00:10Z |
|
||||
+------+-------+----------------------+
|
||||
-- InfluxQL: SELECT f64, floor(f64), ceil(f64) FROM m0;
|
||||
-- Results After Sorting
|
||||
+----------------------+------+-------+------+
|
||||
| time | f64 | floor | ceil |
|
||||
+----------------------+------+-------+------+
|
||||
| 2022-10-31T02:00:00Z | 10.1 | 10 | 11 |
|
||||
| 2022-10-31T02:00:00Z | 10.4 | 10 | 11 |
|
||||
| 2022-10-31T02:00:00Z | 11.3 | 11 | 12 |
|
||||
| 2022-10-31T02:00:10Z | 18.9 | 18 | 19 |
|
||||
| 2022-10-31T02:00:10Z | 21.2 | 21 | 22 |
|
||||
| 2022-10-31T02:00:20Z | 11.2 | 11 | 12 |
|
||||
| 2022-10-31T02:00:30Z | 19.2 | 19 | 20 |
|
||||
+----------------------+------+-------+------+
|
||||
-- InfluxQL: SELECT f64, f64 * 2, i64, i64 + i64 FROM m0;
|
||||
-- Results After Sorting
|
||||
+----------------------+------+-------+-----+---------+
|
||||
|
@ -228,3 +215,70 @@
|
|||
+----------------------+-------+------+
|
||||
| 2022-10-31T02:00:10Z | val10 | 18.9 |
|
||||
+----------------------+-------+------+
|
||||
-- InfluxQL: SELECT tag0, f64 FROM m0 LIMIT 1;
|
||||
+----------------------+-------+------+
|
||||
| time | tag0 | f64 |
|
||||
+----------------------+-------+------+
|
||||
| 2022-10-31T02:00:00Z | val00 | 10.1 |
|
||||
+----------------------+-------+------+
|
||||
-- InfluxQL: SELECT tag0, f64 FROM m0 WHERE tag0 = 'val00' LIMIT 2 OFFSET 1;
|
||||
+----------------------+-------+------+
|
||||
| time | tag0 | f64 |
|
||||
+----------------------+-------+------+
|
||||
| 2022-10-31T02:00:10Z | val00 | 21.2 |
|
||||
| 2022-10-31T02:00:10Z | val00 | 18.9 |
|
||||
+----------------------+-------+------+
|
||||
-- InfluxQL: SELECT tag0, f64 FROM m0 LIMIT 1 OFFSET 1;
|
||||
+----------------------+-------+------+
|
||||
| time | tag0 | f64 |
|
||||
+----------------------+-------+------+
|
||||
| 2022-10-31T02:00:00Z | val01 | 11.3 |
|
||||
+----------------------+-------+------+
|
||||
-- InfluxQL: SELECT * FROM m0;
|
||||
+----------------------+------+-----+-----+-------+-------+
|
||||
| time | f64 | i64 | str | tag0 | tag1 |
|
||||
+----------------------+------+-----+-----+-------+-------+
|
||||
| 2022-10-31T02:00:00Z | 10.1 | 101 | hi | val00 | |
|
||||
| 2022-10-31T02:00:00Z | 11.3 | 211 | lo | val01 | |
|
||||
| 2022-10-31T02:00:00Z | 10.4 | 101 | lo | val02 | |
|
||||
| 2022-10-31T02:00:10Z | 21.2 | 211 | hi | val00 | |
|
||||
| 2022-10-31T02:00:10Z | 18.9 | 211 | lo | val00 | val10 |
|
||||
| 2022-10-31T02:00:20Z | 11.2 | 191 | lo | val00 | |
|
||||
| 2022-10-31T02:00:30Z | 19.2 | 392 | lo | val00 | |
|
||||
+----------------------+------+-----+-----+-------+-------+
|
||||
-- InfluxQL: SELECT * FROM m0 ORDER BY time DESC;
|
||||
+----------------------+------+-----+-----+-------+-------+
|
||||
| time | f64 | i64 | str | tag0 | tag1 |
|
||||
+----------------------+------+-----+-----+-------+-------+
|
||||
| 2022-10-31T02:00:30Z | 19.2 | 392 | lo | val00 | |
|
||||
| 2022-10-31T02:00:20Z | 11.2 | 191 | lo | val00 | |
|
||||
| 2022-10-31T02:00:10Z | 21.2 | 211 | hi | val00 | |
|
||||
| 2022-10-31T02:00:10Z | 18.9 | 211 | lo | val00 | val10 |
|
||||
| 2022-10-31T02:00:00Z | 10.1 | 101 | hi | val00 | |
|
||||
| 2022-10-31T02:00:00Z | 11.3 | 211 | lo | val01 | |
|
||||
| 2022-10-31T02:00:00Z | 10.4 | 101 | lo | val02 | |
|
||||
+----------------------+------+-----+-----+-------+-------+
|
||||
-- InfluxQL: SELECT f64, abs(f64 * -1), sin(f64), cos(f64), tan(f64), asin(1/f64), acos(1/f64), atan(f64), atan2(f64, 2), exp(f64), ln(f64), log2(f64), log10(f64), sqrt(f64), pow(f64, 2), floor(f64), ceil(f64), round(f64) FROM m0 LIMIT 1;
|
||||
+----------------------+------+------+---------------------+---------------------+--------------------+--------------------+-------------------+------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+-------+------+-------+
|
||||
| time | f64 | abs | sin | cos | tan | asin | acos | atan | atan2 | exp | ln | log2 | log10 | sqrt | pow | floor | ceil | round |
|
||||
+----------------------+------+------+---------------------+---------------------+--------------------+--------------------+-------------------+------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+-------+------+-------+
|
||||
| 2022-10-31T02:00:00Z | 10.1 | 10.1 | -0.6250706488928821 | -0.7805681801691837 | 0.8007893029375109 | 0.0991723838059207 | 1.471623942988976 | 1.47210806614649 | 1.3753055265462157 | 24343.00942440838 | 2.312535423847214 | 3.3362833878644325 | 1.0043213737826426 | 3.1780497164141406 | 102.00999999999999 | 10 | 11 | 10 |
|
||||
+----------------------+------+------+---------------------+---------------------+--------------------+--------------------+-------------------+------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+-------+------+-------+
|
||||
-- InfluxQL: SELECT i64, abs(i64 * -1), sin(i64), cos(i64), tan(i64), acos(1/i64), atan(i64), atan2(i64, 2), exp(i64), ln(i64), log2(i64), log10(i64), sqrt(i64), pow(i64, 2), floor(i64), ceil(i64), round(i64) FROM m0 LIMIT 1;
|
||||
+----------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+-------------------+-----------+----------------------------------------------+------------------+-------------------+--------------------+-------------------+-------+-------+------+-------+
|
||||
| time | i64 | abs | sin | cos | tan | acos | atan | atan2 | exp | ln | log2 | log10 | sqrt | pow | floor | ceil | round |
|
||||
+----------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+-------------------+-----------+----------------------------------------------+------------------+-------------------+--------------------+-------------------+-------+-------+------+-------+
|
||||
| 2022-10-31T02:00:00Z | 101 | 101 | 0.45202578717835057 | 0.8920048697881602 | 0.5067526002248183 | 1.5707963267948966 | 1.560895660206908 | 1.5509969 | 73070599793680670000000000000000000000000000 | 4.61512051684126 | 6.658211482751795 | 2.0043213737826426 | 10.04987562112089 | 10201 | 101 | 101 | 101 |
|
||||
+----------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+-------------------+-----------+----------------------------------------------+------------------+-------------------+--------------------+-------------------+-------+-------+------+-------+
|
||||
-- InfluxQL: SELECT f64, asin(f64), acos(f64) FROM m0 LIMIT 1;
|
||||
+----------------------+------+------+------+
|
||||
| time | f64 | asin | acos |
|
||||
+----------------------+------+------+------+
|
||||
| 2022-10-31T02:00:00Z | 10.1 | NaN | NaN |
|
||||
+----------------------+------+------+------+
|
||||
-- InfluxQL: SELECT f64, pow(f64, pow(2, 10)) FROM m0 LIMIT 1;
|
||||
+----------------------+------+-----+
|
||||
| time | f64 | pow |
|
||||
+----------------------+------+-----+
|
||||
| 2022-10-31T02:00:00Z | 10.1 | inf |
|
||||
+----------------------+------+-----+
|
|
@ -28,7 +28,7 @@ iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
mutable_batch = { path = "../mutable_batch"}
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
once_cell = "1"
parking_lot = "0.12"

@ -28,7 +28,7 @@ iox_time = { path = "../iox_time" }
metric = { version = "0.1.0", path = "../metric" }
mutable_batch = { version = "0.1.0", path = "../mutable_batch" }
mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { version = "0.1.0", path = "../observability_deps" }
once_cell = "1.17"
parking_lot = "0.12.1"
@ -459,6 +459,9 @@ pub trait PartitionRepo: Send + Sync {
    /// return the partitions by table id
    async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;

    /// return all partitions IDs
    async fn list_ids(&mut self) -> Result<Vec<PartitionId>>;

    /// Update the sort key for the partition, setting it to `new_sort_key` iff
    /// the current value matches `old_sort_key`.
    ///
@ -955,6 +958,7 @@ pub(crate) mod test_helpers {
    };
    use metric::{Attributes, DurationHistogram, Metric};
    use std::{
        collections::BTreeSet,
        ops::{Add, DerefMut},
        sync::Arc,
        time::Duration,
@ -1645,6 +1649,16 @@ pub(crate) mod test_helpers {
        created.insert(other_partition.id, other_partition.clone());
        assert_eq!(created, listed);

        let listed = repos
            .partitions()
            .list_ids()
            .await
            .expect("failed to list partitions")
            .into_iter()
            .collect::<BTreeSet<_>>();

        assert_eq!(created.keys().copied().collect::<BTreeSet<_>>(), listed);

        // test list_by_namespace
        let namespace2 = repos
            .namespaces()
@ -873,6 +873,14 @@ impl PartitionRepo for MemTxn {
        Ok(partitions)
    }

    async fn list_ids(&mut self) -> Result<Vec<PartitionId>> {
        let stage = self.stage();

        let partitions: Vec<_> = stage.partitions.iter().map(|p| p.id).collect();

        Ok(partitions)
    }

    async fn cas_sort_key(
        &mut self,
        partition_id: PartitionId,
@ -246,6 +246,7 @@ decorate!(
    "partition_list_by_shard" = list_by_shard(&mut self, shard_id: ShardId) -> Result<Vec<Partition>>;
    "partition_list_by_namespace" = list_by_namespace(&mut self, namespace_id: NamespaceId) -> Result<Vec<Partition>>;
    "partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
    "partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
    "partition_update_sort_key" = cas_sort_key(&mut self, partition_id: PartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;
    "partition_record_skipped_compaction" = record_skipped_compaction(&mut self, partition_id: PartitionId, reason: &str, num_files: usize, limit_num_files: usize, limit_num_files_first_in_partition: usize, estimated_bytes: u64, limit_bytes: u64) -> Result<()>;
    "partition_list_skipped_compactions" = list_skipped_compactions(&mut self) -> Result<Vec<SkippedCompaction>>;
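A minimal, hypothetical sketch of driving the new `list_ids` method from a caller that already holds a catalog handle; the `catalog.repositories()` shape mirrors the test helper above, and the surrounding context is assumed:

// Hypothetical caller: collect every known PartitionId into a set.
let mut repos = catalog.repositories().await;
let ids: std::collections::BTreeSet<PartitionId> = repos
    .partitions()
    .list_ids()
    .await
    .expect("failed to list partition ids")
    .into_iter()
    .collect();
println!("catalog currently tracks {} partitions", ids.len());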
@ -1287,6 +1287,18 @@ WHERE table_id = $1;
        .map_err(|e| Error::SqlxError { source: e })
    }

    async fn list_ids(&mut self) -> Result<Vec<PartitionId>> {
        sqlx::query_as(
            r#"
SELECT p.id as partition_id
FROM partition p
            "#,
        )
        .fetch_all(&mut self.inner)
        .await
        .map_err(|e| Error::SqlxError { source: e })
    }

    /// Update the sort key for `partition_id` if and only if `old_sort_key`
    /// matches the current value in the database.
    ///
@ -1114,6 +1114,18 @@ WHERE table_id = $1;
        .collect())
    }

    async fn list_ids(&mut self) -> Result<Vec<PartitionId>> {
        sqlx::query_as(
            r#"
SELECT p.id as partition_id
FROM partition p
            "#,
        )
        .fetch_all(self.inner.get_mut())
        .await
        .map_err(|e| Error::SqlxError { source: e })
    }

    /// Update the sort key for `partition_id` if and only if `old_sort_key`
    /// matches the current value in the database.
    ///
@ -27,7 +27,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.92"
snafu = "0.7"
tokio = { version = "1.25", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
toml = "0.7.1"
toml = "0.7.2"
tracing = "0.1"
tracing-subscriber = "0.3"
uuid = { version = "1", default_features = false }

@ -29,7 +29,7 @@ hashbrown = { workspace = true }
influxdb_influxql_parser = { path = "../influxdb_influxql_parser" }
itertools = "0.10.5"
once_cell = "1"
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet_file = { path = "../parquet_file" }
@ -464,7 +464,6 @@ mod test {
        physical_plan::displayable,
        prelude::{col, lit, lit_timestamp_nano},
        scalar::ScalarValue,
        sql::TableReference,
    };

    fn schema() -> Schema {
@ -546,10 +545,9 @@ mod test {
    async fn assert_explain(sql: &str, expected: &str) -> Result<()> {
        let executor = Executor::new_testing();
        let context = executor.new_context(ExecutorType::Query);
        context.inner().register_table(
            TableReference::Bare { table: "temps" },
            Arc::new(EmptyTable::new(Arc::new(schema()))),
        )?;
        context
            .inner()
            .register_table("temps", Arc::new(EmptyTable::new(Arc::new(schema()))))?;
        let physical_plan = context.prepare_sql(sql).await?;
        let actual_plan = displayable(physical_plan.as_ref()).indent().to_string();
        let actual_iter = actual_plan.split('\n');
@ -155,9 +155,7 @@ mod tests {
        error::Result as ArrowResult,
        record_batch::RecordBatch,
    };
    use datafusion::{
        datasource::empty::EmptyTable, error::Result, from_slice::FromSlice, sql::TableReference,
    };
    use datafusion::{datasource::empty::EmptyTable, error::Result, from_slice::FromSlice};

    use crate::exec::{gapfill::GapFillExec, Executor, ExecutorType};
@ -193,10 +191,9 @@ mod tests {
    async fn plan_statement_and_get_params(sql: &str) -> Result<GapFillParams> {
        let executor = Executor::new_testing();
        let context = executor.new_context(ExecutorType::Query);
        context.inner().register_table(
            TableReference::Bare { table: "t" },
            Arc::new(EmptyTable::new(Arc::new(schema()))),
        )?;
        context
            .inner()
            .register_table("t", Arc::new(EmptyTable::new(Arc::new(schema()))))?;
        let physical_plan = context.prepare_sql(sql).await?;
        let gapfill_node = &physical_plan.children()[0];
        let gapfill_node = gapfill_node.as_any().downcast_ref::<GapFillExec>().unwrap();
@ -3,16 +3,20 @@ use crate::plan::influxql::rewriter::rewrite_statement;
use crate::plan::influxql::util::binary_operator_to_df_operator;
use crate::{DataFusionError, IOxSessionContext, QueryNamespace};
use arrow::datatypes::DataType;
use datafusion::common::{DFSchema, DFSchemaRef, Result, ScalarValue};
use datafusion::common::{DFSchema, DFSchemaRef, Result, ScalarValue, ToDFSchema};
use datafusion::datasource::provider_as_source;
use datafusion::logical_expr::expr::Sort;
use datafusion::logical_expr::expr_rewriter::{normalize_col, ExprRewritable, ExprRewriter};
use datafusion::logical_expr::logical_plan::builder::project;
use datafusion::logical_expr::logical_plan::Analyze;
use datafusion::logical_expr::{
    lit, BinaryExpr, BuiltinScalarFunction, Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder,
    Operator,
    lit, BinaryExpr, BuiltinScalarFunction, Explain, Expr, ExprSchemable, LogicalPlan,
    LogicalPlanBuilder, Operator, PlanType, ToStringifiedPlan,
};
use datafusion::prelude::Column;
use datafusion::sql::TableReference;
use influxdb_influxql_parser::common::OrderByClause;
use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement};
use influxdb_influxql_parser::expression::{
    BinaryOperator, ConditionalExpression, ConditionalOperator, VarRefDataType,
};

@ -29,6 +33,7 @@ use once_cell::sync::Lazy;
use query_functions::clean_non_meta_escapes;
use schema::{InfluxColumnType, InfluxFieldType, Schema};
use std::collections::HashSet;
use std::iter;
use std::ops::Deref;
use std::str::FromStr;
use std::sync::Arc;
@ -70,11 +75,8 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
            Statement::DropMeasurement(_) => {
                Err(DataFusionError::NotImplemented("DROP MEASUREMENT".into()))
            }
            Statement::Explain(_) => Err(DataFusionError::NotImplemented("EXPLAIN".into())),
            Statement::Select(select) => {
                let select = rewrite_statement(self.database.as_meta(), &select)?;
                self.select_statement_to_plan(select).await
            }
            Statement::Explain(explain) => self.explain_statement_to_plan(*explain).await,
            Statement::Select(select) => self.select_statement_to_plan(*select).await,
            Statement::ShowDatabases(_) => {
                Err(DataFusionError::NotImplemented("SHOW DATABASES".into()))
            }
@ -96,8 +98,41 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
        }
    }

    async fn explain_statement_to_plan(&self, explain: ExplainStatement) -> Result<LogicalPlan> {
        let plan = self.select_statement_to_plan(*explain.select).await?;
        let plan = Arc::new(plan);
        let schema = LogicalPlan::explain_schema();
        let schema = schema.to_dfschema_ref()?;

        let (analyze, verbose) = match explain.options {
            Some(ExplainOption::AnalyzeVerbose) => (true, true),
            Some(ExplainOption::Analyze) => (true, false),
            Some(ExplainOption::Verbose) => (false, true),
            None => (false, false),
        };

        if analyze {
            Ok(LogicalPlan::Analyze(Analyze {
                verbose,
                input: plan,
                schema,
            }))
        } else {
            let stringified_plans = vec![plan.to_stringified(PlanType::InitialLogicalPlan)];
            Ok(LogicalPlan::Explain(Explain {
                verbose,
                plan,
                stringified_plans,
                schema,
                logical_optimization_succeeded: false,
            }))
        }
    }

    /// Create a [`LogicalPlan`] from the specified InfluxQL `SELECT` statement.
    async fn select_statement_to_plan(&self, select: SelectStatement) -> Result<LogicalPlan> {
        let select = rewrite_statement(self.database.as_meta(), &select)?;

        // Process FROM clause
        let plans = self.plan_from_tables(select.from).await?;
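As a self-contained sketch of the option handling above, the four InfluxQL EXPLAIN forms reduce to an `(analyze, verbose)` pair; the enum here is a stand-in for the parser's `ExplainOption`, kept local so the example runs on its own:

// Stand-in for influxdb_influxql_parser::explain::ExplainOption, for illustration only.
#[derive(Clone, Copy)]
enum ExplainOption {
    Analyze,
    Verbose,
    AnalyzeVerbose,
}

// Mirrors the (analyze, verbose) mapping in explain_statement_to_plan.
fn explain_flags(options: Option<ExplainOption>) -> (bool, bool) {
    match options {
        Some(ExplainOption::AnalyzeVerbose) => (true, true),
        Some(ExplainOption::Analyze) => (true, false),
        Some(ExplainOption::Verbose) => (false, true),
        None => (false, false),
    }
}

fn main() {
    assert_eq!(explain_flags(None), (false, false)); // EXPLAIN
    assert_eq!(explain_flags(Some(ExplainOption::Verbose)), (false, true)); // EXPLAIN VERBOSE
    assert_eq!(explain_flags(Some(ExplainOption::Analyze)), (true, false)); // EXPLAIN ANALYZE
    assert_eq!(explain_flags(Some(ExplainOption::AnalyzeVerbose)), (true, true)); // EXPLAIN ANALYZE VERBOSE
}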
@ -114,6 +149,27 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
        let tz = select.timezone.map(|tz| *tz);
        let plan = self.plan_where_clause(select.condition, plan, tz)?;

        let plan = if select.group_by.is_none() {
            LogicalPlanBuilder::from(plan)
                .sort(iter::once(Expr::Sort(Sort {
                    expr: Box::new(Expr::Column(Column {
                        relation: None,
                        name: "time".to_string(),
                    })),
                    asc: match select.order_by {
                        // Default behaviour is to sort by time in ascending order if there is no ORDER BY
                        None | Some(OrderByClause::Ascending) => true,
                        Some(OrderByClause::Descending) => false,
                    },
                    nulls_first: false,
                })))?
                .build()
        } else {
            Err(DataFusionError::NotImplemented(
                "GROUP BY not supported".into(),
            ))
        }?;

        // Process and validate the field expressions in the SELECT projection list
        let select_exprs = self.field_list_to_exprs(&plan, select.fields)?;
@ -647,7 +703,6 @@ mod test {
        assert_snapshot!(plan("CREATE DATABASE foo").await);
        assert_snapshot!(plan("DELETE FROM foo").await);
        assert_snapshot!(plan("DROP MEASUREMENT foo").await);
        assert_snapshot!(plan("EXPLAIN SELECT bar FROM foo").await);
        assert_snapshot!(plan("SHOW DATABASES").await);
        assert_snapshot!(plan("SHOW MEASUREMENTS").await);
        assert_snapshot!(plan("SHOW RETENTION POLICIES").await);
@ -702,6 +757,14 @@ mod test {
            plan("SELECT foo, f64_field FROM data where non_existent !~ /f/").await
        );
    }

    #[tokio::test]
    async fn test_explain() {
        assert_snapshot!(plan("EXPLAIN SELECT foo, f64_field FROM data").await);
        assert_snapshot!(plan("EXPLAIN VERBOSE SELECT foo, f64_field FROM data").await);
        assert_snapshot!(plan("EXPLAIN ANALYZE SELECT foo, f64_field FROM data").await);
        assert_snapshot!(plan("EXPLAIN ANALYZE VERBOSE SELECT foo, f64_field FROM data").await);
    }
}

/// Tests to validate InfluxQL `SELECT` statements that project columns without specifying
@ -625,6 +625,22 @@ mod test {
        "SELECT usage_idle::float AS usage_idle FROM cpu GROUP BY host, region"
    );

    // Does not include tags in projection when expanded in GROUP BY
    let stmt = parse_select("SELECT * FROM cpu GROUP BY *");
    let stmt = rewrite_statement(&namespace, &stmt).unwrap();
    assert_eq!(
        stmt.to_string(),
        "SELECT usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY host, region"
    );

    // Does include explicitly listed tags in projection
    let stmt = parse_select("SELECT host, * FROM cpu GROUP BY *");
    let stmt = rewrite_statement(&namespace, &stmt).unwrap();
    assert_eq!(
        stmt.to_string(),
        "SELECT host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY host, region"
    );

    // Fallible

    // Invalid regex
@ -0,0 +1,8 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"EXPLAIN VERBOSE SELECT foo, f64_field FROM data\").await"
|
||||
---
|
||||
Explain [plan_type:Utf8, plan:Utf8]
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"EXPLAIN ANALYZE SELECT foo, f64_field FROM data\").await"
|
||||
---
|
||||
Analyze [plan_type:Utf8, plan:Utf8]
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"EXPLAIN ANALYZE VERBOSE SELECT foo, f64_field FROM data\").await"
|
||||
---
|
||||
Analyze [plan_type:Utf8, plan:Utf8]
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"EXPLAIN SELECT foo, f64_field FROM data\").await"
|
||||
---
|
||||
Explain [plan_type:Utf8, plan:Utf8]
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where non_existent !~ /f/\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where time > '2004-04-09T02:33:45Z'\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: data.time > TimestampNanosecond(1081478025000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time > TimestampNanosecond(1081478025000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where now() - 10s < time\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: now() - IntervalMonthDayNano("10000000000") < data.time [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: now() - IntervalMonthDayNano("10000000000") < data.time [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where foo =~ /f/\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: CAST(data.foo AS Utf8) ~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: CAST(data.foo AS Utf8) ~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where f64_field =~ /f/\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where non_existent =~ /f/\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where foo !~ /f/\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: CAST(data.foo AS Utf8) !~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: CAST(data.foo AS Utf8) !~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where f64_field !~ /f/\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
|
|||
expression: "plan(\"SELECT foo, f64_field FROM data where time > now() - 10s\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
Filter: data.time > now() - IntervalMonthDayNano("10000000000") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time > now() - IntervalMonthDayNano("10000000000") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql.rs
|
||||
expression: "plan(\"SELECT foo, sin(f64_field) FROM data\")"
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"SELECT foo, sin(f64_field) FROM data\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, sin(data.f64_field) AS sin [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, sin:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql.rs
|
||||
expression: "plan(\"SELECT foo, atan2(f64_field, 2) FROM data\")"
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"SELECT foo, atan2(f64_field, 2) FROM data\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, atan2(data.f64_field, Int64(2)) AS atan2 [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, atan2:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql.rs
|
||||
expression: "plan(\"SELECT foo, f64_field + 0.5 FROM data\")"
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"SELECT foo, f64_field + 0.5 FROM data\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field + Float64(0.5) AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql.rs
|
||||
expression: "plan(\"SELECT foo, f64_field + f64_field FROM data\")"
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"SELECT foo, f64_field + f64_field FROM data\").await"
|
||||
---
|
||||
Projection: data.time, data.foo AS foo, data.f64_field + data.f64_field AS f64_field_f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field_f64_field:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql.rs
|
||||
expression: "plan(\"SELECT time, f64_field FROM data\")"
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"SELECT time, f64_field FROM data\").await"
|
||||
---
|
||||
Projection: data.time AS time, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), f64_field:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
source: iox_query/src/plan/influxql.rs
|
||||
expression: "plan(\"SELECT time as timestamp, f64_field FROM data\")"
|
||||
source: iox_query/src/plan/influxql/planner.rs
|
||||
expression: "plan(\"SELECT time as timestamp, f64_field FROM data\").await"
|
||||
---
|
||||
Projection: data.time AS timestamp, data.f64_field AS f64_field [timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, f64_field, i64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field, i64_field FROM data\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field, data.i64_field AS i64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N, i64_field:Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT /^f/ FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT /^f/ FROM data\").await"
---
Projection: data.time, data.f64_field AS f64_field, data.foo AS foo [time:Timestamp(Nanosecond, None), f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT * FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT * FROM data\").await"
---
Projection: data.time, data.TIME AS TIME, data.bar AS bar, data.bool_field AS bool_field, data.f64_field AS f64_field, data.foo AS foo, data.i64_field AS i64_field, data.mixedCase AS mixedCase, data.str_field AS str_field, data.with space AS with space [time:Timestamp(Nanosecond, None), TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT TIME FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT TIME FROM data\").await"
---
Projection: data.time, data.TIME AS TIME [time:Timestamp(Nanosecond, None), TIME:Boolean;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT f64_field FROM data\").await"
---
Projection: data.time, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

@ -1,5 +0,0 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW FIELD KEYS\")"
---
This feature is not implemented: SHOW FIELD KEYS

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"EXPLAIN SELECT bar FROM foo\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW DATABASES\").await"
---
This feature is not implemented: EXPLAIN
This feature is not implemented: SHOW DATABASES

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW DATABASES\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW MEASUREMENTS\").await"
---
This feature is not implemented: SHOW DATABASES
This feature is not implemented: SHOW MEASUREMENTS

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW MEASUREMENTS\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW RETENTION POLICIES\").await"
---
This feature is not implemented: SHOW MEASUREMENTS
This feature is not implemented: SHOW RETENTION POLICIES

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW RETENTION POLICIES\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW TAG KEYS\").await"
---
This feature is not implemented: SHOW RETENTION POLICIES
This feature is not implemented: SHOW TAG KEYS

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW TAG KEYS\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW TAG VALUES WITH KEY = bar\").await"
---
This feature is not implemented: SHOW TAG KEYS
This feature is not implemented: SHOW TAG VALUES

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW TAG VALUES WITH KEY = bar\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW FIELD KEYS\").await"
---
This feature is not implemented: SHOW TAG VALUES
This feature is not implemented: SHOW FIELD KEYS
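
Note on the snapshot hunks above: every header changes from `iox_query/src/plan/influxql.rs` to `iox_query/src/plan/influxql/planner.rs` and every `expression:` gains a trailing `.await`, which indicates the InfluxQL planner test helper moved and became async. Below is a minimal, hedged sketch of that insta snapshot pattern; the `plan` helper here is a hypothetical stand-in for the real iox_query test helper, not its implementation.

// Hedged sketch only: `plan` is a placeholder for the async helper referenced
// in the snapshot `expression:` lines; the real helper runs the InfluxQL
// planner and renders the resulting DataFusion logical plan with its schema.
async fn plan(sql: &str) -> String {
    format!("<logical plan for: {sql}>")
}

#[tokio::test]
async fn select_time_and_field_snapshot() {
    // insta stores the rendered plan in a `.snap` file like the ones diffed
    // above; the helper now being async is why each expression ends in `.await`.
    insta::assert_snapshot!(plan("SELECT time, f64_field FROM data").await);
}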

@ -16,7 +16,7 @@ iox_catalog = { path = "../iox_catalog" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
once_cell = { version = "1.17", features = ["parking_lot"] }
parquet_file = { path = "../parquet_file" }

@ -15,7 +15,7 @@ iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
iox_query = { path = "../iox_query" }
object_store = "0.5.2"
object_store = "0.5.4"
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }

@ -3,7 +3,7 @@ use backoff::BackoffConfig;
use clap_blocks::compactor2::{Compactor2Config, CompactorAlgoVersion};
use compactor2::{
    compactor::Compactor2,
    config::{AlgoVersion, Config, ShardConfig},
    config::{AlgoVersion, Config, PartitionsSourceConfig, ShardConfig},
};
use data_types::{PartitionId, TRANSITION_SHARD_NUMBER};
use hyper::{Body, Request, Response};

@ -163,6 +163,20 @@ pub async fn create_compactor2_server_type(
        CompactorAlgoVersion::TargetLevel => AlgoVersion::TargetLevel,
    };

    let partitions_source = match (
        compactor_config.partition_filter,
        compactor_config.process_all_partitions,
    ) {
        (None, false) => PartitionsSourceConfig::CatalogRecentWrites,
        (None, true) => PartitionsSourceConfig::CatalogAll,
        (Some(ids), false) => {
            PartitionsSourceConfig::Fixed(ids.into_iter().map(PartitionId::new).collect())
        }
        (Some(_), true) => panic!(
            "provided partition ID filter and specific 'process all', this does not make sense"
        ),
    };

    let compactor = Compactor2::start(Config {
        shard_id,
        metric_registry: Arc::clone(&metric_registry),

@ -183,9 +197,7 @@ pub async fn create_compactor2_server_type(
        percentage_max_file_size: compactor_config.percentage_max_file_size,
        split_percentage: compactor_config.split_percentage,
        partition_timeout: Duration::from_secs(compactor_config.partition_timeout_secs),
        partition_filter: compactor_config
            .partition_filter
            .map(|parts| parts.into_iter().map(PartitionId::new).collect()),
        partitions_source,
        shadow_mode: compactor_config.shadow_mode,
        ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
        max_input_files_per_partition: compactor_config.max_input_files_per_partition,

@ -195,6 +207,8 @@ pub async fn create_compactor2_server_type(
        compact_version,
        min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,
        process_once: compactor_config.process_once,
        simulate_without_object_store: false,
        all_errors_are_fatal: false,
    });

    Arc::new(Compactor2ServerType::new(
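
As a reading aid for the hunks above, the new flag-to-config mapping can be viewed as a small standalone function. This is a hedged sketch that redefines the enum locally so it stands alone; in the real code `PartitionsSourceConfig` comes from `compactor2::config` and the IDs are `data_types::PartitionId` values.

// Hedged sketch: mirrors the `partitions_source` match in the diff, with a
// locally defined enum and plain i64 IDs so the example is self-contained.
#[derive(Debug, PartialEq)]
enum PartitionsSourceConfig {
    /// Default: compact partitions the catalog reports as recently written.
    CatalogRecentWrites,
    /// "Process all partitions": compact every partition known to the catalog.
    CatalogAll,
    /// An explicit partition-ID filter pins the compactor to a fixed set.
    Fixed(Vec<i64>),
}

fn partitions_source(
    partition_filter: Option<Vec<i64>>,
    process_all_partitions: bool,
) -> PartitionsSourceConfig {
    match (partition_filter, process_all_partitions) {
        (None, false) => PartitionsSourceConfig::CatalogRecentWrites,
        (None, true) => PartitionsSourceConfig::CatalogAll,
        (Some(ids), false) => PartitionsSourceConfig::Fixed(ids),
        // The server code treats this contradictory combination as a hard error.
        (Some(_), true) => panic!(
            "provided partition ID filter and specific 'process all', this does not make sense"
        ),
    }
}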

@ -13,7 +13,7 @@ ingester = { path = "../ingester" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
iox_query = { path = "../iox_query" }
trace = { path = "../trace" }
write_buffer = { path = "../write_buffer" }

@ -13,7 +13,7 @@ generated_types = { path = "../generated_types" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
querier = { path = "../querier" }
iox_query = { path = "../iox_query" }
router = { path = "../router" }

@ -13,7 +13,7 @@ iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
mutable_batch = { path = "../mutable_batch" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
router = { path = "../router" }
sharder = { path = "../sharder" }

@ -11,7 +11,7 @@ bytes = "1.4"
futures = "0.3"
iox_time = { version = "0.1.0", path = "../iox_time" }
metric = { version = "0.1.0", path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
pin-project = "1.0.12"
tokio = { version = "1.25", features = ["io-util"] }
workspace-hack = { path = "../workspace-hack" }

@ -15,7 +15,7 @@ datafusion_util = { path = "../datafusion_util" }
futures = "0.3"
generated_types = { path = "../generated_types" }
iox_time = { path = "../iox_time" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet = { workspace = true, features = ["experimental"]}

@ -11,7 +11,7 @@ datafusion_util = { path = "../datafusion_util" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
futures = {version = "0.3"}
num_cpus = "1.15.0"
object_store = { version = "0.5.2" }
object_store = { version = "0.5.4" }
parquet_file = { path = "../parquet_file" }
schema = { path = "../schema" }
tokio = "1.25"

@ -23,7 +23,7 @@ iox_catalog = { path = "../iox_catalog" }
iox_query = { path = "../iox_query" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet_file = { path = "../parquet_file" }

Some files were not shown because too many files have changed in this diff.