Merge branch 'main' into dom/always-requeue

pull/24376/head
Dom 2023-02-09 10:21:32 +00:00 committed by GitHub
commit d44b6d412f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
103 changed files with 2873 additions and 1333 deletions

43
Cargo.lock generated
View File

@ -549,12 +549,6 @@ version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "base64"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5"
[[package]]
name = "base64"
version = "0.21.0"
@ -1003,6 +997,7 @@ dependencies = [
"data_types",
"datafusion",
"futures",
"insta",
"iox_catalog",
"iox_query",
"iox_tests",
@ -1405,7 +1400,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1451,7 +1446,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"arrow",
"chrono",
@ -1464,7 +1459,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1476,7 +1471,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"arrow",
"async-trait",
@ -1492,7 +1487,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1522,7 +1517,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"arrow",
"chrono",
@ -1539,7 +1534,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"arrow",
"datafusion-common",
@ -1550,7 +1545,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "17.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=a67ef9197a0d7242a8089be6324ba2e25e84f41e#a67ef9197a0d7242a8089be6324ba2e25e84f41e"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e222bd627b6e7974133364fed4600d74b4da6811#e222bd627b6e7974133364fed4600d74b4da6811"
dependencies = [
"arrow-schema",
"datafusion-common",
@ -1747,14 +1742,14 @@ dependencies = [
[[package]]
name = "filetime"
version = "0.2.19"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9"
checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"windows-sys 0.42.0",
"windows-sys 0.45.0",
]
[[package]]
@ -3793,12 +3788,12 @@ dependencies = [
[[package]]
name = "object_store"
version = "0.5.3"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4201837dc4c27a8670f0363b1255cd3845a4f0c521211cced1ed14c1d0cc6d2"
checksum = "1f344e51ec9584d2f51199c0c29c6f73dddd04ade986497875bf8fa2f178caf0"
dependencies = [
"async-trait",
"base64 0.20.0",
"base64 0.21.0",
"bytes",
"chrono",
"futures",
@ -5965,9 +5960,9 @@ dependencies = [
[[package]]
name = "toml"
version = "0.7.1"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "772c1426ab886e7362aedf4abc9c0d1348a979517efedfc25862944d10137af0"
checksum = "f7afcae9e3f0fe2c370fd4657108972cbb2fa9db1b9f84849cefd80741b01cb6"
dependencies = [
"serde",
"serde_spanned",
@ -5986,9 +5981,9 @@ dependencies = [
[[package]]
name = "toml_edit"
version = "0.19.1"
version = "0.19.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90a238ee2e6ede22fb95350acc78e21dc40da00bb66c0334bde83de4ed89424e"
checksum = "5e6a7712b49e1775fb9a7b998de6635b299237f48b404dde71704f2e0e7f37e5"
dependencies = [
"indexmap",
"nom8",

View File

@ -116,8 +116,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "32.0.0" }
arrow-flight = { version = "32.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="a67ef9197a0d7242a8089be6324ba2e25e84f41e", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="a67ef9197a0d7242a8089be6324ba2e25e84f41e" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="e222bd627b6e7974133364fed4600d74b4da6811", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="e222bd627b6e7974133364fed4600d74b4da6811" }
hashbrown = { version = "0.13.2" }
parquet = { version = "32.0.0" }

View File

@ -14,7 +14,7 @@ humantime = "2.1.0"
iox_catalog = { path = "../iox_catalog" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.92"

View File

@ -236,4 +236,12 @@ pub struct Compactor2Config {
action
)]
pub process_once: bool,
/// Compact all partitions found in the catalog, no matter if/when they received writes.
#[clap(
long = "compaction-process-all-partitions",
env = "INFLUXDB_IOX_COMPACTION_PROCESS_ALL_PARTITIONS",
action
)]
pub process_all_partitions: bool,
}

View File

@ -18,7 +18,7 @@ iox_catalog = { path = "../iox_catalog" }
iox_query = { path = "../iox_query" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }

View File

@ -16,7 +16,7 @@ iox_catalog = { path = "../iox_catalog" }
iox_query = { path = "../iox_query" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }
@ -34,3 +34,4 @@ workspace-hack = { path = "../workspace-hack"}
arrow_util = { path = "../arrow_util" }
iox_tests = { path = "../iox_tests" }
test_helpers = { path = "../test_helpers"}
insta = { version = "1.26.0", features = ["yaml"] }

View File

@ -58,10 +58,8 @@ impl Compactor2 {
_ = async {
compact(config.partition_concurrency, config.partition_timeout, Arc::clone(&job_semaphore), &components).await;
// the main entry point does not allow servers to shut down themselves, so we just wait forever
info!("comapctor done");
futures::future::pending::<()>().await;
} => unreachable!(),
} => {}
}
});
let worker = shared_handle(worker);

View File

@ -1,445 +0,0 @@
#[cfg(test)]
mod tests {
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
use arrow_util::assert_batches_sorted_eq;
use data_types::{CompactionLevel, ParquetFile};
use iox_query::exec::ExecutorType;
use tracker::AsyncSemaphoreMetrics;
use crate::{
components::{
df_planner::panic::PanicDataFusionPlanner, hardcoded::hardcoded_components, Components,
},
config::AlgoVersion,
driver::compact,
test_util::{list_object_store, AssertFutureExt, TestSetup},
};
#[tokio::test]
async fn test_compact_no_file() {
test_helpers::maybe_start_logging();
// no files
let setup = TestSetup::builder().build().await;
let files = setup.list_by_table_not_to_delete().await;
assert!(files.is_empty());
// compact
// This will wait for files forever.
let fut = run_compact(&setup);
tokio::pin!(fut);
fut.assert_pending().await;
// verify catalog is still empty
let files = setup.list_by_table_not_to_delete().await;
assert!(files.is_empty());
}
#[tokio::test]
async fn test_compact_all_at_once() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let mut setup = TestSetup::builder().with_files().build().await;
setup.set_compact_version(AlgoVersion::AllAtOnce);
// verify 6 files
// verify ID and compaction level of the files
let files = setup.list_by_table_not_to_delete().await;
assert_levels(
&files,
vec![
(1, CompactionLevel::FileNonOverlapped),
(2, CompactionLevel::Initial),
(3, CompactionLevel::Initial),
(4, CompactionLevel::FileNonOverlapped),
(5, CompactionLevel::Initial),
(6, CompactionLevel::Initial),
],
);
// verify ID and max_l0_created_at
let times = setup.test_times();
assert_max_l0_created_at(
&files,
vec![
(1, times.time_1_minute_future),
(2, times.time_2_minutes_future),
(3, times.time_5_minutes_future),
(4, times.time_3_minutes_future),
(5, times.time_5_minutes_future),
(6, times.time_2_minutes_future),
],
);
// compact
run_compact(&setup).await;
// verify number of files: 6 files are compacted into 2 files
let files = setup.list_by_table_not_to_delete().await;
assert_levels(
&files,
vec![
(7, CompactionLevel::FileNonOverlapped),
(8, CompactionLevel::FileNonOverlapped),
],
);
assert_max_l0_created_at(
&files,
// both files have max_l0_created time_5_minutes_future
// which is the max of all L0 input's max_l0_created_at
vec![
(7, times.time_5_minutes_future),
(8, times.time_5_minutes_future),
],
);
// verify the content of files
// Compacted smaller file with the later data
let mut files = setup.list_by_table_not_to_delete().await;
let file1 = files.pop().unwrap();
let batches = setup.read_parquet_file(file1).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
// Compacted larger file with the earlier data
let file0 = files.pop().unwrap();
let batches = setup.read_parquet_file(file0).await;
assert_batches_sorted_eq!(
[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
"| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
}
#[tokio::test]
async fn test_compact_target_level() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let mut setup = TestSetup::builder().with_files().build().await;
setup.set_compact_version(AlgoVersion::TargetLevel);
setup.set_min_num_l1_files_to_compact(2);
// verify 6 files
let files = setup.list_by_table_not_to_delete().await;
assert_levels(
&files,
vec![
(1, CompactionLevel::FileNonOverlapped),
(2, CompactionLevel::Initial),
(3, CompactionLevel::Initial),
(4, CompactionLevel::FileNonOverlapped),
(5, CompactionLevel::Initial),
(6, CompactionLevel::Initial),
],
);
// verify ID and max_l0_created_at
let times = setup.test_times();
assert_max_l0_created_at(
&files,
vec![
(1, times.time_1_minute_future),
(2, times.time_2_minutes_future),
(3, times.time_5_minutes_future),
(4, times.time_3_minutes_future),
(5, times.time_5_minutes_future),
(6, times.time_2_minutes_future),
],
);
// compact
run_compact(&setup).await;
// verify number of files: 6 files are compacted into 2 files
let files = setup.list_by_table_not_to_delete().await;
assert_eq!(files.len(), 2);
assert_levels(
&files,
// This is the result of a 2-round compaction from L0s -> L1s and then L1s -> L2s
// The first round will create two L1 files, IDs 7 and 8
// The second round will create two L2 files, IDs 9 and 10
vec![(9, CompactionLevel::Final), (10, CompactionLevel::Final)],
);
assert_max_l0_created_at(
&files,
// both files have max_l0_created time_5_minutes_future
// which is the max of all L0 input's max_l0_created_at
vec![
(9, times.time_5_minutes_future),
(10, times.time_5_minutes_future),
],
);
// verify the content of files
// Compacted smaller file with the later data
let mut files = setup.list_by_table_not_to_delete().await;
let file1 = files.pop().unwrap();
let batches = setup.read_parquet_file(file1).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
// Compacted larger file with the earlier data
let file0 = files.pop().unwrap();
let batches = setup.read_parquet_file(file0).await;
assert_batches_sorted_eq!(
[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
"| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
}
#[tokio::test]
async fn test_skip_compact() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let setup = TestSetup::builder().with_files().build().await;
let expected_files_and_levels = vec![
(1, CompactionLevel::FileNonOverlapped),
(2, CompactionLevel::Initial),
(3, CompactionLevel::Initial),
(4, CompactionLevel::FileNonOverlapped),
(5, CompactionLevel::Initial),
(6, CompactionLevel::Initial),
];
// verify 6 files
let files = setup.list_by_table_not_to_delete().await;
assert_levels(&files, expected_files_and_levels.clone());
// add the partition into skipped compaction
setup
.catalog
.add_to_skipped_compaction(setup.partition_info.partition_id, "test reason")
.await;
// compact but nothing will be compacted because the partition is skipped
run_compact(&setup).await;
// verify still 6 files
let files = setup.list_by_table_not_to_delete().await;
assert_levels(&files, expected_files_and_levels.clone());
}
#[tokio::test]
async fn test_partition_fail() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let setup = TestSetup::builder().with_files().build().await;
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
assert!(!catalog_files_pre.is_empty());
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
assert!(!object_store_files_pre.is_empty());
run_compact_failing(&setup).await;
let catalog_files_post = setup.list_by_table_not_to_delete().await;
assert_eq!(catalog_files_pre, catalog_files_post);
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
assert_eq!(object_store_files_pre, object_store_files_post);
let skipped = setup
.catalog
.catalog
.repositories()
.await
.partitions()
.list_skipped_compactions()
.await
.unwrap();
assert_eq!(skipped.len(), 1);
}
#[tokio::test]
async fn test_shadow_mode() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let setup = TestSetup::builder()
.with_files()
.with_shadow_mode()
.build()
.await;
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
assert!(!catalog_files_pre.is_empty());
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
assert!(!object_store_files_pre.is_empty());
run_compact(&setup).await;
let catalog_files_post = setup.list_by_table_not_to_delete().await;
assert_eq!(catalog_files_pre, catalog_files_post);
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
assert_eq!(object_store_files_pre, object_store_files_post);
}
#[tokio::test]
async fn test_shadow_mode_partition_fail() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let setup = TestSetup::builder()
.with_files()
.with_shadow_mode()
.build()
.await;
let catalog_files_pre = setup.list_by_table_not_to_delete().await;
assert!(!catalog_files_pre.is_empty());
let object_store_files_pre = list_object_store(&setup.catalog.object_store).await;
assert!(!object_store_files_pre.is_empty());
run_compact_failing(&setup).await;
let catalog_files_post = setup.list_by_table_not_to_delete().await;
assert_eq!(catalog_files_pre, catalog_files_post);
let object_store_files_post = list_object_store(&setup.catalog.object_store).await;
assert_eq!(object_store_files_pre, object_store_files_post);
let skipped = setup
.catalog
.catalog
.repositories()
.await
.partitions()
.list_skipped_compactions()
.await
.unwrap();
assert_eq!(skipped, vec![]);
}
async fn run_compact(setup: &TestSetup) {
let components = hardcoded_components(&setup.config);
run_compact_impl(setup, components).await;
}
async fn run_compact_failing(setup: &TestSetup) {
let components = hardcoded_components(&setup.config);
let components = Arc::new(Components {
df_planner: Arc::new(PanicDataFusionPlanner::new()),
..components.as_ref().clone()
});
run_compact_impl(setup, components).await;
}
async fn run_compact_impl(setup: &TestSetup, components: Arc<Components>) {
let config = Arc::clone(&setup.config);
let job_semaphore = Arc::new(
Arc::new(AsyncSemaphoreMetrics::new(&config.metric_registry, [])).new_semaphore(10),
);
// register scratchpad store
setup
.catalog
.exec()
.new_context(ExecutorType::Reorg)
.inner()
.runtime_env()
.register_object_store(
"iox",
config.parquet_store_scratchpad.id(),
Arc::clone(config.parquet_store_scratchpad.object_store()),
);
compact(
NonZeroUsize::new(10).unwrap(),
Duration::from_secs(3_6000),
job_semaphore,
&components,
)
.await;
}
#[track_caller]
fn assert_levels<'a>(
files: impl IntoIterator<Item = &'a ParquetFile>,
expected_files_and_levels: impl IntoIterator<Item = (i64, CompactionLevel)>,
) {
let files_and_levels: Vec<_> = files
.into_iter()
.map(|f| (f.id.get(), f.compaction_level))
.collect();
let expected_files_and_levels: Vec<_> = expected_files_and_levels.into_iter().collect();
assert_eq!(files_and_levels, expected_files_and_levels);
}
#[track_caller]
/// Asserts each parquet file has (id, max_l0_created_at)
fn assert_max_l0_created_at<'a>(
files: impl IntoIterator<Item = &'a ParquetFile>,
expected_files_and_max_l0_created_ats: impl IntoIterator<Item = (i64, i64)>,
) {
let files_and_max_l0_created_ats: Vec<_> = files
.into_iter()
.map(|f| (f.id.get(), f.max_l0_created_at.get()))
.collect();
let expected_files_and_max_l0_created_ats: Vec<_> =
expected_files_and_max_l0_created_ats.into_iter().collect();
assert_eq!(
files_and_max_l0_created_ats,
expected_files_and_max_l0_created_ats
);
}
}

View File

@ -33,7 +33,7 @@ use crate::components::{
///
/// | Step | Name | Type | Description |
/// | ---- | --------------------- | ----------------------------------------------------------- | ----------- |
/// | 1 | **Actual source** | `inner_source`/`T1`/[`PartitionsSource`], wrapped | This is the actual source, e.g. a [catalog](crate::components::partitions_source::catalog::CatalogPartitionsSource) |
/// | 1 | **Actual source** | `inner_source`/`T1`/[`PartitionsSource`], wrapped | This is the actual source, e.g. a [catalog](crate::components::partitions_source::catalog_to_compact::CatalogToCompactPartitionsSource) |
/// | 2 | **Unique IDs source** | [`UniquePartionsSourceWrapper`], wraps `inner_source`/`T1` | Outputs the [`PartitionId`]s from the `inner_source` but filters out partitions that have not yet reached the uniqueness sink (step 4) |
/// | 3 | **Critical section** | -- | Here it is always ensured that a single [`PartitionId`] does NOT occur more than once. |
/// | 4 | **Unique IDs sink** | [`UniquePartitionDoneSinkWrapper`], wraps `inner_sink`/`T2` | Observes incoming IDs and removes them from the filter applied in step 2. |

View File

@ -6,6 +6,7 @@ use std::{
use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream};
pub mod dedicated;
pub mod noop;
pub trait DataFusionPlanExec: Debug + Display + Send + Sync {
/// Convert DataFusion [`ExecutionPlan`] to multiple output streams.

View File

@ -0,0 +1,38 @@
use std::{fmt::Display, sync::Arc};
use datafusion::physical_plan::{
stream::RecordBatchStreamAdapter, ExecutionPlan, SendableRecordBatchStream,
};
use super::DataFusionPlanExec;
/// A [`DataFusionPlanExec`] that never runs the plan it is handed and only
/// returns empty streams (for use in testing).
#[derive(Debug, Default)]
pub struct NoopDataFusionPlanExec;

impl NoopDataFusionPlanExec {
    /// Build a new no-op executor.
    pub fn new() -> Self {
        Self
    }
}

impl Display for NoopDataFusionPlanExec {
    /// Renders the fixed label `noop`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("noop")
    }
}
impl DataFusionPlanExec for NoopDataFusionPlanExec {
    /// Return one empty stream per output partition of `plan`, each tagged
    /// with the plan's schema. The plan itself is never executed.
    fn exec(&self, plan: Arc<dyn ExecutionPlan>) -> Vec<SendableRecordBatchStream> {
        let schema = plan.schema();
        let n_partitions = plan.output_partitioning().partition_count();

        // One (empty) stream for every partition the plan would have produced.
        let mut streams = Vec::with_capacity(n_partitions);
        for _ in 0..n_partitions {
            let empty =
                RecordBatchStreamAdapter::new(Arc::clone(&schema), futures::stream::empty());
            streams.push(Box::pin(empty) as SendableRecordBatchStream);
        }
        streams
    }
}

View File

@ -130,7 +130,8 @@ mod tests {
use crate::test_util::{
create_l1_files, create_overlapped_files, create_overlapped_files_2,
create_overlapped_l0_l1_files, create_overlapped_l1_l2_files,
create_overlapped_l0_l1_files, create_overlapped_l1_l2_files, format_files,
format_files_split,
};
use super::*;
@ -186,7 +187,17 @@ mod tests {
#[test]
fn test_apply_one_level_empty() {
let files = create_l1_files(1);
assert_eq!(files.len(), 3);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 1b "
- "L1.13[600,700] |-----L1.13-----| "
- "L1.12[400,500] |-----L1.12-----| "
- "L1.11[250,350] |-----L1.11-----| "
"###
);
let split = TargetLevelNonOverlapSplit::new();
@ -204,29 +215,41 @@ mod tests {
#[test]
fn test_apply_mix_1() {
let files = create_overlapped_l0_l1_files(1);
assert_eq!(files.len(), 6);
// Input files:
// |--L1.1--| |--L1.2--| |--L1.3--|
// |--L0.1--| |--L0.2--| |--L0.3--|
// Output files: (overlap, non_overlap) = ( [L0.1, L0.2, L0.3, L1.2, L1.3] , [L1.1] )
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 1b "
- "L0.2[650,750] |---L0.2---| "
- "L0.1[450,620] |-------L0.1-------| "
- "L0.3[800,900] |---L0.3---| "
- "L1, all files 1b "
- "L1.13[600,700] |--L1.13---| "
- "L1.12[400,500] |--L1.12---| "
- "L1.11[250,350] |--L1.11---| "
"###
);
let split = TargetLevelNonOverlapSplit::new();
let (overlap, non_overlap) = split.apply(files, CompactionLevel::FileNonOverlapped);
assert_eq!(overlap.len(), 5);
assert_eq!(non_overlap.len(), 1);
// Verify overlapping files
// sort by id
let mut overlap = overlap;
overlap.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(overlap[0].id.get(), 1);
assert_eq!(overlap[1].id.get(), 2);
assert_eq!(overlap[2].id.get(), 3);
assert_eq!(overlap[3].id.get(), 12);
assert_eq!(overlap[4].id.get(), 13);
// verify non-overlapping files
assert_eq!(non_overlap[0].id.get(), 11);
insta::assert_yaml_snapshot!(
format_files_split("overlap", &overlap, "non_overlap", &non_overlap),
@r###"
---
- overlap
- "L0, all files 1b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 1b "
- "L1.12[400,500] |----L1.12-----| "
- "L1.13[600,700] |----L1.13-----| "
- non_overlap
- "L1, all files 1b "
- "L1.11[250,350] |------------------------------------L1.11-------------------------------------|"
"###
);
}
// |--L2.1--| |--L2.2--|
@ -236,28 +259,39 @@ mod tests {
#[test]
fn test_apply_mix_2() {
let files = create_overlapped_l1_l2_files(1);
assert_eq!(files.len(), 5);
// Input files:
// |--L2.1--| |--L2.2--|
// |--L1.1--| |--L1.2--| |--L1.3--|
// Output files: (overlap, non_overlap) = ( [L1.1, L1.2, L1.3, L2.2] , [L2.1] )
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 1b "
- "L1.13[600,700] |--L1.13--| "
- "L1.12[400,500] |--L1.12--| "
- "L1.11[250,350] |--L1.11--| "
- "L2, all files 1b "
- "L2.21[0,100] |--L2.21--| "
- "L2.22[200,300] |--L2.22--| "
"###
);
let split = TargetLevelNonOverlapSplit::new();
let (overlap, non_overlap) = split.apply(files, CompactionLevel::Final);
assert_eq!(overlap.len(), 4);
assert_eq!(non_overlap.len(), 1);
// Verify overlapping files
// sort by id
let mut overlap = overlap;
overlap.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(overlap[0].id.get(), 11);
assert_eq!(overlap[1].id.get(), 12);
assert_eq!(overlap[2].id.get(), 13);
assert_eq!(overlap[3].id.get(), 22);
// verify non-overlapping files
assert_eq!(non_overlap[0].id.get(), 21);
insta::assert_yaml_snapshot!(
format_files_split("overlap", &overlap, "non_overlap", &non_overlap),
@r###"
---
- overlap
- "L1, all files 1b "
- "L1.13[600,700] |----L1.13-----|"
- "L1.12[400,500] |----L1.12-----| "
- "L1.11[250,350] |----L1.11-----| "
- "L2, all files 1b "
- "L2.22[200,300] |----L2.22-----| "
- non_overlap
- "L2, all files 1b "
- "L2.21[0,100] |------------------------------------L2.21-------------------------------------|"
"###
);
}
#[test]
@ -269,26 +303,40 @@ mod tests {
//
// . Output: (overlap, non_overlap) = ( [L0.1, L0.2, L1.2, L1.3] , [L1.1, L1.4] )
let files = create_overlapped_files_2(1);
assert_eq!(files.len(), 6);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 1b "
- "L0.2[520,550] |L0.2| "
- "L0.1[250,350] |--L0.1---| "
- "L1, all files 1b "
- "L1.13[400,500] |--L1.13--| "
- "L1.12[200,300] |--L1.12--| "
- "L1.11[0,100] |--L1.11--| "
- "L1.14[600,700] |--L1.14--| "
"###
);
let split = TargetLevelNonOverlapSplit::new();
let (overlap, non_overlap) = split.apply(files, CompactionLevel::FileNonOverlapped);
assert_eq!(overlap.len(), 4);
assert_eq!(non_overlap.len(), 2);
// Verify overlapping files
// sort by id
let mut overlap = overlap;
overlap.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(overlap[0].id.get(), 1);
assert_eq!(overlap[1].id.get(), 2);
assert_eq!(overlap[2].id.get(), 12);
assert_eq!(overlap[3].id.get(), 13);
// verify non-overlapping files
// sort by id
let mut non_overlap = non_overlap;
non_overlap.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(non_overlap[0].id.get(), 11);
assert_eq!(non_overlap[1].id.get(), 14);
insta::assert_yaml_snapshot!(
format_files_split("overlap", &overlap, "non_overlap", &non_overlap),
@r###"
---
- overlap
- "L0, all files 1b "
- "L0.2[520,550] |L0.2| "
- "L0.1[250,350] |--------L0.1--------| "
- "L1, all files 1b "
- "L1.12[200,300] |-------L1.12--------| "
- "L1.13[400,500] |-------L1.13--------| "
- non_overlap
- "L1, all files 1b "
- "L1.11[0,100] |--L1.11--| "
- "L1.14[600,700] |--L1.14--| "
"###
);
}
}

View File

@ -36,8 +36,8 @@ impl FilesSplit for TargetLevelTargetLevelSplit {
mod tests {
use crate::test_util::{
assert_parquet_files, assert_parquet_files_split, create_l0_files, create_l1_files,
create_l2_files, create_overlapped_files,
create_l0_files, create_l1_files, create_l2_files, create_overlapped_files, format_files,
format_files_split,
};
use super::*;
@ -63,13 +63,17 @@ mod tests {
#[test]
fn test_apply_partial_empty_files_l0() {
let files = create_l0_files(1);
let expected = vec![
"L0 ",
"L0.2[650,750] |-----L0.2------| ",
"L0.1[450,620] |------------L0.1------------| ",
"L0.3[800,900] |-----L0.3------| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 1b "
- "L0.2[650,750] |-----L0.2------| "
- "L0.1[450,620] |------------L0.1------------| "
- "L0.3[800,900] |-----L0.3------| "
"###
);
let split = TargetLevelTargetLevelSplit::new();
let (lower, higher) = split.apply(files.clone(), CompactionLevel::Initial);
@ -88,13 +92,17 @@ mod tests {
#[test]
fn test_apply_partial_empty_files_l1() {
let files = create_l1_files(1);
let expected = vec![
"L1 ",
"L1.13[600,700] |-----L1.13-----| ",
"L1.12[400,500] |-----L1.12-----| ",
"L1.11[250,350] |-----L1.11-----| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 1b "
- "L1.13[600,700] |-----L1.13-----| "
- "L1.12[400,500] |-----L1.12-----| "
- "L1.11[250,350] |-----L1.11-----| "
"###
);
let split = TargetLevelTargetLevelSplit::new();
let (lower, higher) = split.apply(files.clone(), CompactionLevel::Initial);
@ -113,12 +121,16 @@ mod tests {
#[test]
fn test_apply_partial_empty_files_l2() {
let files = create_l2_files();
let expected = vec![
"L2 ",
"L2.21[0,100] |---------L2.21----------| ",
"L2.22[200,300] |---------L2.22----------| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L2, all files 1b "
- "L2.21[0,100] |---------L2.21----------| "
- "L2.22[200,300] |---------L2.22----------| "
"###
);
let split = TargetLevelTargetLevelSplit::new();
let (lower, higher) = split.apply(files.clone(), CompactionLevel::Initial);
@ -138,40 +150,47 @@ mod tests {
fn test_apply_target_level_0() {
// Test target level Initial
let files = create_overlapped_files();
let expected = vec![
"L0 ",
"L0.2[650,750]@1 |-L0.2-| ",
"L0.1[450,620]@1 |----L0.1-----| ",
"L0.3[800,900]@100 |-L0.3-| ",
"L1 ",
"L1.13[600,700]@100 |L1.13-| ",
"L1.12[400,500]@1 |L1.12-| ",
"L1.11[250,350]@1 |L1.11-| ",
"L2 ",
"L2.21[0,100]@1 |L2.21-| ",
"L2.22[200,300]@1 |L2.22-| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.2[650,750] 1b |-L0.2-| "
- "L0.1[450,620] 1b |----L0.1-----| "
- "L0.3[800,900] 100b |-L0.3-| "
- "L1 "
- "L1.13[600,700] 100b |L1.13-| "
- "L1.12[400,500] 1b |L1.12-| "
- "L1.11[250,350] 1b |L1.11-| "
- "L2 "
- "L2.21[0,100] 1b |L2.21-| "
- "L2.22[200,300] 1b |L2.22-| "
"###
);
let split = TargetLevelTargetLevelSplit::new();
let (lower, higher) = split.apply(files, CompactionLevel::Initial);
let expected = vec![
"left",
"L0 ",
"L0.2[650,750]@1 |-----L0.2------| ",
"L0.1[450,620]@1 |------------L0.1------------| ",
"L0.3[800,900]@100 |-----L0.3------| ",
"right",
"L1 ",
"L1.13[600,700]@100 |--L1.13--| ",
"L1.12[400,500]@1 |--L1.12--| ",
"L1.11[250,350]@1 |--L1.11--| ",
"L2 ",
"L2.21[0,100]@1 |--L2.21--| ",
"L2.22[200,300]@1 |--L2.22--| ",
];
assert_parquet_files_split(expected, &lower, &higher);
insta::assert_yaml_snapshot!(
format_files_split("lower", &lower, "higher", &higher),
@r###"
---
- lower
- "L0 "
- "L0.2[650,750] 1b |-----L0.2------| "
- "L0.1[450,620] 1b |------------L0.1------------| "
- "L0.3[800,900] 100b |-----L0.3------| "
- higher
- "L1 "
- "L1.13[600,700] 100b |--L1.13--| "
- "L1.12[400,500] 1b |--L1.12--| "
- "L1.11[250,350] 1b |--L1.11--| "
- "L2 "
- "L2.21[0,100] 1b |--L2.21--| "
- "L2.22[200,300] 1b |--L2.22--| "
"###
);
// verify number of files
assert_eq!(lower.len(), 3);
@ -190,40 +209,47 @@ mod tests {
fn test_apply_target_level_l1() {
// Test target level is FileNonOverlapped
let files = create_overlapped_files();
let expected = vec![
"L0 ",
"L0.2[650,750]@1 |-L0.2-| ",
"L0.1[450,620]@1 |----L0.1-----| ",
"L0.3[800,900]@100 |-L0.3-| ",
"L1 ",
"L1.13[600,700]@100 |L1.13-| ",
"L1.12[400,500]@1 |L1.12-| ",
"L1.11[250,350]@1 |L1.11-| ",
"L2 ",
"L2.21[0,100]@1 |L2.21-| ",
"L2.22[200,300]@1 |L2.22-| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.2[650,750] 1b |-L0.2-| "
- "L0.1[450,620] 1b |----L0.1-----| "
- "L0.3[800,900] 100b |-L0.3-| "
- "L1 "
- "L1.13[600,700] 100b |L1.13-| "
- "L1.12[400,500] 1b |L1.12-| "
- "L1.11[250,350] 1b |L1.11-| "
- "L2 "
- "L2.21[0,100] 1b |L2.21-| "
- "L2.22[200,300] 1b |L2.22-| "
"###
);
let split = TargetLevelTargetLevelSplit::new();
let (lower, higher) = split.apply(files, CompactionLevel::FileNonOverlapped);
let expected = vec![
"left",
"L0 ",
"L0.2[650,750]@1 |---L0.2---| ",
"L0.1[450,620]@1 |-------L0.1-------| ",
"L0.3[800,900]@100 |---L0.3---| ",
"L1 ",
"L1.13[600,700]@100 |--L1.13---| ",
"L1.12[400,500]@1 |--L1.12---| ",
"L1.11[250,350]@1 |--L1.11---| ",
"right",
"L2 ",
"L2.21[0,100] |---------L2.21----------| ",
"L2.22[200,300] |---------L2.22----------| ",
];
assert_parquet_files_split(expected, &lower, &higher);
insta::assert_yaml_snapshot!(
format_files_split("lower", &lower, "higher", &higher),
@r###"
---
- lower
- "L0 "
- "L0.2[650,750] 1b |---L0.2---| "
- "L0.1[450,620] 1b |-------L0.1-------| "
- "L0.3[800,900] 100b |---L0.3---| "
- "L1 "
- "L1.13[600,700] 100b |--L1.13---| "
- "L1.12[400,500] 1b |--L1.12---| "
- "L1.11[250,350] 1b |--L1.11---| "
- higher
- "L2, all files 1b "
- "L2.21[0,100] |---------L2.21----------| "
- "L2.22[200,300] |---------L2.22----------| "
"###
);
// verify number of files
assert_eq!(lower.len(), 6);
@ -242,20 +268,24 @@ mod tests {
fn test_apply_taget_level_l2() {
// Test target level is Final
let files = create_overlapped_files();
let expected = vec![
"L0 ",
"L0.2[650,750]@1 |-L0.2-| ",
"L0.1[450,620]@1 |----L0.1-----| ",
"L0.3[800,900]@100 |-L0.3-| ",
"L1 ",
"L1.13[600,700]@100 |L1.13-| ",
"L1.12[400,500]@1 |L1.12-| ",
"L1.11[250,350]@1 |L1.11-| ",
"L2 ",
"L2.21[0,100]@1 |L2.21-| ",
"L2.22[200,300]@1 |L2.22-| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.2[650,750] 1b |-L0.2-| "
- "L0.1[450,620] 1b |----L0.1-----| "
- "L0.3[800,900] 100b |-L0.3-| "
- "L1 "
- "L1.13[600,700] 100b |L1.13-| "
- "L1.12[400,500] 1b |L1.12-| "
- "L1.11[250,350] 1b |L1.11-| "
- "L2 "
- "L2.21[0,100] 1b |L2.21-| "
- "L2.22[200,300] 1b |L2.22-| "
"###
);
let split = TargetLevelTargetLevelSplit::new();
let (lower, higher) = split.apply(files, CompactionLevel::Final);

View File

@ -145,7 +145,7 @@ mod tests {
create_overlapped_files_2, create_overlapped_files_3, create_overlapped_files_3_mix_size,
create_overlapped_l0_l1_files, create_overlapped_l1_l2_files,
create_overlapped_l1_l2_files_mix_size, create_overlapped_l1_l2_files_mix_size_2,
create_overlapping_l0_files,
create_overlapping_l0_files, format_files, format_files_split,
};
use super::*;
@ -202,24 +202,54 @@ mod tests {
}
#[test]
// |--L0.1-----|
// |--L0.2--| |--L0.3--|
fn test_apply_one_level_overlap_small_l0() {
let files = create_overlapping_l0_files((MAX_SIZE - 1) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 99b "
- "L0.2[150,180] |L0.2| "
- "L0.1[100,200] |--L0.1--| "
- "L0.3[800,900] |--L0.3--|"
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
// All files are small --> nothing to upgrade
assert_eq!(files_to_compact.len(), 3);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 99b "
- "L0.3[800,900] |--L0.3--|"
- "L0.1[100,200] |--L0.1--| "
- "L0.2[150,180] |L0.2| "
- files_to_upgrade
"###
);
}
#[test]
// |--L0.1-----|
// |--L0.2--| |--L0.3--|
fn test_apply_one_level_overlap_large_l0() {
let files = create_overlapping_l0_files((MAX_SIZE + 1) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 101b "
- "L0.2[150,180] |L0.2| "
- "L0.1[100,200] |--L0.1--| "
- "L0.3[800,900] |--L0.3--|"
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
@ -227,275 +257,599 @@ mod tests {
// All files are large but only one eligible for upgrade
// files_to_compact = [L0.1, L0.2]
// files_to_upgrade = [L0.3]
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_upgrade.len(), 1);
// verify the files by sorting by id
let mut files_to_compact = files_to_compact;
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_compact[0].id.get(), 1);
assert_eq!(files_to_compact[1].id.get(), 2);
assert_eq!(files_to_upgrade[0].id.get(), 3);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 101b "
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[150,180] |---------L0.2---------| "
- files_to_upgrade
- "L0, all files 101b "
- "L0.3[800,900] |-------------------------------------L0.3-------------------------------------|"
"###
);
}
#[test]
// |--L0.1--| |--L0.2--| |--L0.3--|
fn test_apply_one_level_small_l0() {
let files = create_l0_files((MAX_SIZE - 1) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 99b "
- "L0.2[650,750] |-----L0.2------| "
- "L0.1[450,620] |------------L0.1------------| "
- "L0.3[800,900] |-----L0.3------| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
// All files are small --> nothing to upgrade
assert_eq!(files_to_compact.len(), 3);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 99b "
- "L0.3[800,900] |-----L0.3------| "
- "L0.1[450,620] |------------L0.1------------| "
- "L0.2[650,750] |-----L0.2------| "
- files_to_upgrade
"###
);
}
#[test]
// |--L0.1--| |--L0.2--| |--L0.3--|
fn test_apply_one_level_large_l0() {
let files = create_l0_files((MAX_SIZE + 1) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 101b "
- "L0.2[650,750] |-----L0.2------| "
- "L0.1[450,620] |------------L0.1------------| "
- "L0.3[800,900] |-----L0.3------| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
// All files are large and eligible for upgrade
assert_eq!(files_to_compact.len(), 0);
assert_eq!(files_to_upgrade.len(), 3);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- files_to_upgrade
- "L0, all files 101b "
- "L0.2[650,750] |-----L0.2------| "
- "L0.1[450,620] |------------L0.1------------| "
- "L0.3[800,900] |-----L0.3------| "
"###
);
}
#[test]
// |--L1.1--| |--L1.2--| |--L1.3--|
fn test_apply_one_level_small_l1() {
let files = create_l1_files((MAX_SIZE - 1) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 99b "
- "L1.13[600,700] |-----L1.13-----| "
- "L1.12[400,500] |-----L1.12-----| "
- "L1.11[250,350] |-----L1.11-----| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
// All files are small --> nothing to upgrade
assert_eq!(files_to_compact.len(), 3);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L1, all files 99b "
- "L1.13[600,700] |-----L1.13-----| "
- "L1.12[400,500] |-----L1.12-----| "
- "L1.11[250,350] |-----L1.11-----| "
- files_to_upgrade
"###
);
}
#[test]
// |--L1.1--| |--L1.2--| |--L1.3--|
fn test_apply_one_level_large_l1() {
let files = create_l1_files((MAX_SIZE + 1) as i64);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
// All files are large and eligible for upgrade
assert_eq!(files_to_compact.len(), 0);
assert_eq!(files_to_upgrade.len(), 3);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- files_to_upgrade
- "L1, all files 101b "
- "L1.13[600,700] |-----L1.13-----| "
- "L1.12[400,500] |-----L1.12-----| "
- "L1.11[250,350] |-----L1.11-----| "
"###
);
}
#[test]
// |--L1.1--| |--L1.2--| |--L1.3--| |--L1.4--| |--L1.5--|
// . small files (< size ): L1.1, L1.3
// . Large files (>= size): L1.2, L1.4, L1.5
//
// . files_to_compact = [L1.1, L1.2, L1.3]
// . files_to_upgrade = [L1.4, L1.5]
fn test_apply_one_level_l1_mix_size() {
let files = create_l1_files_mix_size(MAX_SIZE as i64);
// . small files (< size ): L1.1, L1.3
// . Large files (>= size): L1.2, L1.4, L1.5
//
// . files_to_compact = [L1.1, L1.2, L1.3]
// . files_to_upgrade = [L1.4, L1.5]
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1 "
- "L1.15[1000,1100] 200b |-L1.15-| "
- "L1.13[600,700] 90b |-L1.13-| "
- "L1.12[400,500] 101b |-L1.12-| "
- "L1.11[250,350] 99b |-L1.11-| "
- "L1.14[800,900] 100b |-L1.14-| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
// All files are large and eligible for upgrade
assert_eq!(files_to_compact.len(), 3);
assert_eq!(files_to_upgrade.len(), 2);
// verify IDs
let mut files_to_compact = files_to_compact;
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_compact[0].id.get(), 11);
assert_eq!(files_to_compact[1].id.get(), 12);
assert_eq!(files_to_compact[2].id.get(), 13);
let mut files_to_upgrade = files_to_upgrade;
files_to_upgrade.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_upgrade[0].id.get(), 14);
assert_eq!(files_to_upgrade[1].id.get(), 15);
// Some files are large and eligible for upgrade
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L1 "
- "L1.11[250,350] 99b |-----L1.11-----| "
- "L1.13[600,700] 90b |-----L1.13-----| "
- "L1.12[400,500] 101b |-----L1.12-----| "
- files_to_upgrade
- "L1 "
- "L1.15[1000,1100] 200b |---------L1.15----------| "
- "L1.14[800,900] 100b |---------L1.14----------| "
"###
);
}
#[test]
// |--L1.1--| |--L1.2--| |--L1.3--|
// |--L0.1--| |--L0.2--| |--L0.3--|
fn test_apply_all_small_target_l1() {
let files = create_overlapped_l0_l1_files((MAX_SIZE - 1) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 99b "
- "L0.2[650,750] |---L0.2---| "
- "L0.1[450,620] |-------L0.1-------| "
- "L0.3[800,900] |---L0.3---| "
- "L1, all files 99b "
- "L1.13[600,700] |--L1.13---| "
- "L1.12[400,500] |--L1.12---| "
- "L1.11[250,350] |--L1.11---| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
// All files are small --> nothing to upgrade
assert_eq!(files_to_compact.len(), 6);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 99b "
- "L0.3[800,900] |---L0.3---| "
- "L0.1[450,620] |-------L0.1-------| "
- "L0.2[650,750] |---L0.2---| "
- "L1, all files 99b "
- "L1.13[600,700] |--L1.13---| "
- "L1.12[400,500] |--L1.12---| "
- "L1.11[250,350] |--L1.11---| "
- files_to_upgrade
"###
);
}
#[test]
// |--L1.1--| |--L1.2--| |--L1.3--|
// |--L0.1--| |--L0.2--| |--L0.3--|
fn test_apply_all_large_target_l1() {
let files = create_overlapped_l0_l1_files((MAX_SIZE) as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |---L0.2---| "
- "L0.1[450,620] |-------L0.1-------| "
- "L0.3[800,900] |---L0.3---| "
- "L1, all files 100b "
- "L1.13[600,700] |--L1.13---| "
- "L1.12[400,500] |--L1.12---| "
- "L1.11[250,350] |--L1.11---| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
// All files are large --> L0.3 is eligible for upgrade
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_upgrade.len(), 1);
// verify IDs
let mut files_to_compact = files_to_compact;
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_compact[0].id.get(), 1);
assert_eq!(files_to_compact[1].id.get(), 2);
assert_eq!(files_to_compact[2].id.get(), 11);
assert_eq!(files_to_compact[3].id.get(), 12);
assert_eq!(files_to_compact[4].id.get(), 13);
//
assert_eq!(files_to_upgrade[0].id.get(), 3);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 100b "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.2[650,750] |-----L0.2-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
- "L1.11[250,350] |----L1.11-----| "
- files_to_upgrade
- "L0, all files 100b "
- "L0.3[800,900] |-------------------------------------L0.3-------------------------------------|"
"###
);
}
#[test]
// |--L2.1--| |--L2.2--|
// |--L1.1--| |--L1.2--| |--L1.3--|
fn test_apply_all_small_target_l2() {
let files = create_overlapped_l1_l2_files((MAX_SIZE - 1) as i64);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
// All files are small --> nothing to upgrade
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L1, all files 99b "
- "L1.11[250,350] |--L1.11--| "
- "L1.12[400,500] |--L1.12--| "
- "L1.13[600,700] |--L1.13--| "
- "L2, all files 99b "
- "L2.21[0,100] |--L2.21--| "
- "L2.22[200,300] |--L2.22--| "
- files_to_upgrade
"###
);
}
#[test]
// |--L2.1--| |--L2.2--|
// |--L1.1--| |--L1.2--| |--L1.3--|
fn test_apply_all_large_target_l2() {
let files = create_overlapped_l1_l2_files(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 100b "
- "L1.13[600,700] |--L1.13--| "
- "L1.12[400,500] |--L1.12--| "
- "L1.11[250,350] |--L1.11--| "
- "L2, all files 100b "
- "L2.21[0,100] |--L2.21--| "
- "L2.22[200,300] |--L2.22--| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
// All files are large --> L1.2 and L1.3 are eligible for upgrade
assert_eq!(files_to_compact.len(), 3);
assert_eq!(files_to_upgrade.len(), 2);
// verify IDs
let mut files_to_compact = files_to_compact;
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_compact[0].id.get(), 11);
assert_eq!(files_to_compact[1].id.get(), 21);
assert_eq!(files_to_compact[2].id.get(), 22);
let mut files_to_upgrade = files_to_upgrade;
files_to_upgrade.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_upgrade[0].id.get(), 12);
assert_eq!(files_to_upgrade[1].id.get(), 13);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L1, all files 100b "
- "L1.11[250,350] |-------L1.11--------| "
- "L2, all files 100b "
- "L2.21[0,100] |-------L2.21--------| "
- "L2.22[200,300] |-------L2.22--------| "
- files_to_upgrade
- "L1, all files 100b "
- "L1.13[600,700] |---------L1.13----------| "
- "L1.12[400,500] |---------L1.12----------| "
"###
);
}
#[test]
// |--L2.1--| |--L2.2--|
// |--L1.1--| |--L1.2--| |--L1.3--|
// Small files (< size): [L1.3]
// Large files: [L2.1, L2.2, L1.1, L1.2]
// ==> nothing to upgrade
fn test_apply_all_small_target_l2_mix_size() {
let files = create_overlapped_l1_l2_files_mix_size(MAX_SIZE as i64);
// Small files (< size): [L1.3]
// Large files: [L2.1, L2.2, L1.1, L1.2]
// ==> nothing to upgrade
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1 "
- "L1.13[600,700] 99b |--L1.13--| "
- "L1.12[400,500] 100b |--L1.12--| "
- "L1.11[250,350] 100b |--L1.11--| "
- "L2 "
- "L2.21[0,100] 100b |--L2.21--| "
- "L2.22[200,300] 100b |--L2.22--| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L1 "
- "L1.11[250,350] 100b |--L1.11--| "
- "L1.13[600,700] 99b |--L1.13--| "
- "L1.12[400,500] 100b |--L1.12--| "
- "L2 "
- "L2.21[0,100] 100b |--L2.21--| "
- "L2.22[200,300] 100b |--L2.22--| "
- files_to_upgrade
"###
);
}
#[test]
// |--L2.1--| |--L2.2--|
// |--L1.1--| |--L1.2--| |--L1.3--|
// Small files (< size): [L1.2]
// Large files: [L2.1, L2.2, L1.1, L1.3]
// ==> L1.3 is eligible for upgrade
fn test_apply_all_small_target_l2_mix_size_2() {
let files = create_overlapped_l1_l2_files_mix_size_2(MAX_SIZE as i64);
// Small files (< size): [L1.2]
// Large files: [L2.1, L2.2, L1.1, L1.3]
// ==> L1.3 is eligible for upgrade
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1 "
- "L1.13[600,700] 100b |--L1.13--| "
- "L1.12[400,500] 99b |--L1.12--| "
- "L1.11[250,350] 100b |--L1.11--| "
- "L2 "
- "L2.21[0,100] 100b |--L2.21--| "
- "L2.22[200,300] 100b |--L2.22--| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) = split.apply(files, CompactionLevel::Final);
assert_eq!(files_to_compact.len(), 4);
assert_eq!(files_to_upgrade.len(), 1);
assert_eq!(files_to_upgrade[0].id.get(), 13);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L1 "
- "L1.11[250,350] 100b |----L1.11-----| "
- "L1.12[400,500] 99b |----L1.12-----|"
- "L2 "
- "L2.21[0,100] 100b |----L2.21-----| "
- "L2.22[200,300] 100b |----L2.22-----| "
- files_to_upgrade
- "L1, all files 100b "
- "L1.13[600,700] |------------------------------------L1.13-------------------------------------|"
"###
);
}
#[test]
// |--L0.1--| |--L0.2--|
// |--L1.1--| |--L1.2--| |--L1.3--| |--L1.4--|
// L0s in the time range of L1 ==> nothing to upgrade
fn test_apply_all_large_but_no_upragde() {
let files = create_overlapped_files_2(MAX_SIZE as i64);
// L0s in the time range of L1 ==> nothing to upgrade
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[520,550] |L0.2| "
- "L0.1[250,350] |--L0.1---| "
- "L1, all files 100b "
- "L1.13[400,500] |--L1.13--| "
- "L1.12[200,300] |--L1.12--| "
- "L1.11[0,100] |--L1.11--| "
- "L1.14[600,700] |--L1.14--| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 6);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 100b "
- "L0.1[250,350] |--L0.1---| "
- "L0.2[520,550] |L0.2| "
- "L1, all files 100b "
- "L1.13[400,500] |--L1.13--| "
- "L1.12[200,300] |--L1.12--| "
- "L1.11[0,100] |--L1.11--| "
- "L1.14[600,700] |--L1.14--| "
- files_to_upgrade
"###
);
}
#[test]
// |--L0.1--| |--L0.2--| |--L0.3--|
// |--L0.4--| |--L0.5--| |--L0.6--|
// |--L1.1--| |--L1.2--|
// All small ==> nothing to upgrade
fn test_apply_all_small_target_l1_2() {
let files = create_overlapped_files_3((MAX_SIZE - 1) as i64);
// All small ==> nothing to upgrade
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 99b "
- "L0.3[400,500] |L0.3-| "
- "L0.2[200,300] |L0.2-| "
- "L0.1[0,100] |L0.1-| "
- "L0.4[600,700] |L0.4-| "
- "L0.5[800,900] |L0.5-| "
- "L0.6[1000,1100] |L0.6-| "
- "L1, all files 99b "
- "L1.11[250,350] |L1.11| "
- "L1.12[650,750] |L1.12| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 8);
assert_eq!(files_to_upgrade.len(), 0);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 99b "
- "L0.6[1000,1100] |L0.6-| "
- "L0.5[800,900] |L0.5-| "
- "L0.4[600,700] |L0.4-| "
- "L0.1[0,100] |L0.1-| "
- "L0.2[200,300] |L0.2-| "
- "L0.3[400,500] |L0.3-| "
- "L1, all files 99b "
- "L1.11[250,350] |L1.11| "
- "L1.12[650,750] |L1.12| "
- files_to_upgrade
"###
);
}
#[test]
// |--L0.1--| |--L0.2--| |--L0.3--|
// |--L0.4--| |--L0.5--| |--L0.6--|
// |--L1.1--| |--L1.2--|
// All large ==> L0.1, L0.5, L0.6 are eligible for upgrade
// files_to_compact: [L0.2, L0.3, L0.4, L1.1, L1.2]
// files_to_upgrade: [L0.1, L0.5, L0.6]
fn test_apply_all_large_target_l1_2() {
let files = create_overlapped_files_3((MAX_SIZE + 10) as i64);
// All large ==> L0.1, L0.5, L0.6 are eligible for upgrade
// files_to_compact: [L0.2, L0.3, L0.4, L1.1, L1.2]
// files_to_upgrade: [L0.1, L0.5, L0.6]
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 110b "
- "L0.3[400,500] |L0.3-| "
- "L0.2[200,300] |L0.2-| "
- "L0.1[0,100] |L0.1-| "
- "L0.4[600,700] |L0.4-| "
- "L0.5[800,900] |L0.5-| "
- "L0.6[1000,1100] |L0.6-| "
- "L1, all files 110b "
- "L1.11[250,350] |L1.11| "
- "L1.12[650,750] |L1.12| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_upgrade.len(), 3);
let mut files_to_compact = files_to_compact;
files_to_compact.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_compact[0].id.get(), 2);
assert_eq!(files_to_compact[1].id.get(), 3);
assert_eq!(files_to_compact[2].id.get(), 4);
assert_eq!(files_to_compact[3].id.get(), 11);
assert_eq!(files_to_compact[4].id.get(), 12);
let mut files_to_upgrade = files_to_upgrade;
files_to_upgrade.sort_by(|a, b| a.id.cmp(&b.id));
assert_eq!(files_to_upgrade[0].id.get(), 1);
assert_eq!(files_to_upgrade[1].id.get(), 5);
assert_eq!(files_to_upgrade[2].id.get(), 6);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0, all files 110b "
- "L0.4[600,700] |----L0.4----| "
- "L0.2[200,300] |----L0.2----| "
- "L0.3[400,500] |----L0.3----| "
- "L1, all files 110b "
- "L1.11[250,350] |---L1.11----| "
- "L1.12[650,750] |---L1.12----| "
- files_to_upgrade
- "L0, all files 110b "
- "L0.1[0,100] |L0.1-| "
- "L0.5[800,900] |L0.5-| "
- "L0.6[1000,1100] |L0.6-| "
"###
);
}
#[test]
// |--L0.1--| |--L0.2--| |--L0.3--|
// |--L0.4--| |--L0.5--| |--L0.6--|
// |--L1.1--| |--L1.2--|
// Small files (< size): L0.6
// Large files: the rest
// ==> only L0.1 is eligible for upgrade
fn test_apply_mix_size_target_l1_2() {
let files = create_overlapped_files_3_mix_size(MAX_SIZE as i64);
// Small files (< size): L0.6
// Large files: the rest
// ==> only L0.1 is eligible for upgrade
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.3[400,500] 100b |L0.3-| "
- "L0.2[200,300] 100b |L0.2-| "
- "L0.1[0,100] 100b |L0.1-| "
- "L0.4[600,700] 100b |L0.4-| "
- "L0.5[800,900] 100b |L0.5-| "
- "L0.6[1000,1100] 99b |L0.6-| "
- "L1 "
- "L1.11[250,350] 100b |L1.11| "
- "L1.12[650,750] 100b |L1.12| "
"###
);
let split = TargetLevelUpgradeSplit::new(MAX_SIZE);
let (files_to_compact, files_to_upgrade) =
split.apply(files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 7);
assert_eq!(files_to_upgrade.len(), 1);
assert_eq!(files_to_upgrade[0].id.get(), 1);
insta::assert_yaml_snapshot!(
format_files_split("files_to_compact", &files_to_compact, "files_to_upgrade", &files_to_upgrade),
@r###"
---
- files_to_compact
- "L0 "
- "L0.6[1000,1100] 99b |-L0.6-| "
- "L0.4[600,700] 100b |-L0.4-| "
- "L0.2[200,300] 100b |-L0.2-| "
- "L0.3[400,500] 100b |-L0.3-| "
- "L0.5[800,900] 100b |-L0.5-| "
- "L1 "
- "L1.11[250,350] 100b |L1.11-| "
- "L1.12[650,750] 100b |L1.12-| "
- files_to_upgrade
- "L0, all files 100b "
- "L0.1[0,100] |-------------------------------------L0.1-------------------------------------|"
"###
);
}
}

View File

@ -12,7 +12,7 @@ use crate::{
namespaces_source::catalog::CatalogNamespacesSource,
tables_source::catalog::CatalogTablesSource,
},
config::{AlgoVersion, Config},
config::{AlgoVersion, Config, PartitionsSourceConfig},
error::ErrorKind,
};
@ -22,7 +22,9 @@ use super::{
catalog::CatalogCommit, logging::LoggingCommitWrapper, metrics::MetricsCommitWrapper,
mock::MockCommit, Commit,
},
df_plan_exec::dedicated::DedicatedDataFusionPlanExec,
df_plan_exec::{
dedicated::DedicatedDataFusionPlanExec, noop::NoopDataFusionPlanExec, DataFusionPlanExec,
},
df_planner::{logging::LoggingDataFusionPlannerWrapper, planner_v1::V1DataFusionPlanner},
divide_initial::single_branch::SingleBranchDivideInitial,
file_filter::{and::AndFileFilter, level_range::LevelRangeFileFilter},
@ -36,13 +38,12 @@ use super::{
target_level_upgrade_split::TargetLevelUpgradeSplit, FilesSplit,
},
id_only_partition_filter::{
and::AndIdOnlyPartitionFilter, by_id::ByIdPartitionFilter, shard::ShardPartitionFilter,
IdOnlyPartitionFilter,
and::AndIdOnlyPartitionFilter, shard::ShardPartitionFilter, IdOnlyPartitionFilter,
},
level_exist::one_level::OneLevelExist,
parquet_file_sink::{
dedicated::DedicatedExecParquetFileSinkWrapper, logging::LoggingParquetFileSinkWrapper,
object_store::ObjectStoreParquetFileSink,
mock::MockParquetFileSink, object_store::ObjectStoreParquetFileSink, ParquetFileSink,
},
partition_done_sink::{
catalog::CatalogPartitionDoneSink, error_kind::ErrorKindPartitionDoneSinkWrapper,
@ -52,6 +53,7 @@ use super::{
partition_files_source::catalog::CatalogPartitionFilesSource,
partition_filter::{
and::AndPartitionFilter, greater_matching_files::GreaterMatchingFilesPartitionFilter,
greater_size_matching_files::GreaterSizeMatchingFilesPartitionFilter,
has_files::HasFilesPartitionFilter, has_matching_file::HasMatchingFilePartitionFilter,
logging::LoggingPartitionFilterWrapper, max_files::MaxFilesPartitionFilter,
max_parquet_bytes::MaxParquetBytesPartitionFilter, metrics::MetricsPartitionFilterWrapper,
@ -65,13 +67,18 @@ use super::{
endless::EndlessPartititionStream, once::OncePartititionStream, PartitionStream,
},
partitions_source::{
catalog::CatalogPartitionsSource, filter::FilterPartitionsSourceWrapper,
logging::LoggingPartitionsSourceWrapper, metrics::MetricsPartitionsSourceWrapper,
mock::MockPartitionsSource, not_empty::NotEmptyPartitionsSourceWrapper,
catalog_all::CatalogAllPartitionsSource,
catalog_to_compact::CatalogToCompactPartitionsSource,
filter::FilterPartitionsSourceWrapper, logging::LoggingPartitionsSourceWrapper,
metrics::MetricsPartitionsSourceWrapper, mock::MockPartitionsSource,
not_empty::NotEmptyPartitionsSourceWrapper,
randomize_order::RandomizeOrderPartitionsSourcesWrapper, PartitionsSource,
},
round_split::all_now::AllNowRoundSplit,
scratchpad::{ignore_writes_object_store::IgnoreWrites, prod::ProdScratchpadGen},
scratchpad::{
ignore_writes_object_store::IgnoreWrites, noop::NoopScratchpadGen, prod::ProdScratchpadGen,
ScratchpadGen,
},
skipped_compactions_source::catalog::CatalogSkippedCompactionsSource,
target_level_chooser::{
all_at_once::AllAtOnceTargetLevelChooser, target_level::TargetLevelTargetLevelChooser,
@ -85,22 +92,25 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
// TODO: partitions source: Implementing ID-based sharding / hash-partitioning so we can run multiple compactors in
// parallel. This should be a wrapper around the existing partitions source.
let partitions_source: Arc<dyn PartitionsSource> = if let Some(ids) = &config.partition_filter {
Arc::new(MockPartitionsSource::new(ids.iter().cloned().collect()))
} else {
Arc::new(CatalogPartitionsSource::new(
let partitions_source: Arc<dyn PartitionsSource> = match &config.partitions_source {
PartitionsSourceConfig::CatalogRecentWrites => {
Arc::new(CatalogToCompactPartitionsSource::new(
config.backoff_config.clone(),
Arc::clone(&config.catalog),
config.partition_threshold,
Arc::clone(&config.time_provider),
))
}
PartitionsSourceConfig::CatalogAll => Arc::new(CatalogAllPartitionsSource::new(
config.backoff_config.clone(),
Arc::clone(&config.catalog),
config.partition_threshold,
Arc::clone(&config.time_provider),
))
)),
PartitionsSourceConfig::Fixed(ids) => {
Arc::new(MockPartitionsSource::new(ids.iter().cloned().collect()))
}
};
let mut id_only_partition_filters: Vec<Arc<dyn IdOnlyPartitionFilter>> = vec![];
if let Some(ids) = &config.partition_filter {
// filter as early as possible, so we don't need any catalog lookups for the filtered partitions
id_only_partition_filters.push(Arc::new(ByIdPartitionFilter::new(ids.clone())));
}
if let Some(shard_config) = &config.shard_config {
// add shard filter before performing any catalog IO
id_only_partition_filters.push(Arc::new(ShardPartitionFilter::new(
@ -125,6 +135,15 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
}
partition_filters.append(&mut version_specific_partition_filters(config));
let partition_resource_limit_filters: Vec<Arc<dyn PartitionFilter>> = vec![
Arc::new(MaxFilesPartitionFilter::new(
config.max_input_files_per_partition,
)),
Arc::new(MaxParquetBytesPartitionFilter::new(
config.max_input_parquet_bytes_per_partition,
)),
];
let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.shadow_mode {
Arc::new(MockPartitionDoneSink::new())
} else {
@ -159,23 +178,81 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
Duration::from_secs(60),
1,
);
let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.all_errors_are_fatal {
Arc::new(partition_done_sink)
} else {
Arc::new(ErrorKindPartitionDoneSinkWrapper::new(
partition_done_sink,
ErrorKind::variants()
.iter()
.filter(|kind| {
// use explicit match statement so we never forget to add new variants
match kind {
ErrorKind::OutOfMemory | ErrorKind::Timeout | ErrorKind::Unknown => true,
ErrorKind::ObjectStore => false,
}
})
.copied()
.collect(),
))
};
// Note: Place "not empty" wrapper at the very last so that the logging and metric wrapper work even when there
// is not data.
let partitions_source = NotEmptyPartitionsSourceWrapper::new(
let partitions_source =
LoggingPartitionsSourceWrapper::new(MetricsPartitionsSourceWrapper::new(
RandomizeOrderPartitionsSourcesWrapper::new(partitions_source, 1234),
&config.metric_registry,
)),
Duration::from_secs(5),
Arc::clone(&config.time_provider),
);
));
let partitions_source: Arc<dyn PartitionsSource> = if config.process_once {
// do not wrap into the "not empty" filter because we do NOT wanna throttle in this case but just exit early
Arc::new(partitions_source)
} else {
Arc::new(NotEmptyPartitionsSourceWrapper::new(
partitions_source,
Duration::from_secs(5),
Arc::clone(&config.time_provider),
))
};
let partition_stream: Arc<dyn PartitionStream> = if config.process_once {
Arc::new(OncePartititionStream::new(partitions_source))
} else {
Arc::new(EndlessPartititionStream::new(partitions_source))
};
let partition_continue_conditions = "continue_conditions";
let partition_resource_limit_conditions = "resource_limit_conditions";
let scratchpad_gen: Arc<dyn ScratchpadGen> = if config.simulate_without_object_store {
Arc::new(NoopScratchpadGen::new())
} else {
Arc::new(ProdScratchpadGen::new(
config.partition_scratchpad_concurrency,
config.backoff_config.clone(),
Arc::clone(config.parquet_store_real.object_store()),
Arc::clone(config.parquet_store_scratchpad.object_store()),
scratchpad_store_output,
))
};
let df_plan_exec: Arc<dyn DataFusionPlanExec> = if config.simulate_without_object_store {
Arc::new(NoopDataFusionPlanExec::new())
} else {
Arc::new(DedicatedDataFusionPlanExec::new(Arc::clone(&config.exec)))
};
let parquet_file_sink: Arc<dyn ParquetFileSink> = if config.simulate_without_object_store {
Arc::new(MockParquetFileSink::new(false))
} else {
Arc::new(LoggingParquetFileSinkWrapper::new(
DedicatedExecParquetFileSinkWrapper::new(
ObjectStoreParquetFileSink::new(
config.shard_id,
config.parquet_store_scratchpad.clone(),
Arc::clone(&config.time_provider),
),
Arc::clone(&config.exec),
),
))
};
Arc::new(Components {
partition_stream,
@ -197,28 +274,12 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
MetricsPartitionFilterWrapper::new(
AndPartitionFilter::new(partition_filters),
&config.metric_registry,
partition_continue_conditions,
),
partition_continue_conditions,
)),
partition_done_sink: Arc::new(LoggingPartitionDoneSinkWrapper::new(
MetricsPartitionDoneSinkWrapper::new(
ErrorKindPartitionDoneSinkWrapper::new(
partition_done_sink,
ErrorKind::variants()
.iter()
.filter(|kind| {
// use explicit match statement so we never forget to add new variants
match kind {
ErrorKind::OutOfMemory
| ErrorKind::Timeout
| ErrorKind::Unknown => true,
ErrorKind::ObjectStore => false,
}
})
.copied()
.collect(),
),
&config.metric_registry,
),
MetricsPartitionDoneSinkWrapper::new(partition_done_sink, &config.metric_registry),
)),
commit: Arc::new(LoggingCommitWrapper::new(MetricsCommitWrapper::new(
commit,
@ -241,76 +302,56 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
config.split_percentage,
),
)),
df_plan_exec: Arc::new(DedicatedDataFusionPlanExec::new(Arc::clone(&config.exec))),
parquet_file_sink: Arc::new(LoggingParquetFileSinkWrapper::new(
DedicatedExecParquetFileSinkWrapper::new(
ObjectStoreParquetFileSink::new(
config.shard_id,
config.parquet_store_scratchpad.clone(),
Arc::clone(&config.time_provider),
),
Arc::clone(&config.exec),
),
)),
df_plan_exec,
parquet_file_sink,
round_split: Arc::new(AllNowRoundSplit::new()),
divide_initial: Arc::new(SingleBranchDivideInitial::new()),
scratchpad_gen: Arc::new(ProdScratchpadGen::new(
config.partition_scratchpad_concurrency,
config.backoff_config.clone(),
Arc::clone(config.parquet_store_real.object_store()),
Arc::clone(config.parquet_store_scratchpad.object_store()),
scratchpad_store_output,
)),
scratchpad_gen,
target_level_chooser: version_specific_target_level_chooser(config),
target_level_split: version_specific_target_level_split(config),
non_overlap_split: version_specific_non_ovverlapping_split(config),
upgrade_split: version_specific_upgrade_split(config),
partition_resource_limit_filter: Arc::new(LoggingPartitionFilterWrapper::new(
MetricsPartitionFilterWrapper::new(
AndPartitionFilter::new(partition_resource_limit_filters),
&config.metric_registry,
partition_resource_limit_conditions,
),
partition_resource_limit_conditions,
)),
})
}
// Conditions to compact this partition
// Same for all versions to protect the system from OOMs
// . Number of files < max_input_files_per_partition
// . Total size of files < max_input_parquet_bytes_per_partition
fn version_specific_partition_filters(config: &Config) -> Vec<Arc<dyn PartitionFilter>> {
match config.compact_version {
// Must has L0
AlgoVersion::AllAtOnce => {
vec![
vec![Arc::new(HasMatchingFilePartitionFilter::new(
LevelRangeFileFilter::new(CompactionLevel::Initial..=CompactionLevel::Initial),
))]
}
// (Has-L0) OR -- to avoid overlapped files
// (num(L1) > N) OR -- to avoid many files
// (total_size(L1) >= max_desired_file_size) -- to avoid compacting and then splitting
AlgoVersion::TargetLevel => {
vec![Arc::new(OrPartitionFilter::new(vec![
Arc::new(HasMatchingFilePartitionFilter::new(
LevelRangeFileFilter::new(CompactionLevel::Initial..=CompactionLevel::Initial),
)),
Arc::new(MaxFilesPartitionFilter::new(
config.max_input_files_per_partition,
Arc::new(GreaterMatchingFilesPartitionFilter::new(
LevelRangeFileFilter::new(
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
),
config.min_num_l1_files_to_compact,
)),
Arc::new(MaxParquetBytesPartitionFilter::new(
config.max_input_parquet_bytes_per_partition,
Arc::new(GreaterSizeMatchingFilesPartitionFilter::new(
LevelRangeFileFilter::new(
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
),
config.max_desired_file_size_bytes,
)),
]
}
// (Has-L0) OR (num(L1) > N)
AlgoVersion::TargetLevel => {
vec![
Arc::new(OrPartitionFilter::new(vec![
Arc::new(HasMatchingFilePartitionFilter::new(
LevelRangeFileFilter::new(
CompactionLevel::Initial..=CompactionLevel::Initial,
),
)),
Arc::new(GreaterMatchingFilesPartitionFilter::new(
LevelRangeFileFilter::new(
CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
),
config.min_num_l1_files_to_compact,
)),
])),
Arc::new(MaxFilesPartitionFilter::new(
config.max_input_files_per_partition,
)),
Arc::new(MaxParquetBytesPartitionFilter::new(
config.max_input_parquet_bytes_per_partition,
)),
]
]))]
}
}
}

View File

@ -10,6 +10,7 @@ pub struct ByIdPartitionFilter {
}
impl ByIdPartitionFilter {
#[allow(dead_code)] // not used anywhere
pub fn new(ids: HashSet<PartitionId>) -> Self {
Self { ids }
}

View File

@ -43,6 +43,7 @@ pub struct Components {
pub partition_files_source: Arc<dyn PartitionFilesSource>,
pub files_filter: Arc<dyn FilesFilter>,
pub partition_filter: Arc<dyn PartitionFilter>,
pub partition_resource_limit_filter: Arc<dyn PartitionFilter>,
pub partition_done_sink: Arc<dyn PartitionDoneSink>,
pub commit: Arc<dyn Commit>,
pub namespaces_source: Arc<dyn NamespacesSource>,

View File

@ -79,7 +79,7 @@ mod tests {
#[test]
fn test_display() {
let sink = DedicatedExecParquetFileSinkWrapper::new(
MockParquetFileSink::new(),
MockParquetFileSink::new(true),
Arc::new(Executor::new_testing()),
);
assert_eq!(sink.to_string(), "dedicated_exec(mock)",)
@ -88,7 +88,7 @@ mod tests {
#[tokio::test]
async fn test_panic() {
let sink = DedicatedExecParquetFileSinkWrapper::new(
MockParquetFileSink::new(),
MockParquetFileSink::new(true),
Arc::new(Executor::new_testing()),
);
let schema = SchemaBuilder::new().build().unwrap().as_arrow();

View File

@ -28,15 +28,19 @@ pub struct StoredFile {
pub schema: SchemaRef,
}
#[derive(Debug, Default)]
#[derive(Debug)]
pub struct MockParquetFileSink {
filter_empty_files: bool,
records: Mutex<Vec<StoredFile>>,
}
impl MockParquetFileSink {
#[allow(dead_code)] // not used anywhere
pub fn new() -> Self {
Self::default()
/// If filter_empty_files is true, parquet files that have "0" rows will not be written to `ParquetFile`s in the catalog.
pub fn new(filter_empty_files: bool) -> Self {
Self {
filter_empty_files,
records: Default::default(),
}
}
#[allow(dead_code)] // not used anywhere
@ -64,7 +68,7 @@ impl ParquetFileSink for MockParquetFileSink {
let batches: Vec<_> = stream.try_collect().await?;
let row_count = batches.iter().map(|b| b.num_rows()).sum::<usize>();
let mut guard = self.records.lock().expect("not poisoned");
let out = (row_count > 0).then(|| ParquetFileParams {
let out = ((row_count > 0) || !self.filter_empty_files).then(|| ParquetFileParams {
shard_id: ShardId::new(1),
namespace_id: partition.namespace_id,
table_id: partition.table.id,
@ -106,12 +110,12 @@ mod tests {
#[test]
fn test_display() {
assert_eq!(MockParquetFileSink::new().to_string(), "mock");
assert_eq!(MockParquetFileSink::new(false).to_string(), "mock");
}
#[tokio::test]
async fn test_store() {
let sink = MockParquetFileSink::new();
async fn test_store_filter_empty() {
let sink = MockParquetFileSink::new(true);
let schema = SchemaBuilder::new()
.field("f", DataType::Int64)
@ -202,4 +206,53 @@ mod tests {
assert_eq!(records[2].level, level);
assert_eq!(records[2].partition, partition);
}
// Storing an empty record batch stream in a sink built with `filter_empty_files = false`
// must still report a file and record the (empty) store call.
#[tokio::test]
async fn test_store_keep_empty() {
// `false` => do not filter out files without rows
let sink = MockParquetFileSink::new(false);
let schema = SchemaBuilder::new()
.field("f", DataType::Int64)
.unwrap()
.build()
.unwrap()
.as_arrow();
let partition = partition_info();
let level = CompactionLevel::FileNonOverlapped;
let max_l0_created_at = Time::from_timestamp_nanos(1);
// stream that yields no record batches at all
let stream = Box::pin(RecordBatchStreamAdapter::new(
Arc::clone(&schema),
futures::stream::empty(),
));
// NOTE(review): `file_size_bytes: 1` and `row_count: 1` are expected even though the stream is
// empty; presumably the mock reports fixed placeholder values -- confirm against the mock's `store`.
assert_eq!(
sink.store(stream, Arc::clone(&partition), level, max_l0_created_at)
.await
.unwrap(),
Some(ParquetFileParams {
shard_id: ShardId::new(1),
namespace_id: NamespaceId::new(2),
table_id: TableId::new(3),
partition_id: PartitionId::new(1),
object_store_id: Uuid::from_u128(0),
max_sequence_number: SequenceNumber::new(0),
min_time: Timestamp::new(0),
max_time: Timestamp::new(0),
file_size_bytes: 1,
row_count: 1,
compaction_level: CompactionLevel::FileNonOverlapped,
created_at: Timestamp::new(1),
column_set: ColumnSet::new([]),
max_l0_created_at: max_l0_created_at.into(),
}),
);
// exactly one store call was recorded, and it carries zero batches
let records = sink.records();
assert_eq!(records.len(), 1);
assert_eq!(records[0].batches.len(), 0);
assert_eq!(records[0].schema, schema);
assert_eq!(records[0].level, level);
assert_eq!(records[0].partition, partition);
}
}

View File

@ -0,0 +1,132 @@
use std::fmt::Display;
use async_trait::async_trait;
use data_types::{ParquetFile, PartitionId};
use crate::{components::file_filter::FileFilter, error::DynError};
use super::PartitionFilter;
/// A partition filter that matches partitions whose files matching the given file filter
/// have a total size >= `max_desired_file_bytes`.
///
/// The idea for doing this:
/// 1. Not to compact large input size to avoid hitting OOM/crash.
/// 2. Not to compact too-large input size that leads to unnecessary splitting into many files.
///    - Because we limit the size of a file. If the compacted result is too large, we will split it into many files.
///    - Because Level-1 files do not overlap, it is a waste to compact too-large size and then split.
#[derive(Debug)]
pub struct GreaterSizeMatchingFilesPartitionFilter<T>
where
T: FileFilter,
{
// Selects which files of a partition contribute to the size sum.
filter: T,
// Threshold in bytes; the partition matches once the summed size reaches it.
max_desired_file_bytes: u64,
}
impl<T> GreaterSizeMatchingFilesPartitionFilter<T>
where
    T: FileFilter,
{
    /// Creates a filter that sums the size of all files accepted by `filter` and
    /// compares that sum against `max_desired_file_bytes`.
    pub fn new(filter: T, max_desired_file_bytes: u64) -> Self {
        let threshold = max_desired_file_bytes;
        Self {
            max_desired_file_bytes: threshold,
            filter,
        }
    }
}
impl<T> Display for GreaterSizeMatchingFilesPartitionFilter<T>
where
    T: FileFilter,
{
    /// Renders as `greater_size_matching_file(<file filter>, <byte threshold>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            filter,
            max_desired_file_bytes,
        } = self;
        write!(
            f,
            "greater_size_matching_file({}, {})",
            filter, max_desired_file_bytes
        )
    }
}
#[async_trait]
impl<T> PartitionFilter for GreaterSizeMatchingFilesPartitionFilter<T>
where
    T: FileFilter,
{
    /// Returns `Ok(true)` when the combined `file_size_bytes` of all files accepted by the
    /// file filter is at least `max_desired_file_bytes`, `Ok(false)` otherwise.
    ///
    /// The partition ID is not consulted; only the given `files` are inspected. Files that
    /// the file filter rejects do not contribute to the sum.
    async fn apply(
        &self,
        _partition_id: PartitionId,
        files: &[ParquetFile],
    ) -> Result<bool, DynError> {
        // Accumulate and compare in i128:
        // - the sum of many i64 sizes cannot overflow, and
        // - the u64 threshold is compared without a lossy `as i64` cast (a threshold above
        //   `i64::MAX` would otherwise wrap to a negative number and match every partition).
        let total_bytes: i128 = files
            .iter()
            .filter(|file| self.filter.apply(file))
            .map(|file| i128::from(file.file_size_bytes))
            .sum();

        Ok(total_bytes >= i128::from(self.max_desired_file_bytes))
    }
}
#[cfg(test)]
mod tests {
    use data_types::CompactionLevel;

    use crate::{
        components::file_filter::level_range::LevelRangeFileFilter, test_util::ParquetFileBuilder,
    };

    use super::*;

    /// The display form names the wrapped file filter and the byte threshold.
    #[test]
    fn test_display() {
        let level_1_only = LevelRangeFileFilter::new(
            CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
        );
        let rendered = GreaterSizeMatchingFilesPartitionFilter::new(level_1_only, 1).to_string();

        assert_eq!(
            rendered,
            "greater_size_matching_file(level_range(1..=1), 1)"
        );
    }

    /// The filter passes exactly when the summed size of the level-1 files reaches 15 bytes.
    #[tokio::test]
    async fn test_apply() {
        let level_1_only = LevelRangeFileFilter::new(
            CompactionLevel::FileNonOverlapped..=CompactionLevel::FileNonOverlapped,
        );
        let filter = GreaterSizeMatchingFilesPartitionFilter::new(level_1_only, 15);

        let f1 = ParquetFileBuilder::new(0)
            .with_compaction_level(CompactionLevel::FileNonOverlapped)
            .with_file_size_bytes(10)
            .build();
        let f2 = ParquetFileBuilder::new(1)
            .with_compaction_level(CompactionLevel::FileNonOverlapped)
            .with_file_size_bytes(14)
            .build();
        let f3 = ParquetFileBuilder::new(2)
            .with_compaction_level(CompactionLevel::FileNonOverlapped)
            .with_file_size_bytes(15)
            .build();

        let p_id = PartitionId::new(1);

        // (input files, expected filter result)
        let cases: Vec<(Vec<ParquetFile>, bool)> = vec![
            // empty, not large enough
            (vec![], false),
            // Not large enough
            (vec![f1.clone()], false),
            (vec![f2.clone()], false),
            // large enough
            (vec![f1.clone(), f2.clone()], true),
            (vec![f3.clone()], true),
            (vec![f1, f2, f3], true),
        ];

        for (files, expected) in cases {
            assert_eq!(filter.apply(p_id, &files).await.unwrap(), expected);
        }
    }
}

View File

@ -14,14 +14,15 @@ where
T: PartitionFilter,
{
inner: T,
filter_type: &'static str,
}
impl<T> LoggingPartitionFilterWrapper<T>
where
T: PartitionFilter,
{
pub fn new(inner: T) -> Self {
Self { inner }
pub fn new(inner: T, filter_type: &'static str) -> Self {
Self { inner, filter_type }
}
}
@ -30,7 +31,7 @@ where
T: PartitionFilter,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "logging({})", self.inner)
write!(f, "logging({}, {})", self.inner, self.filter_type)
}
}
@ -47,13 +48,21 @@ where
let res = self.inner.apply(partition_id, files).await;
match &res {
Ok(true) => {
debug!(partition_id = partition_id.get(), "NOT filtered partition");
debug!(
partition_id = partition_id.get(),
filter_type = self.filter_type,
"NOT filtered partition"
);
}
Ok(false) => {
info!(partition_id = partition_id.get(), "filtered partition");
info!(
partition_id = partition_id.get(),
filter_type = self.filter_type,
"filtered partition"
);
}
Err(e) => {
error!(partition_id = partition_id.get(), %e, "error filtering filtered partition");
error!(partition_id = partition_id.get(), filter_type = self.filter_type, %e, "error filtering filtered partition");
}
}
res
@ -73,13 +82,13 @@ mod tests {
#[test]
fn test_display() {
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new());
assert_eq!(filter.to_string(), "logging(has_files)");
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), "test");
assert_eq!(filter.to_string(), "logging(has_files, test)");
}
#[tokio::test]
async fn test_apply() {
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new());
let filter = LoggingPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), "test");
let f = ParquetFileBuilder::new(0).build();
let p_id1 = PartitionId::new(1);
let p_id2 = PartitionId::new(2);
@ -91,8 +100,8 @@ mod tests {
assert_eq!(
capture.to_string(),
"level = INFO; message = filtered partition; partition_id = 1; \n\
level = DEBUG; message = NOT filtered partition; partition_id = 2; ",
"level = INFO; message = filtered partition; partition_id = 1; filter_type = \"test\";
level = DEBUG; message = NOT filtered partition; partition_id = 2; filter_type = \"test\"; ",
);
}
}

View File

@ -17,27 +17,29 @@ where
filter_counter: U64Counter,
error_counter: U64Counter,
inner: T,
filter_type: &'static str,
}
impl<T> MetricsPartitionFilterWrapper<T>
where
T: PartitionFilter,
{
pub fn new(inner: T, registry: &Registry) -> Self {
pub fn new(inner: T, registry: &Registry, filter_type: &'static str) -> Self {
let metric = registry.register_metric::<U64Counter>(
"iox_compactor_partition_filter_count",
"Number of times the compactor fetched fresh partitions",
);
let pass_counter = metric.recorder(&[("result", "pass")]);
let filter_counter = metric.recorder(&[("result", "filter")]);
let error_counter = metric.recorder(&[("result", "error")]);
let pass_counter = metric.recorder(&[("result", "pass"), ("filter_type", filter_type)]);
let filter_counter = metric.recorder(&[("result", "filter"), ("filter_type", filter_type)]);
let error_counter = metric.recorder(&[("result", "error"), ("filter_type", filter_type)]);
Self {
pass_counter,
filter_counter,
error_counter,
inner,
filter_type,
}
}
}
@ -47,7 +49,7 @@ where
T: PartitionFilter,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "metrics({})", self.inner)
write!(f, "metrics({}, {})", self.inner, self.filter_type)
}
}
@ -91,14 +93,16 @@ mod tests {
#[test]
fn test_display() {
let registry = Registry::new();
let filter = MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), &registry);
assert_eq!(filter.to_string(), "metrics(has_files)",);
let filter =
MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), &registry, "test");
assert_eq!(filter.to_string(), "metrics(has_files, test)",);
}
#[tokio::test]
async fn test_apply() {
let registry = Registry::new();
let filter = MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), &registry);
let filter =
MetricsPartitionFilterWrapper::new(HasFilesPartitionFilter::new(), &registry, "test");
let p_id = PartitionId::new(1);
let f = ParquetFileBuilder::new(0).build();
@ -119,7 +123,10 @@ mod tests {
registry
.get_instrument::<Metric<U64Counter>>("iox_compactor_partition_filter_count")
.expect("instrument not found")
.get_observer(&Attributes::from(&[("result", "pass")]))
.get_observer(&Attributes::from(&[
("result", "pass"),
("filter_type", "test"),
]))
.expect("observer not found")
.fetch()
}
@ -128,7 +135,10 @@ mod tests {
registry
.get_instrument::<Metric<U64Counter>>("iox_compactor_partition_filter_count")
.expect("instrument not found")
.get_observer(&Attributes::from(&[("result", "filter")]))
.get_observer(&Attributes::from(&[
("result", "filter"),
("filter_type", "test"),
]))
.expect("observer not found")
.fetch()
}
@ -137,7 +147,10 @@ mod tests {
registry
.get_instrument::<Metric<U64Counter>>("iox_compactor_partition_filter_count")
.expect("instrument not found")
.get_observer(&Attributes::from(&[("result", "error")]))
.get_observer(&Attributes::from(&[
("result", "error"),
("filter_type", "test"),
]))
.expect("observer not found")
.fetch()
}

View File

@ -7,6 +7,7 @@ use crate::error::DynError;
pub mod and;
pub mod greater_matching_files;
pub mod greater_size_matching_files;
pub mod has_files;
pub mod has_matching_file;
pub mod logging;

View File

@ -0,0 +1,47 @@
use std::{fmt::Display, sync::Arc};
use async_trait::async_trait;
use backoff::{Backoff, BackoffConfig};
use data_types::PartitionId;
use iox_catalog::interface::Catalog;
use super::PartitionsSource;
#[derive(Debug)]
/// Returns all partitions in the catalog, regardless of any other condition
pub struct CatalogAllPartitionsSource {
// Retry policy applied to the catalog query issued by `fetch`.
backoff_config: BackoffConfig,
// Catalog whose partition IDs are listed.
catalog: Arc<dyn Catalog>,
}
impl CatalogAllPartitionsSource {
    /// Creates a source that lists partition IDs from `catalog`, using `backoff_config`
    /// for retries of the catalog query.
    pub fn new(backoff_config: BackoffConfig, catalog: Arc<dyn Catalog>) -> Self {
        Self {
            catalog,
            backoff_config,
        }
    }
}
impl Display for CatalogAllPartitionsSource {
    /// Always renders the fixed component name `catalog_all`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("catalog_all")
    }
}
#[async_trait]
impl PartitionsSource for CatalogAllPartitionsSource {
    /// Lists the IDs of all partitions known to the catalog.
    ///
    /// Every catalog error is retried via the configured backoff.
    async fn fetch(&self) -> Vec<PartitionId> {
        // One attempt: open the repositories and ask the partition repo for all IDs.
        let list_all_ids = || async {
            self.catalog
                .repositories()
                .await
                .partitions()
                .list_ids()
                .await
        };

        Backoff::new(&self.backoff_config)
            .retry_all_errors("list_ids", list_all_ids)
            .await
            .expect("retry forever")
    }
}

View File

@ -9,14 +9,15 @@ use iox_time::TimeProvider;
use super::PartitionsSource;
#[derive(Debug)]
pub struct CatalogPartitionsSource {
/// Returns all partitions that had a new parquet file written more than `threshold` ago.
pub struct CatalogToCompactPartitionsSource {
backoff_config: BackoffConfig,
catalog: Arc<dyn Catalog>,
threshold: Duration,
time_provider: Arc<dyn TimeProvider>,
}
impl CatalogPartitionsSource {
impl CatalogToCompactPartitionsSource {
pub fn new(
backoff_config: BackoffConfig,
catalog: Arc<dyn Catalog>,
@ -32,14 +33,14 @@ impl CatalogPartitionsSource {
}
}
impl Display for CatalogPartitionsSource {
impl Display for CatalogToCompactPartitionsSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "catalog")
write!(f, "catalog_to_compact")
}
}
#[async_trait]
impl PartitionsSource for CatalogPartitionsSource {
impl PartitionsSource for CatalogToCompactPartitionsSource {
async fn fetch(&self) -> Vec<PartitionId> {
let cutoff = self.time_provider.now() - self.threshold;

View File

@ -6,7 +6,8 @@ use std::{
use async_trait::async_trait;
use data_types::PartitionId;
pub mod catalog;
pub mod catalog_all;
pub mod catalog_to_compact;
pub mod filter;
pub mod logging;
pub mod metrics;

View File

@ -26,7 +26,7 @@ pub fn log_config(config: &Config) {
percentage_max_file_size,
split_percentage,
partition_timeout,
partition_filter,
partitions_source,
shadow_mode,
ignore_partition_skip_marker,
max_input_files_per_partition,
@ -35,6 +35,8 @@ pub fn log_config(config: &Config) {
compact_version,
min_num_l1_files_to_compact,
process_once,
simulate_without_object_store,
all_errors_are_fatal,
} = &config;
let (shard_cfg_n_shards, shard_cfg_shard_id) = match shard_config {
@ -63,7 +65,7 @@ pub fn log_config(config: &Config) {
percentage_max_file_size,
split_percentage,
partition_timeout_secs=partition_timeout.as_secs_f32(),
partition_filter=?partition_filter.as_ref().map(|ids| ids.iter().map(|id| id.get()).collect::<Vec<_>>()),
%partitions_source,
shadow_mode,
ignore_partition_skip_marker,
max_input_files_per_partition,
@ -73,6 +75,8 @@ pub fn log_config(config: &Config) {
?compact_version,
min_num_l1_files_to_compact,
process_once,
simulate_without_object_store,
all_errors_are_fatal,
"config",
);
}
@ -86,6 +90,7 @@ pub fn log_components(components: &Components) {
partition_files_source,
files_filter,
partition_filter,
partition_resource_limit_filter,
partition_done_sink,
commit,
tables_source,
@ -108,6 +113,7 @@ pub fn log_components(components: &Components) {
%partition_files_source,
%files_filter,
%partition_filter,
%partition_resource_limit_filter,
%partition_done_sink,
%commit,
%tables_source,

View File

@ -5,6 +5,7 @@ use parquet_file::ParquetFilePath;
use uuid::Uuid;
pub mod ignore_writes_object_store;
pub mod noop;
pub mod prod;
mod util;

View File

@ -0,0 +1,47 @@
use std::fmt::Display;
use async_trait::async_trait;
use parquet_file::ParquetFilePath;
use uuid::Uuid;
use super::{Scratchpad, ScratchpadGen};
/// A [`ScratchpadGen`] whose scratchpads ignore all inputs and outputs, for use in testing
#[derive(Debug, Default)]
pub struct NoopScratchpadGen;
impl NoopScratchpadGen {
pub fn new() -> Self {
Self::default()
}
}
impl Display for NoopScratchpadGen {
    /// Always renders the fixed component name `noop`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("noop")
    }
}
impl ScratchpadGen for NoopScratchpadGen {
    /// Hands out a new no-op scratchpad.
    fn pad(&self) -> Box<dyn Scratchpad> {
        let scratchpad = NoopScratchpad;
        Box::new(scratchpad)
    }
}
/// Scratchpad handed out by `NoopScratchpadGen`: its load/publish methods only report the
/// object store IDs of the paths they are given, and its clean-up methods do nothing.
#[derive(Debug)]
struct NoopScratchpad;
#[async_trait]
impl Scratchpad for NoopScratchpad {
    /// Reports the object store ID of every given path, in input order.
    async fn load_to_scratchpad(&mut self, files: &[ParquetFilePath]) -> Vec<Uuid> {
        let mut ids = Vec::with_capacity(files.len());
        for path in files {
            ids.push(path.objest_store_id());
        }
        ids
    }

    /// Reports the object store ID of every given path, in input order.
    async fn make_public(&mut self, files: &[ParquetFilePath]) -> Vec<Uuid> {
        let mut ids = Vec::with_capacity(files.len());
        for path in files {
            ids.push(path.objest_store_id());
        }
        ids
    }

    /// Nothing to remove.
    async fn clean_from_scratchpad(&mut self, _files: &[ParquetFilePath]) {}

    /// Nothing to remove.
    async fn clean(&mut self) {}
}

View File

@ -1,5 +1,5 @@
//! Config-related stuff.
use std::{collections::HashSet, num::NonZeroUsize, sync::Arc, time::Duration};
use std::{collections::HashSet, fmt::Display, num::NonZeroUsize, sync::Arc, time::Duration};
use backoff::{Backoff, BackoffConfig};
use data_types::{PartitionId, ShardId, ShardIndex};
@ -74,10 +74,8 @@ pub struct Config {
/// Maximum duration of the per-partition compaction task.
pub partition_timeout: Duration,
/// Filter partitions to the given set of IDs.
///
/// This is mostly useful for debugging.
pub partition_filter: Option<HashSet<PartitionId>>,
/// Source of partitions to consider for compaction.
pub partitions_source: PartitionsSourceConfig,
/// Shadow mode.
///
@ -111,6 +109,19 @@ pub struct Config {
/// Only process all discovered partitions once.
pub process_once: bool,
/// Simulate compactor w/o any object store interaction. No parquet
/// files will be read or written.
///
/// This will still use the catalog
///
/// This is mostly useful for testing.
pub simulate_without_object_store: bool,
/// Ensure that ALL errors (including object store errors) result in "skipped" partitions.
///
/// This is mostly useful for testing.
pub all_errors_are_fatal: bool,
}
impl Config {
@ -191,3 +202,34 @@ pub enum AlgoVersion {
/// NOT yet ready for production.
TargetLevel,
}
/// Partitions source config.
///
/// Selects where the compactor gets the IDs of the partitions it considers.
#[derive(Debug, Clone)]
pub enum PartitionsSourceConfig {
/// Use the catalog to determine which partitions have recently received writes.
CatalogRecentWrites,
/// Use all partitions from the catalog.
///
/// This does NOT consider if/when a partition received any writes.
CatalogAll,
/// Use a fixed set of partitions, given by their IDs.
///
/// This is mostly useful for debugging.
Fixed(HashSet<PartitionId>),
}
impl Display for PartitionsSourceConfig {
    /// Renders the variant name in snake case; `Fixed` additionally lists its partition IDs
    /// in sorted order so the output does not depend on hash-set iteration order.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let fixed_ids = match self {
            Self::CatalogRecentWrites => return write!(f, "catalog_recent_writes"),
            Self::CatalogAll => return write!(f, "catalog_all"),
            Self::Fixed(ids) => ids,
        };

        let mut sorted: Vec<_> = fixed_ids.iter().copied().collect();
        sorted.sort();
        write!(f, "fixed({sorted:?})")
    }
}

View File

@ -230,6 +230,21 @@ async fn try_compact_partition(
// compaction
let compaction_plan = build_compaction_plan(branch, Arc::clone(&components))?;
// Do not run this plan and skip this partition if it exceeds the limit on the number of input files or their size.
// The partition_resource_limit_filter will throw an error if one of the limits is hit, which leads
// to the partition being added to the `skipped_compactions` catalog table so that we do not bother
// compacting it again.
// TODO: After https://github.com/influxdata/idpe/issues/17090 is implemented (aka V3), we will
// split files into smaller branches and also compact L0s into fewer L0s to deal with all kinds
// of conditions even with limited resources. Then we will remove this resource limit check.
if !components
.partition_resource_limit_filter
.apply(partition_id, &compaction_plan.files_to_compact)
.await?
{
return Ok(());
}
// Compact
let created_file_params = run_compaction_plan(
&compaction_plan.files_to_compact,

View File

@ -178,7 +178,7 @@ mod error;
mod partition_info;
#[cfg(test)]
mod compactor_tests;
mod tests;
pub mod file_group;
#[cfg(test)]

View File

@ -1,5 +1,5 @@
mod display;
pub(crate) use display::{assert_parquet_files, assert_parquet_files_split};
pub(crate) use display::{format_files, format_files_split};
use std::{
collections::{BTreeMap, HashSet},
@ -19,7 +19,9 @@ use data_types::{
};
use datafusion::arrow::record_batch::RecordBatch;
use futures::TryStreamExt;
use iox_tests::util::{TestCatalog, TestParquetFileBuilder, TestTable};
use iox_tests::util::{
TestCatalog, TestNamespace, TestParquetFileBuilder, TestPartition, TestShard, TestTable,
};
use iox_time::TimeProvider;
use object_store::{path::Path, DynObjectStore};
use parquet_file::storage::{ParquetStorage, StorageId};
@ -28,7 +30,7 @@ use uuid::Uuid;
use crate::{
components::namespaces_source::mock::NamespaceWrapper,
config::{AlgoVersion, Config},
config::{AlgoVersion, Config, PartitionsSourceConfig},
partition_info::PartitionInfo,
};
@ -303,38 +305,18 @@ const SPLIT_PERCENTAGE: u16 = 80;
const MIN_NUM_L1_FILES_TO_COMPACT: usize = 2;
#[derive(Debug)]
pub struct TestSetupBuilder {
with_files: bool,
shadow_mode: bool,
compact_version: AlgoVersion,
pub struct TestSetupBuilder<const WITH_FILES: bool> {
config: Config,
catalog: Arc<TestCatalog>,
ns: Arc<TestNamespace>,
shard: Arc<TestShard>,
table: Arc<TestTable>,
partition: Arc<TestPartition>,
files: Vec<ParquetFile>,
}
impl Default for TestSetupBuilder {
fn default() -> Self {
Self {
with_files: false,
shadow_mode: false,
compact_version: AlgoVersion::AllAtOnce,
}
}
}
impl TestSetupBuilder {
pub fn with_files(self) -> Self {
Self {
with_files: true,
..self
}
}
pub fn with_shadow_mode(self) -> Self {
Self {
shadow_mode: true,
..self
}
}
pub async fn build(self) -> TestSetup {
impl TestSetupBuilder<false> {
pub async fn new() -> Self {
let catalog = TestCatalog::new();
let ns = catalog.create_namespace_1hr_retention("ns").await;
let shard = ns.create_shard(SHARD_INDEX).await;
@ -344,7 +326,6 @@ impl TestSetupBuilder {
table.create_column("tag2", ColumnType::Tag).await;
table.create_column("tag3", ColumnType::Tag).await;
table.create_column("time", ColumnType::Time).await;
let table_schema = table.catalog_schema().await;
let partition = table
.with_shard(&shard)
@ -356,126 +337,7 @@ impl TestSetupBuilder {
let sort_key = SortKey::from_columns(["tag1", "tag2", "tag3", "time"]);
let partition = partition.update_sort_key(sort_key.clone()).await;
let candidate_partition = Arc::new(PartitionInfo {
partition_id: partition.partition.id,
namespace_id: ns.namespace.id,
namespace_name: ns.namespace.name.clone(),
table: Arc::new(table.table.clone()),
table_schema: Arc::new(table_schema),
sort_key: partition.partition.sort_key(),
partition_key: partition.partition.partition_key.clone(),
});
let time_provider = Arc::<iox_time::MockProvider>::clone(&catalog.time_provider);
let mut parquet_files = vec![];
if self.with_files {
let time_1_minute_future = time_provider.minutes_into_future(1);
let time_2_minutes_future = time_provider.minutes_into_future(2);
let time_3_minutes_future = time_provider.minutes_into_future(3);
let time_5_minutes_future = time_provider.minutes_into_future(5);
// L1 file
let lp = vec![
"table,tag2=PA,tag3=15 field_int=1601i 30000",
"table,tag2=OH,tag3=21 field_int=21i 36000", // will be eliminated due to duplicate
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_3_minutes_future)
.with_max_l0_created_at(time_1_minute_future)
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
let level_1_file_1_minute_ago = partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec![
"table,tag1=WA field_int=1000i 8000", // will be eliminated due to duplicate
"table,tag1=VT field_int=10i 10000", // latest L0 compared with duplicate in level_1_file_1_minute_ago_with_duplicates
// keep it
"table,tag1=UT field_int=70i 20000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_2_minutes_future)
.with_max_l0_created_at(time_2_minutes_future)
.with_compaction_level(CompactionLevel::Initial);
let level_0_file_16_minutes_ago = partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec![
"table,tag1=WA field_int=1500i 8000", // latest duplicate and kept
"table,tag1=VT field_int=10i 6000",
"table,tag1=UT field_int=270i 25000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_5_minutes_future)
.with_max_l0_created_at(time_5_minutes_future)
.with_compaction_level(CompactionLevel::Initial);
let level_0_file_5_minutes_ago = partition.create_parquet_file(builder).await.into();
// L1 file
let lp = vec![
"table,tag1=VT field_int=88i 10000", // will be eliminated due to duplicate.
// Note: created time more recent than level_0_file_16_minutes_ago
// but always considered older ingested data
"table,tag1=OR field_int=99i 12000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_5_minutes_future)
.with_max_l0_created_at(time_3_minutes_future)
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
let level_1_file_1_minute_ago_with_duplicates: ParquetFile =
partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec!["table,tag2=OH,tag3=21 field_int=22i 36000"].join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_min_time(0)
.with_max_time(36000)
.with_creation_time(time_5_minutes_future)
.with_max_l0_created_at(time_5_minutes_future)
// Will put the group size between "small" and "large"
.with_size_override(50 * 1024 * 1024)
.with_compaction_level(CompactionLevel::Initial);
let medium_level_0_file_time_now = partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec![
"table,tag1=VT field_int=10i 68000",
"table,tag2=OH,tag3=21 field_int=210i 136000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_min_time(36001)
.with_max_time(136000)
.with_creation_time(time_2_minutes_future)
.with_max_l0_created_at(time_2_minutes_future)
// Will put the group size two multiples over "large"
.with_size_override(180 * 1024 * 1024)
.with_compaction_level(CompactionLevel::Initial);
let large_level_0_file_2_2_minutes_ago =
partition.create_parquet_file(builder).await.into();
// Order here isn't relevant; the chunk order should ensure the level 1 files are ordered
// first, then the other files by max seq num.
parquet_files = vec![
level_1_file_1_minute_ago,
level_0_file_16_minutes_ago,
level_0_file_5_minutes_ago,
level_1_file_1_minute_ago_with_duplicates,
medium_level_0_file_time_now,
large_level_0_file_2_2_minutes_ago,
];
}
let config = Arc::new(Config {
let config = Config {
shard_id: shard.shard.id,
metric_registry: catalog.metric_registry(),
catalog: catalog.catalog(),
@ -484,7 +346,7 @@ impl TestSetupBuilder {
Arc::new(object_store::memory::InMemory::new()),
StorageId::from("scratchpad"),
),
time_provider,
time_provider: catalog.time_provider(),
exec: Arc::clone(&catalog.exec),
backoff_config: BackoffConfig::default(),
partition_concurrency: NonZeroUsize::new(1).unwrap(),
@ -495,23 +357,248 @@ impl TestSetupBuilder {
percentage_max_file_size: PERCENTAGE_MAX_FILE_SIZE,
split_percentage: SPLIT_PERCENTAGE,
partition_timeout: Duration::from_secs(3_600),
partition_filter: None,
shadow_mode: self.shadow_mode,
partitions_source: PartitionsSourceConfig::CatalogRecentWrites,
shadow_mode: false,
ignore_partition_skip_marker: false,
max_input_files_per_partition: usize::MAX,
max_input_parquet_bytes_per_partition: usize::MAX,
shard_config: None,
compact_version: self.compact_version,
compact_version: AlgoVersion::AllAtOnce,
min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,
process_once: true,
simulate_without_object_store: false,
all_errors_are_fatal: true,
};
Self {
config,
catalog,
ns,
shard,
table,
partition,
files: vec![],
}
}
pub async fn with_files(self) -> TestSetupBuilder<true> {
let time_provider = self.catalog.time_provider();
let time_1_minute_future = time_provider.minutes_into_future(1);
let time_2_minutes_future = time_provider.minutes_into_future(2);
let time_3_minutes_future = time_provider.minutes_into_future(3);
let time_5_minutes_future = time_provider.minutes_into_future(5);
// L1 file
let lp = vec![
"table,tag2=PA,tag3=15 field_int=1601i 30000",
"table,tag2=OH,tag3=21 field_int=21i 36000", // will be eliminated due to duplicate
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_3_minutes_future)
.with_max_l0_created_at(time_1_minute_future)
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
let level_1_file_1_minute_ago = self.partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec![
"table,tag1=WA field_int=1000i 8000", // will be eliminated due to duplicate
"table,tag1=VT field_int=10i 10000", // latest L0 compared with duplicate in level_1_file_1_minute_ago_with_duplicates
// keep it
"table,tag1=UT field_int=70i 20000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_2_minutes_future)
.with_max_l0_created_at(time_2_minutes_future)
.with_compaction_level(CompactionLevel::Initial);
let level_0_file_16_minutes_ago = self.partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec![
"table,tag1=WA field_int=1500i 8000", // latest duplicate and kept
"table,tag1=VT field_int=10i 6000",
"table,tag1=UT field_int=270i 25000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_5_minutes_future)
.with_max_l0_created_at(time_5_minutes_future)
.with_compaction_level(CompactionLevel::Initial);
let level_0_file_5_minutes_ago = self.partition.create_parquet_file(builder).await.into();
// L1 file
let lp = vec![
"table,tag1=VT field_int=88i 10000", // will be eliminated due to duplicate.
// Note: created time more recent than level_0_file_16_minutes_ago
// but always considered older ingested data
"table,tag1=OR field_int=99i 12000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_creation_time(time_5_minutes_future)
.with_max_l0_created_at(time_3_minutes_future)
.with_compaction_level(CompactionLevel::FileNonOverlapped); // Prev compaction
let level_1_file_1_minute_ago_with_duplicates: ParquetFile =
self.partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec!["table,tag2=OH,tag3=21 field_int=22i 36000"].join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_min_time(0)
.with_max_time(36000)
.with_creation_time(time_5_minutes_future)
.with_max_l0_created_at(time_5_minutes_future)
// Will put the group size between "small" and "large"
.with_size_override(50 * 1024 * 1024)
.with_compaction_level(CompactionLevel::Initial);
let medium_level_0_file_time_now = self.partition.create_parquet_file(builder).await.into();
// L0 file
let lp = vec![
"table,tag1=VT field_int=10i 68000",
"table,tag2=OH,tag3=21 field_int=210i 136000",
]
.join("\n");
let builder = TestParquetFileBuilder::default()
.with_line_protocol(&lp)
.with_min_time(36001)
.with_max_time(136000)
.with_creation_time(time_2_minutes_future)
.with_max_l0_created_at(time_2_minutes_future)
// Will put the group size two multiples over "large"
.with_size_override(180 * 1024 * 1024)
.with_compaction_level(CompactionLevel::Initial);
let large_level_0_file_2_2_minutes_ago =
self.partition.create_parquet_file(builder).await.into();
// Order here isn't relevant; the chunk order should ensure the level 1 files are ordered
// first, then the other files by max seq num.
let files = vec![
level_1_file_1_minute_ago,
level_0_file_16_minutes_ago,
level_0_file_5_minutes_ago,
level_1_file_1_minute_ago_with_duplicates,
medium_level_0_file_time_now,
large_level_0_file_2_2_minutes_ago,
];
TestSetupBuilder::<true> {
config: self.config,
catalog: self.catalog,
ns: self.ns,
shard: self.shard,
table: self.table,
partition: self.partition,
files,
}
}
}
/// Builder methods that need the fixture files to exist already, because the
/// limits they configure are expressed relative to those files.
impl TestSetupBuilder<true> {
    /// Set `max_input_files_per_partition` to the number of fixture files
    /// plus `delta`.
    ///
    /// A negative `delta` places the limit below the number of files.
    pub fn with_max_input_files_per_partition_relative_to_n_files(self, delta: isize) -> Self {
        Self {
            config: Config {
                // This must target the file-count limit. Writing the count
                // into `max_input_parquet_bytes_per_partition` would leave
                // the file limit untouched and clobber the byte limit.
                max_input_files_per_partition: (self.files.len() as isize + delta) as usize,
                ..self.config
            },
            ..self
        }
    }

    /// Set `max_input_parquet_bytes_per_partition` to the summed
    /// `file_size_bytes` of the fixture files plus `delta`.
    ///
    /// A negative `delta` places the limit below the total size.
    pub fn with_max_input_parquet_bytes_per_partition_relative_to_total_size(
        self,
        delta: isize,
    ) -> Self {
        let total_size = self.files.iter().map(|f| f.file_size_bytes).sum::<i64>();

        Self {
            config: Config {
                max_input_parquet_bytes_per_partition: (total_size as isize + delta) as usize,
                ..self.config
            },
            ..self
        }
    }
}
impl<const WITH_FILES: bool> TestSetupBuilder<WITH_FILES> {
pub fn with_shadow_mode(self) -> Self {
Self {
config: Config {
shadow_mode: true,
..self.config
},
..self
}
}
/// Select the compaction algorithm version stored in the config.
pub fn with_compact_version(mut self, compact_version: AlgoVersion) -> Self {
    self.config.compact_version = compact_version;
    self
}
/// set min_num_l1_files_to_compact
pub fn with_min_num_l1_files_to_compact(self, min_num_l1_files_to_compact: usize) -> Self {
Self {
config: Config {
min_num_l1_files_to_compact,
..self.config
},
..self
}
}
/// Set max_input_files_per_partition
pub fn with_max_input_files_per_partition(self, max_input_files_per_partition: usize) -> Self {
Self {
config: Config {
max_input_files_per_partition,
..self.config
},
..self
}
}
pub fn simulate_without_object_store(self) -> Self {
Self {
config: Config {
simulate_without_object_store: true,
..self.config
},
..self
}
}
pub async fn build(self) -> TestSetup {
let candidate_partition = Arc::new(PartitionInfo {
partition_id: self.partition.partition.id,
namespace_id: self.ns.namespace.id,
namespace_name: self.ns.namespace.name.clone(),
table: Arc::new(self.table.table.clone()),
table_schema: Arc::new(self.table.catalog_schema().await),
sort_key: self.partition.partition.sort_key(),
partition_key: self.partition.partition.partition_key.clone(),
});
TestSetup {
files: Arc::new(parquet_files),
files: Arc::new(self.files),
partition_info: candidate_partition,
catalog,
table,
config,
catalog: self.catalog,
table: self.table,
partition: self.partition,
config: Arc::new(self.config),
}
}
}
@ -521,12 +608,13 @@ pub struct TestSetup {
pub partition_info: Arc<PartitionInfo>,
pub catalog: Arc<TestCatalog>,
pub table: Arc<TestTable>,
pub partition: Arc<TestPartition>,
pub config: Arc<Config>,
}
impl TestSetup {
pub fn builder() -> TestSetupBuilder {
TestSetupBuilder::default()
pub async fn builder() -> TestSetupBuilder<false> {
TestSetupBuilder::new().await
}
/// Get the catalog files stored in the catalog
@ -542,18 +630,6 @@ impl TestSetup {
self.table.read_parquet_file(file).await
}
/// Set compact version
pub fn set_compact_version(&mut self, compact_version: AlgoVersion) {
let mut config = Arc::get_mut(&mut self.config).unwrap();
config.compact_version = compact_version;
}
/// set min_num_l1_files_to_compact
pub fn set_min_num_l1_files_to_compact(&mut self, min_num_l1_files_to_compact: usize) {
let mut config = Arc::get_mut(&mut self.config).unwrap();
config.min_num_l1_files_to_compact = min_num_l1_files_to_compact;
}
/// return a set of times relative to config.time_provider.now()
pub fn test_times(&self) -> TestTimes {
TestTimes::new(self.config.time_provider.as_ref())

View File

@ -2,61 +2,29 @@ use std::collections::BTreeMap;
use data_types::{CompactionLevel, ParquetFile};
/// Compares the a vec of strs with the output of a set of parquet
/// files. See docs on [`ParquetFileFormatter`] for example
/// expected output.
///
/// Designed so that failure output can be directly copy/pasted
/// into the test code as expected results.
///
/// Expects to be called about like this:
/// assert_parquet_files!(expected_lines: &[&str], &files)
#[track_caller]
pub fn assert_parquet_files<'a>(
expected_lines: impl IntoIterator<Item = &'a str>,
files: &[ParquetFile],
) {
let expected_lines: Vec<String> = expected_lines.into_iter().map(|s| s.to_string()).collect();
let actual_lines = readable_list_of_files(None, files);
assert_eq!(
expected_lines, actual_lines,
"\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n",
);
/// Render `files` under the heading `title`, using the layout described on
/// [`ParquetFileFormatter`]. The returned lines are suitable for comparison
/// with `insta`.
pub fn format_files<'a>(
    title: impl Into<String>,
    files: impl IntoIterator<Item = &'a ParquetFile>,
) -> Vec<String> {
    let heading: String = title.into();
    readable_list_of_files(Some(heading), files)
}
/// Compares the a vec of strs with the output of a set of parquet
/// files. This is used to compare the results of splitting files into
/// two groups. See docs on [`ParquetFileFormatter`] for example
/// expected output.
///
/// Designed so that failure output can be directly copy/pasted
/// into the test code as expected results.
///
/// Expects to be called about like this:
/// assert_parquet_files_split!(expected_lines: &[&str], &files1, &files2)
#[track_caller]
pub fn assert_parquet_files_split<'a>(
expected_lines: impl IntoIterator<Item = &'a str>,
files1: &[ParquetFile],
files2: &[ParquetFile],
) {
let expected_lines: Vec<String> = expected_lines.into_iter().map(|s| s.to_string()).collect();
/// Formats two lists of files in the manner described on
/// [`ParquetFileFormatter`] into strings suitable for comparison with
/// `insta`.
pub fn format_files_split<'a>(
title1: impl Into<String>,
files1: impl IntoIterator<Item = &'a ParquetFile>,
title2: impl Into<String>,
files2: impl IntoIterator<Item = &'a ParquetFile>,
) -> Vec<String> {
let strings1 = readable_list_of_files(Some(title1.into()), files1);
let strings2 = readable_list_of_files(Some(title2.into()), files2);
let actual_lines_one = readable_list_of_files(Some("left".into()), files1);
let actual_lines_two = readable_list_of_files(Some("right".into()), files2);
let actual_lines: Vec<_> = actual_lines_one
.into_iter()
.chain(actual_lines_two.into_iter())
.collect();
assert_eq!(
expected_lines, actual_lines,
"\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n",
);
strings1.into_iter().chain(strings2.into_iter()).collect()
}
/// default width for printing
@ -69,9 +37,8 @@ const DEFAULT_HEADING_WIDTH: usize = 20;
/// parquet files arranged so they are lined up horizontally based on
/// their relative time range.
///
/// See docs on [`ParquetFileFormatter`]
/// for examples.
pub fn readable_list_of_files<'a>(
/// See docs on [`ParquetFileFormatter`] for examples.
fn readable_list_of_files<'a>(
title: Option<String>,
files: impl IntoIterator<Item = &'a ParquetFile>,
) -> Vec<String> {
@ -127,7 +94,7 @@ pub fn readable_list_of_files<'a>(
#[derive(Debug, Default)]
struct ParquetFileFormatter {
/// should the size of the files be shown (if they are different)
show_size: bool,
file_size_seen: FileSizeSeen,
/// width in characters
row_heading_chars: usize,
/// width, in characters, of the entire min/max timerange
@ -140,9 +107,10 @@ struct ParquetFileFormatter {
max_time: i64,
}
#[derive(Debug)]
#[derive(Debug, Default)]
/// helper to track if there are multiple file sizes in a set of parquet files
enum FileSizeSeen {
#[default]
None,
One(i64),
Many,
@ -182,15 +150,12 @@ impl ParquetFileFormatter {
file_size_seen.observe(file.file_size_bytes)
});
// show the size if there are multiple sizes
let show_size = matches!(file_size_seen, FileSizeSeen::Many);
let time_range = max_time - min_time;
let ns_per_char = (time_range as f64) / (width_chars as f64);
Self {
show_size,
file_size_seen,
width_chars,
ns_per_char,
min_time,
@ -212,9 +177,14 @@ impl ParquetFileFormatter {
}
fn format_level(&self, level: &CompactionLevel) -> String {
let level_heading = display_level(level);
let level_heading = match self.file_size_seen {
FileSizeSeen::One(sz) => format!("{level_heading}, all files {sz}b"),
_ => level_heading.into(),
};
format!(
"{:width$}",
display_level(level),
"{level_heading:width$}",
width = self.width_chars + self.row_heading_chars
)
}
@ -239,7 +209,9 @@ impl ParquetFileFormatter {
// Get compact display of the file, like 'L0.1'
// add |--- ---| formatting (based on field width)
let file_string = format!("|{:-^width$}|", display_file_id(file), width = field_width);
let row_heading = display_format(file, self.show_size);
// show individual file sizes if they are different
let show_size = matches!(self.file_size_seen, FileSizeSeen::Many);
let row_heading = display_format(file, show_size);
// special case "zero" width times
if self.min_time == self.max_time {
@ -298,7 +270,7 @@ fn display_format(file: &ParquetFile, show_size: bool) -> String {
let max_time = file.max_time.get(); // display as i64
let sz = file.file_size_bytes;
if show_size {
format!("{file_id}[{min_time},{max_time}]@{sz}")
format!("{file_id}[{min_time},{max_time}] {sz}b")
} else {
format!("{file_id}[{min_time},{max_time}]")
}
@ -321,13 +293,16 @@ mod test {
.build(),
];
let expected = vec![
"L0 ",
"L0.1[0,0] |-------------------------------------L0.1-------------------------------------|",
"L0.2[0,0] |-------------------------------------L0.2-------------------------------------|",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("display", &files),
@r###"
---
- display
- "L0, all files 1b "
- "L0.1[0,0] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[0,0] |-------------------------------------L0.2-------------------------------------|"
"###
);
}
#[test]
@ -345,15 +320,18 @@ mod test {
.build(),
];
let expected = vec![
"L0 ",
"L0.1[0,0]@1 |-------------------------------------L0.1-------------------------------------|",
"L0.2[0,0]@1 |-------------------------------------L0.2-------------------------------------|",
"L2 ",
"L2.3[0,0]@42 |-------------------------------------L2.3-------------------------------------|",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("display", &files),
@r###"
---
- display
- "L0 "
- "L0.1[0,0] 1b |-------------------------------------L0.1-------------------------------------|"
- "L0.2[0,0] 1b |-------------------------------------L0.2-------------------------------------|"
- "L2 "
- "L2.3[0,0] 42b |-------------------------------------L2.3-------------------------------------|"
"###
);
}
#[test]
@ -375,13 +353,16 @@ mod test {
.build(),
];
let expected = vec![
"L0 ",
"L0.1[100,200]@1 |----------L0.1----------| ",
"L0.2[300,400]@1 |----------L0.2----------| ",
"L0.11[150,350]@44 |-----------------------L0.11-----------------------| ",
];
assert_parquet_files(expected, &files);
insta::assert_yaml_snapshot!(
format_files("display", &files),
@r###"
---
- display
- "L0 "
- "L0.1[100,200] 1b |----------L0.1----------| "
- "L0.2[300,400] 1b |----------L0.2----------| "
- "L0.11[150,350] 44b |-----------------------L0.11-----------------------| "
"###
);
}
}

692
compactor2/src/tests.rs Normal file
View File

@ -0,0 +1,692 @@
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
use arrow_util::assert_batches_sorted_eq;
use data_types::{CompactionLevel, ParquetFile, PartitionId};
use iox_query::exec::ExecutorType;
use iox_tests::util::TestParquetFileBuilder;
use tracker::AsyncSemaphoreMetrics;
use crate::{
components::{
df_planner::panic::PanicDataFusionPlanner, hardcoded::hardcoded_components, Components,
},
config::AlgoVersion,
driver::compact,
test_util::{format_files, list_object_store, TestSetup},
};
/// Compacting a partition that has no files must leave the catalog empty.
#[tokio::test]
async fn test_compact_no_file() {
    test_helpers::maybe_start_logging();

    // The builder is never asked to create files, so the catalog starts empty.
    let setup = TestSetup::builder().await.build().await;
    let before = setup.list_by_table_not_to_delete().await;
    assert!(before.is_empty());

    run_compact(&setup).await;

    // Nothing to compact, so nothing may have been created.
    let after = setup.list_by_table_not_to_delete().await;
    assert!(after.is_empty());
}
/// With a file-count limit below the number of fixture files, neither
/// algorithm version may compact anything.
#[tokio::test]
async fn test_num_files_over_limit() {
    test_helpers::maybe_start_logging();

    for version in [AlgoVersion::AllAtOnce, AlgoVersion::TargetLevel] {
        // Six fixture files, but at most 4 input files allowed (< 6), so the
        // partition won't get compacted.
        let setup = TestSetup::builder()
            .await
            .with_files()
            .await
            .with_compact_version(version)
            .with_max_input_files_per_partition(4)
            .build()
            .await;

        // IDs and compaction levels expected both before and after the run.
        let untouched = vec![
            (1, CompactionLevel::FileNonOverlapped),
            (2, CompactionLevel::Initial),
            (3, CompactionLevel::Initial),
            (4, CompactionLevel::FileNonOverlapped),
            (5, CompactionLevel::Initial),
            (6, CompactionLevel::Initial),
        ];

        let before = setup.list_by_table_not_to_delete().await;
        assert_eq!(before.len(), 6);
        assert_levels(&before, untouched.clone());

        run_compact(&setup).await;

        // Same six files, same levels: nothing was compacted.
        let after = setup.list_by_table_not_to_delete().await;
        assert_eq!(after.len(), 6);
        assert_levels(&after, untouched);
    }
}
/// With a byte limit one below the total fixture size, neither algorithm
/// version may compact anything.
#[tokio::test]
async fn test_total_file_size_over_limit() {
    test_helpers::maybe_start_logging();

    for version in [AlgoVersion::AllAtOnce, AlgoVersion::TargetLevel] {
        // Six fixture files; the size limit is set just under their total
        // size, so the partition won't get compacted.
        let setup = TestSetup::builder()
            .await
            .with_files()
            .await
            .with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
            .with_compact_version(version)
            .build()
            .await;

        // IDs and compaction levels expected both before and after the run.
        let untouched = vec![
            (1, CompactionLevel::FileNonOverlapped),
            (2, CompactionLevel::Initial),
            (3, CompactionLevel::Initial),
            (4, CompactionLevel::FileNonOverlapped),
            (5, CompactionLevel::Initial),
            (6, CompactionLevel::Initial),
        ];

        let before = setup.list_by_table_not_to_delete().await;
        assert_eq!(before.len(), 6);
        assert_levels(&before, untouched.clone());

        run_compact(&setup).await;

        // Same six files, same levels: nothing was compacted.
        let after = setup.list_by_table_not_to_delete().await;
        assert_eq!(after.len(), 6);
        assert_levels(&after, untouched);
    }
}
/// The `AllAtOnce` algorithm compacts the six fixture files into two L1
/// files; this checks IDs, levels, `max_l0_created_at` and file contents.
#[tokio::test]
async fn test_compact_all_at_once() {
    test_helpers::maybe_start_logging();

    // Six fixture files, with limits generous enough to compact all of them.
    let setup = TestSetup::builder()
        .await
        .with_files()
        .await
        .with_max_input_files_per_partition_relative_to_n_files(10)
        .with_max_input_parquet_bytes_per_partition_relative_to_total_size(1000)
        .with_compact_version(AlgoVersion::AllAtOnce)
        .build()
        .await;

    // Starting state: six files with the expected IDs and levels.
    let before = setup.list_by_table_not_to_delete().await;
    assert_levels(
        &before,
        vec![
            (1, CompactionLevel::FileNonOverlapped),
            (2, CompactionLevel::Initial),
            (3, CompactionLevel::Initial),
            (4, CompactionLevel::FileNonOverlapped),
            (5, CompactionLevel::Initial),
            (6, CompactionLevel::Initial),
        ],
    );

    // ... and the expected max_l0_created_at per file ID.
    let times = setup.test_times();
    assert_max_l0_created_at(
        &before,
        vec![
            (1, times.time_1_minute_future),
            (2, times.time_2_minutes_future),
            (3, times.time_5_minutes_future),
            (4, times.time_3_minutes_future),
            (5, times.time_5_minutes_future),
            (6, times.time_2_minutes_future),
        ],
    );

    run_compact(&setup).await;

    // The six inputs are compacted into two L1 files.
    let after = setup.list_by_table_not_to_delete().await;
    assert_levels(
        &after,
        vec![
            (7, CompactionLevel::FileNonOverlapped),
            (8, CompactionLevel::FileNonOverlapped),
        ],
    );
    // Both outputs carry time_5_minutes_future, the maximum
    // max_l0_created_at over all L0 inputs.
    assert_max_l0_created_at(
        &after,
        vec![
            (7, times.time_5_minutes_future),
            (8, times.time_5_minutes_future),
        ],
    );

    // Check the contents, starting from the end of the listing.
    let mut remaining = setup.list_by_table_not_to_delete().await;

    // Compacted smaller file with the later data
    let later_file = remaining.pop().unwrap();
    let batches = setup.read_parquet_file(later_file).await;
    assert_batches_sorted_eq!(
        &[
            "+-----------+------+------+------+-----------------------------+",
            "| field_int | tag1 | tag2 | tag3 | time |",
            "+-----------+------+------+------+-----------------------------+",
            "| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
            "+-----------+------+------+------+-----------------------------+",
        ],
        &batches
    );

    // Compacted larger file with the earlier data
    let earlier_file = remaining.pop().unwrap();
    let batches = setup.read_parquet_file(earlier_file).await;
    assert_batches_sorted_eq!(
        [
            "+-----------+------+------+------+-----------------------------+",
            "| field_int | tag1 | tag2 | tag3 | time |",
            "+-----------+------+------+------+-----------------------------+",
            "| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
            "| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
            "| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
            "| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
            "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
            "| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
            "| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
            "| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
            "| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
            "+-----------+------+------+------+-----------------------------+",
        ],
        &batches
    );
}
/// The `TargetLevel` algorithm compacts the six fixture files into two L2
/// files; this checks IDs, levels, `max_l0_created_at` and file contents.
#[tokio::test]
async fn test_compact_target_level() {
    test_helpers::maybe_start_logging();

    // Six fixture files, with limits generous enough to compact all of them.
    let setup = TestSetup::builder()
        .await
        .with_files()
        .await
        .with_max_input_files_per_partition_relative_to_n_files(10)
        .with_max_input_parquet_bytes_per_partition_relative_to_total_size(1000)
        .with_compact_version(AlgoVersion::TargetLevel)
        .with_min_num_l1_files_to_compact(2)
        .build()
        .await;

    // Starting state: six files with the expected IDs and levels.
    let before = setup.list_by_table_not_to_delete().await;
    assert_levels(
        &before,
        vec![
            (1, CompactionLevel::FileNonOverlapped),
            (2, CompactionLevel::Initial),
            (3, CompactionLevel::Initial),
            (4, CompactionLevel::FileNonOverlapped),
            (5, CompactionLevel::Initial),
            (6, CompactionLevel::Initial),
        ],
    );

    // ... and the expected max_l0_created_at per file ID.
    let times = setup.test_times();
    assert_max_l0_created_at(
        &before,
        vec![
            (1, times.time_1_minute_future),
            (2, times.time_2_minutes_future),
            (3, times.time_5_minutes_future),
            (4, times.time_3_minutes_future),
            (5, times.time_5_minutes_future),
            (6, times.time_2_minutes_future),
        ],
    );

    run_compact(&setup).await;

    // The six inputs are compacted into two files.
    let after = setup.list_by_table_not_to_delete().await;
    assert_eq!(after.len(), 2);
    // This is the result of a 2-round compaction: from L0s -> L1s and then L1s -> L2s.
    // The first round will create two L1 files, IDs 7 and 8.
    // The second round will create two L2 files, IDs 9 and 10.
    assert_levels(
        &after,
        vec![(9, CompactionLevel::Final), (10, CompactionLevel::Final)],
    );
    // Both outputs carry time_5_minutes_future, the maximum
    // max_l0_created_at over all L0 inputs.
    assert_max_l0_created_at(
        &after,
        vec![
            (9, times.time_5_minutes_future),
            (10, times.time_5_minutes_future),
        ],
    );

    // Check the contents, starting from the end of the listing.
    let mut remaining = setup.list_by_table_not_to_delete().await;

    // Compacted smaller file with the later data
    let later_file = remaining.pop().unwrap();
    let batches = setup.read_parquet_file(later_file).await;
    assert_batches_sorted_eq!(
        &[
            "+-----------+------+------+------+-----------------------------+",
            "| field_int | tag1 | tag2 | tag3 | time |",
            "+-----------+------+------+------+-----------------------------+",
            "| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
            "+-----------+------+------+------+-----------------------------+",
        ],
        &batches
    );

    // Compacted larger file with the earlier data
    let earlier_file = remaining.pop().unwrap();
    let batches = setup.read_parquet_file(earlier_file).await;
    assert_batches_sorted_eq!(
        [
            "+-----------+------+------+------+-----------------------------+",
            "| field_int | tag1 | tag2 | tag3 | time |",
            "+-----------+------+------+------+-----------------------------+",
            "| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
            "| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
            "| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
            "| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
            "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
            "| 22 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
            "| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
            "| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
            "| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
            "+-----------+------+------+------+-----------------------------+",
        ],
        &batches
    );
}
/// A partition recorded as skipped must be left untouched by compaction.
#[tokio::test]
async fn test_skip_compact() {
    test_helpers::maybe_start_logging();

    // Six fixture files with the default limits.
    let setup = TestSetup::builder().await.with_files().await.build().await;

    // IDs and compaction levels expected both before and after the run.
    let untouched = vec![
        (1, CompactionLevel::FileNonOverlapped),
        (2, CompactionLevel::Initial),
        (3, CompactionLevel::Initial),
        (4, CompactionLevel::FileNonOverlapped),
        (5, CompactionLevel::Initial),
        (6, CompactionLevel::Initial),
    ];

    let before = setup.list_by_table_not_to_delete().await;
    assert_levels(&before, untouched.clone());

    // Record the partition as skipped before running the compactor.
    let partition_id = setup.partition_info.partition_id;
    setup
        .catalog
        .add_to_skipped_compaction(partition_id, "test reason")
        .await;

    // The run is a no-op for this partition because it is skipped.
    run_compact(&setup).await;

    // Still the same six files.
    let after = setup.list_by_table_not_to_delete().await;
    assert_levels(&after, untouched);
}
/// A failing compaction must leave the catalog and object store unchanged and
/// record the partition as skipped, with the error chain as the reason.
#[tokio::test]
async fn test_partition_fail() {
    test_helpers::maybe_start_logging();

    // Six fixture files with the default limits.
    let setup = TestSetup::builder().await.with_files().await.build().await;

    // Snapshot both stores before the failing run.
    let catalog_before = setup.list_by_table_not_to_delete().await;
    assert!(!catalog_before.is_empty());
    let store_before = list_object_store(&setup.catalog.object_store).await;
    assert!(!store_before.is_empty());

    run_compact_failing(&setup).await;

    // Neither store may have changed.
    let catalog_after = setup.list_by_table_not_to_delete().await;
    assert_eq!(catalog_before, catalog_after);
    let store_after = list_object_store(&setup.catalog.object_store).await;
    assert_eq!(store_before, store_after);

    // The failure is recorded against the partition.
    let skipped = [(
        setup.partition_info.partition_id,
        "serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: foo",
    )];
    assert_skipped_compactions(&setup, skipped).await;
}
/// In shadow mode a compaction run must not change the catalog or the
/// object store.
#[tokio::test]
async fn test_shadow_mode() {
    test_helpers::maybe_start_logging();

    // Six fixture files, shadow mode enabled.
    let setup = TestSetup::builder()
        .await
        .with_files()
        .await
        .with_shadow_mode()
        .build()
        .await;

    // Snapshot both stores before the run.
    let catalog_before = setup.list_by_table_not_to_delete().await;
    assert!(!catalog_before.is_empty());
    let store_before = list_object_store(&setup.catalog.object_store).await;
    assert!(!store_before.is_empty());

    run_compact(&setup).await;

    // Neither store may have changed.
    let catalog_after = setup.list_by_table_not_to_delete().await;
    assert_eq!(catalog_before, catalog_after);
    let store_after = list_object_store(&setup.catalog.object_store).await;
    assert_eq!(store_before, store_after);
}
/// In shadow mode a failing compaction must not change the catalog or the
/// object store, and must not record any skipped compaction.
#[tokio::test]
async fn test_shadow_mode_partition_fail() {
    test_helpers::maybe_start_logging();

    // Six fixture files, shadow mode enabled.
    let setup = TestSetup::builder()
        .await
        .with_files()
        .await
        .with_shadow_mode()
        .build()
        .await;

    // Snapshot both stores before the failing run.
    let catalog_before = setup.list_by_table_not_to_delete().await;
    assert!(!catalog_before.is_empty());
    let store_before = list_object_store(&setup.catalog.object_store).await;
    assert!(!store_before.is_empty());

    run_compact_failing(&setup).await;

    // Neither store may have changed.
    let catalog_after = setup.list_by_table_not_to_delete().await;
    assert_eq!(catalog_before, catalog_after);
    let store_after = list_object_store(&setup.catalog.object_store).await;
    assert_eq!(store_before, store_after);

    // No skip marker is expected, unlike the non-shadow failure test.
    assert_skipped_compactions(&setup, []).await;
}
/// Regression test named after PR 6890.
///
/// Creates ten simulated (no object store) L0 files that all cover the time range
/// `[100, 200]`, runs the compactor, checks that no partition was skipped, and pins the
/// resulting file layout with inline snapshots.
#[tokio::test]
async fn test_pr6890() {
    test_helpers::maybe_start_logging();

    let setup = TestSetup::builder()
        .await
        .simulate_without_object_store()
        .build()
        .await;

    // create virtual files
    let mut input_files = vec![];
    for _ in 0..10 {
        let file = setup
            .partition
            .create_parquet_file(
                TestParquetFileBuilder::default()
                    .with_min_time(100)
                    .with_max_time(200)
                    .with_file_size_bytes(1_000_000) // 1MB
                    .with_compaction_level(CompactionLevel::Initial)
                    // need some LP to generate the schema
                    .with_line_protocol("table,tag1=A,tag2=B,tag3=C field_int=1i 100"),
            )
            .await
            .parquet_file;
        input_files.push(file);
    }

    // Advance the test clock by 200ns before compacting.
    // NOTE(review): presumably so the files are no longer considered "just written" —
    // confirm against the compactor's partition selection logic.
    setup.catalog.time_provider.inc(Duration::from_nanos(200));

    insta::assert_yaml_snapshot!(
        format_files("input", &input_files),
        @r###"
    ---
    - input
    - "L0, all files 1000000b "
    - "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
    - "L0.2[100,200] |-------------------------------------L0.2-------------------------------------|"
    - "L0.3[100,200] |-------------------------------------L0.3-------------------------------------|"
    - "L0.4[100,200] |-------------------------------------L0.4-------------------------------------|"
    - "L0.5[100,200] |-------------------------------------L0.5-------------------------------------|"
    - "L0.6[100,200] |-------------------------------------L0.6-------------------------------------|"
    - "L0.7[100,200] |-------------------------------------L0.7-------------------------------------|"
    - "L0.8[100,200] |-------------------------------------L0.8-------------------------------------|"
    - "L0.9[100,200] |-------------------------------------L0.9-------------------------------------|"
    - "L0.10[100,200] |------------------------------------L0.10-------------------------------------|"
    "###
    );

    run_compact(&setup).await;
    assert_skipped_compactions(&setup, []).await;

    // Snapshot of the files remaining after compaction. The title is "output" so the
    // snapshot is not mistaken for the input listing above.
    let output_files = setup.list_by_table_not_to_delete().await;
    insta::assert_yaml_snapshot!(
        format_files("output", &output_files),
        @r###"
    ---
    - output
    - "L1, all files 1b "
    - "L1.11[0,0] |------------------------------------L1.11-------------------------------------|"
    - "L1.12[0,0] |------------------------------------L1.12-------------------------------------|"
    - "L1.13[0,0] |------------------------------------L1.13-------------------------------------|"
    - "L1.14[0,0] |------------------------------------L1.14-------------------------------------|"
    - "L1.15[0,0] |------------------------------------L1.15-------------------------------------|"
    - "L1.16[0,0] |------------------------------------L1.16-------------------------------------|"
    - "L1.17[0,0] |------------------------------------L1.17-------------------------------------|"
    - "L1.18[0,0] |------------------------------------L1.18-------------------------------------|"
    - "L1.19[0,0] |------------------------------------L1.19-------------------------------------|"
    - "L1.20[0,0] |------------------------------------L1.20-------------------------------------|"
    - "L1.21[0,0] |------------------------------------L1.21-------------------------------------|"
    - "L1.22[0,0] |------------------------------------L1.22-------------------------------------|"
    - "L1.23[0,0] |------------------------------------L1.23-------------------------------------|"
    - "L1.24[0,0] |------------------------------------L1.24-------------------------------------|"
    - "L1.25[0,0] |------------------------------------L1.25-------------------------------------|"
    - "L1.26[0,0] |------------------------------------L1.26-------------------------------------|"
    - "L1.27[0,0] |------------------------------------L1.27-------------------------------------|"
    - "L1.28[0,0] |------------------------------------L1.28-------------------------------------|"
    - "L1.29[0,0] |------------------------------------L1.29-------------------------------------|"
    - "L1.30[0,0] |------------------------------------L1.30-------------------------------------|"
    - "L1.31[0,0] |------------------------------------L1.31-------------------------------------|"
    - "L1.32[0,0] |------------------------------------L1.32-------------------------------------|"
    - "L1.33[0,0] |------------------------------------L1.33-------------------------------------|"
    - "L1.34[0,0] |------------------------------------L1.34-------------------------------------|"
    - "L1.35[0,0] |------------------------------------L1.35-------------------------------------|"
    - "L1.36[0,0] |------------------------------------L1.36-------------------------------------|"
    - "L1.37[0,0] |------------------------------------L1.37-------------------------------------|"
    - "L1.38[0,0] |------------------------------------L1.38-------------------------------------|"
    - "L1.39[0,0] |------------------------------------L1.39-------------------------------------|"
    - "L1.40[0,0] |------------------------------------L1.40-------------------------------------|"
    - "L1.41[0,0] |------------------------------------L1.41-------------------------------------|"
    - "L1.42[0,0] |------------------------------------L1.42-------------------------------------|"
    - "L1.43[0,0] |------------------------------------L1.43-------------------------------------|"
    - "L1.44[0,0] |------------------------------------L1.44-------------------------------------|"
    - "L1.45[0,0] |------------------------------------L1.45-------------------------------------|"
    - "L1.46[0,0] |------------------------------------L1.46-------------------------------------|"
    - "L1.47[0,0] |------------------------------------L1.47-------------------------------------|"
    - "L1.48[0,0] |------------------------------------L1.48-------------------------------------|"
    - "L1.49[0,0] |------------------------------------L1.49-------------------------------------|"
    - "L1.50[0,0] |------------------------------------L1.50-------------------------------------|"
    - "L1.51[0,0] |------------------------------------L1.51-------------------------------------|"
    - "L1.52[0,0] |------------------------------------L1.52-------------------------------------|"
    - "L1.53[0,0] |------------------------------------L1.53-------------------------------------|"
    - "L1.54[0,0] |------------------------------------L1.54-------------------------------------|"
    - "L1.55[0,0] |------------------------------------L1.55-------------------------------------|"
    - "L1.56[0,0] |------------------------------------L1.56-------------------------------------|"
    - "L1.57[0,0] |------------------------------------L1.57-------------------------------------|"
    - "L1.58[0,0] |------------------------------------L1.58-------------------------------------|"
    - "L1.59[0,0] |------------------------------------L1.59-------------------------------------|"
    - "L1.60[0,0] |------------------------------------L1.60-------------------------------------|"
    "###
    );
}
/// Runs one compaction pass over `setup` with the unmodified hard-coded components
/// built from the setup's config.
async fn run_compact(setup: &TestSetup) {
    run_compact_impl(setup, hardcoded_components(&setup.config)).await;
}
/// Runs one compaction pass over `setup` with the hard-coded components, except that
/// the DataFusion planner is swapped for [`PanicDataFusionPlanner`] so that the run fails.
async fn run_compact_failing(setup: &TestSetup) {
    let base = hardcoded_components(&setup.config);

    // Copy every component from the defaults and override only the planner.
    let failing = Components {
        df_planner: Arc::new(PanicDataFusionPlanner::new()),
        ..(*base).clone()
    };

    run_compact_impl(setup, Arc::new(failing)).await;
}
/// Shared driver for the compaction tests: runs `compact` once against `setup` using
/// the supplied `components`.
///
/// Before compacting, the scratchpad parquet store from the setup's config is registered
/// with the runtime environment of a `Reorg` executor context.
async fn run_compact_impl(setup: &TestSetup, components: Arc<Components>) {
    let config = Arc::clone(&setup.config);

    // Job semaphore with 10 permits, instrumented via the config's metric registry.
    let job_semaphore = Arc::new(
        Arc::new(AsyncSemaphoreMetrics::new(&config.metric_registry, [])).new_semaphore(10),
    );

    // register scratchpad store
    setup
        .catalog
        .exec()
        .new_context(ExecutorType::Reorg)
        .inner()
        .runtime_env()
        .register_object_store(
            "iox",
            config.parquet_store_scratchpad.id(),
            Arc::clone(config.parquet_store_scratchpad.object_store()),
        );

    compact(
        NonZeroUsize::new(10).unwrap(),
        // 36_000 seconds (10 hours). Same value as before; only the digit grouping was
        // normalised (it was written `3_6000`).
        // NOTE(review): the old grouping suggests `3_600` (1 hour) may have been
        // intended — confirm the desired duration.
        Duration::from_secs(36_000),
        job_semaphore,
        &components,
    )
    .await;
}
/// Asserts that `files` yields exactly the expected `(file id, compaction level)` pairs,
/// in the same order.
#[track_caller]
fn assert_levels<'a>(
    files: impl IntoIterator<Item = &'a ParquetFile>,
    expected_files_and_levels: impl IntoIterator<Item = (i64, CompactionLevel)>,
) {
    let actual: Vec<(i64, CompactionLevel)> = files
        .into_iter()
        .map(|file| (file.id.get(), file.compaction_level))
        .collect();
    let expected: Vec<(i64, CompactionLevel)> = expected_files_and_levels.into_iter().collect();

    assert_eq!(actual, expected);
}
/// Asserts that `files` yields exactly the expected `(file id, max_l0_created_at)` pairs,
/// in the same order.
#[track_caller]
fn assert_max_l0_created_at<'a>(
    files: impl IntoIterator<Item = &'a ParquetFile>,
    expected_files_and_max_l0_created_ats: impl IntoIterator<Item = (i64, i64)>,
) {
    let actual: Vec<(i64, i64)> = files
        .into_iter()
        .map(|file| (file.id.get(), file.max_l0_created_at.get()))
        .collect();
    let expected: Vec<(i64, i64)> = expected_files_and_max_l0_created_ats
        .into_iter()
        .collect();

    assert_eq!(actual, expected);
}
/// Asserts that the skipped compactions recorded in the catalog are exactly `expected`,
/// compared as `(partition id, reason)` pairs in listing order.
///
/// Panics if the catalog query fails.
async fn assert_skipped_compactions<const N: usize>(
    setup: &TestSetup,
    expected: [(PartitionId, &'static str); N],
) {
    let recorded = setup
        .catalog
        .catalog
        .repositories()
        .await
        .partitions()
        .list_skipped_compactions()
        .await
        .unwrap();

    // Reduce each record to the two fields the tests care about.
    let actual: Vec<(PartitionId, &str)> = recorded
        .iter()
        .map(|entry| (entry.partition_id, entry.reason.as_str()))
        .collect();

    assert_eq!(actual, expected);
}

View File

@ -14,7 +14,7 @@ data_types = { path = "../data_types" }
futures = "0.3"
humantime = "2.1.0"
iox_catalog = { path = "../iox_catalog" }
object_store = { version = "0.5.2" }
object_store = { version = "0.5.4" }
observability_deps = { path = "../observability_deps" }
snafu = "0.7"
tokio = { version = "1", features = ["macros", "rt", "sync"] }

View File

@ -13,7 +13,7 @@ futures = "0.3"
generated_types = { path = "../generated_types" }
influxdb_iox_client = { path = "../influxdb_iox_client" }
iox_catalog = { path = "../iox_catalog" }
object_store = { version = "0.5.2", features = ["aws"] }
object_store = { version = "0.5.4", features = ["aws"] }
observability_deps = { path = "../observability_deps" }
schema = { path = "../schema" }
serde = { version = "1.0", features = ["derive"] }

View File

@ -15,7 +15,7 @@ pub struct Dialect {
pub delimiter: Option<String>,
/// <https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#columns>
#[serde(skip_serializing_if = "Option::is_none")]
pub annotations: Option<Annotations>,
pub annotations: Option<Vec<Annotations>>,
/// Character prefixed to comment strings
#[serde(skip_serializing_if = "Option::is_none")]
pub comment_prefix: Option<String>,

View File

@ -3,6 +3,7 @@
use crate::models::ast::Package;
use crate::models::File;
use serde::{Deserialize, Serialize};
use serde_json::Number;
use std::collections::HashMap;
/// Query influx using the Flux language
@ -23,6 +24,21 @@ pub struct Query {
/// Default is the server's now time.
#[serde(skip_serializing_if = "Option::is_none")]
pub now: Option<String>,
/// Params for use in query via params.param_name
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<HashMap<String, Param>>,
}
/// A single Flux query parameter value.
///
/// The enum is `untagged`, so a value is (de)serialized as the bare number or string
/// without any variant wrapper.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", untagged)]
pub enum Param {
    /// A numeric parameter
    Number(Number),
    /// A string parameter
    String(String),
}
impl Query {

View File

@ -30,7 +30,7 @@ ioxd_querier = { path = "../ioxd_querier"}
ioxd_router = { path = "../ioxd_router"}
ioxd_test = { path = "../ioxd_test"}
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
object_store_metrics = { path = "../object_store_metrics" }
observability_deps = { path = "../observability_deps" }
panic_logging = { path = "../panic_logging" }

View File

@ -433,6 +433,7 @@ impl Config {
compact_version: CompactorAlgoVersion::AllAtOnce,
min_num_l1_files_to_compact: 1,
process_once: false,
process_all_partitions: false,
};
let querier_config = QuerierConfig {

View File

@ -112,6 +112,7 @@ pub async fn command(config: Config) -> Result<(), Error> {
}));
let time_provider = Arc::new(SystemProvider::new());
let process_once = config.compactor_config.process_once;
let server_type = create_compactor2_server_type(
&common_state,
Arc::clone(&metric_registry),
@ -127,5 +128,14 @@ pub async fn command(config: Config) -> Result<(), Error> {
info!("starting compactor");
let services = vec![Service::create(server_type, common_state.run_config())];
Ok(main::main(common_state, services, metric_registry).await?)
let res = main::main(common_state, services, metric_registry).await;
match res {
Ok(()) => Ok(()),
// compactor2 is allowed to shut itself down
Err(main::Error::Wrapper {
source: _source @ ioxd_common::Error::LostServer,
}) if process_once => Ok(()),
Err(e) => Err(e.into()),
}
}

View File

@ -310,7 +310,7 @@ mod influxql {
TestCase {
input: "cases/in/issue_6112.influxql",
chunk_stage: ChunkStage::All,
chunk_stage: ChunkStage::Ingester,
}
.run()
.await;

View File

@ -5,6 +5,7 @@
-- Single measurement queries
--
-- Validates expected data is returned
-- Projection wildcard, all tags and fields
-- IOX_COMPARE: sorted
SELECT * FROM m0;
@ -29,21 +30,6 @@ SELECT f64, tag0 FROM m0;
-- IOX_COMPARE: sorted
SELECT f64, tag0, time FROM m0;
-- Validate some math functions
-- IOX_COMPARE: sorted
SELECT f64, floor(f64), ceil(f64) FROM m0;
-- Validate all scalar functions
-- -- IOX_COMPARE: sorted
-- TODO(sgc): log expects two arguments
-- TODO(sgc): asin and acos should cast NaN to NULL
-- SELECT f64, abs(f64), sin(f64), cos(f64), tan(f64),
-- asin(f64), acos(f64), atan(f64), atan2(f64, 1),
-- exp(f64), log(f64), ln(f64), log2(f64),
-- log10(f64), sqrt(f64), pow(f64, 2), floor(f64),
-- ceil(f64), round(f64)
-- FROM m0 LIMIT 1;
-- arithmetic operators
-- IOX_COMPARE: sorted
SELECT f64, f64 * 2, i64, i64 + i64 FROM m0;
@ -121,3 +107,105 @@ SELECT tag1, f64 FROM m0 WHERE tag1 != '';
-- TODO(sgc): Not working, as expected
-- -- IOX_COMPARE: sorted
-- SELECT tag1, f64 FROM m0 WHERE tag1 = '';
--
-- LIMIT and OFFSET clauses
-- NOTE: these are working, but due to incorrect default ordering
-- some tests fail
--
SELECT tag0, f64 FROM m0 LIMIT 1;
SELECT tag0, f64 FROM m0 WHERE tag0 = 'val00' LIMIT 2 OFFSET 1;
SELECT tag0, f64 FROM m0 LIMIT 1 OFFSET 1;
-- OFFSET clause, no LIMIT clause
-- TODO(sgc): Fails due to a bug in InfluxQL that utilises the following optimisation
-- https://github.com/influxdata/influxdb/blob/dee8977d2c6598cb2d17e9334ea997c99853640a/tsdb/engine/tsm1/iterator.gen.go#L344-L347
-- which breaks after returning the first point after the offset, because itr.opt.Limit == 0
-- SELECT tag0, f64 FROM m0 OFFSET 1;
--
-- Sort ordering
--
-- No GROUP BY clause
-- Default sort: expected output should default to ORDER BY TIME asc
SELECT * FROM m0;
-- Sort time in descending order
SELECT * FROM m0 ORDER BY time DESC;
--
-- Scalar functions in projection
--
-- Validate all scalar functions with a float field
SELECT
f64,
abs(f64 * -1),
sin(f64),
cos(f64),
tan(f64),
asin(1/f64),
acos(1/f64),
atan(f64),
atan2(f64, 2),
exp(f64),
-- TODO(sgc): Dependent on https://github.com/apache/arrow-datafusion/issues/5206
-- log(f64, 8),
ln(f64),
log2(f64),
log10(f64),
sqrt(f64),
pow(f64, 2),
floor(f64),
ceil(f64),
round(f64)
FROM m0 LIMIT 1;
-- Validate all scalar functions with an integer field
SELECT
i64,
abs(i64 * -1),
sin(i64),
cos(i64),
tan(i64),
-- TODO(sgc): Not coerced to float, so returns incorrect result
-- asin(1/i64),
acos(1/i64),
atan(i64),
atan2(i64, 2),
exp(i64),
-- TODO(sgc): Dependent on https://github.com/apache/arrow-datafusion/issues/5206
-- log(i64, 8),
ln(i64),
log2(i64),
log10(i64),
sqrt(i64),
pow(i64, 2),
floor(i64),
ceil(i64),
round(i64)
FROM m0 LIMIT 1;
-- Deviation from InfluxQL is that NaNs are not coalesced to NULL
-- The InfluxQL compatibility layer will be responsible for this translation
SELECT f64, asin(f64), acos(f64) FROM m0 LIMIT 1;
-- INF support
SELECT f64, pow(f64, pow(2, 10)) FROM m0 LIMIT 1;
--
-- TZ clause support
--
-- Interpret date/time (%Y-%M-%D %h:%m:%s) in timezone specified by TZ clause
-- TODO(sgc): condition is correct, but the `time` column is not displayed in the local
-- timezone, as DataFusion does not support timestamp-with-timezone data types or
-- displaying values in the local timezone
---- SELECT f64 FROM m0 WHERE time = '2022-10-31 13:00:00' TZ('Australia/Hobart');
---- SELECT f64 FROM m0 WHERE time = '2022-10-31T13:00:00Z' TZ('Australia/Hobart');

View File

@ -77,19 +77,6 @@
| 19.2 | val00 | 2022-10-31T02:00:30Z |
| 21.2 | val00 | 2022-10-31T02:00:10Z |
+------+-------+----------------------+
-- InfluxQL: SELECT f64, floor(f64), ceil(f64) FROM m0;
-- Results After Sorting
+----------------------+------+-------+------+
| time | f64 | floor | ceil |
+----------------------+------+-------+------+
| 2022-10-31T02:00:00Z | 10.1 | 10 | 11 |
| 2022-10-31T02:00:00Z | 10.4 | 10 | 11 |
| 2022-10-31T02:00:00Z | 11.3 | 11 | 12 |
| 2022-10-31T02:00:10Z | 18.9 | 18 | 19 |
| 2022-10-31T02:00:10Z | 21.2 | 21 | 22 |
| 2022-10-31T02:00:20Z | 11.2 | 11 | 12 |
| 2022-10-31T02:00:30Z | 19.2 | 19 | 20 |
+----------------------+------+-------+------+
-- InfluxQL: SELECT f64, f64 * 2, i64, i64 + i64 FROM m0;
-- Results After Sorting
+----------------------+------+-------+-----+---------+
@ -228,3 +215,70 @@
+----------------------+-------+------+
| 2022-10-31T02:00:10Z | val10 | 18.9 |
+----------------------+-------+------+
-- InfluxQL: SELECT tag0, f64 FROM m0 LIMIT 1;
+----------------------+-------+------+
| time | tag0 | f64 |
+----------------------+-------+------+
| 2022-10-31T02:00:00Z | val00 | 10.1 |
+----------------------+-------+------+
-- InfluxQL: SELECT tag0, f64 FROM m0 WHERE tag0 = 'val00' LIMIT 2 OFFSET 1;
+----------------------+-------+------+
| time | tag0 | f64 |
+----------------------+-------+------+
| 2022-10-31T02:00:10Z | val00 | 21.2 |
| 2022-10-31T02:00:10Z | val00 | 18.9 |
+----------------------+-------+------+
-- InfluxQL: SELECT tag0, f64 FROM m0 LIMIT 1 OFFSET 1;
+----------------------+-------+------+
| time | tag0 | f64 |
+----------------------+-------+------+
| 2022-10-31T02:00:00Z | val01 | 11.3 |
+----------------------+-------+------+
-- InfluxQL: SELECT * FROM m0;
+----------------------+------+-----+-----+-------+-------+
| time | f64 | i64 | str | tag0 | tag1 |
+----------------------+------+-----+-----+-------+-------+
| 2022-10-31T02:00:00Z | 10.1 | 101 | hi | val00 | |
| 2022-10-31T02:00:00Z | 11.3 | 211 | lo | val01 | |
| 2022-10-31T02:00:00Z | 10.4 | 101 | lo | val02 | |
| 2022-10-31T02:00:10Z | 21.2 | 211 | hi | val00 | |
| 2022-10-31T02:00:10Z | 18.9 | 211 | lo | val00 | val10 |
| 2022-10-31T02:00:20Z | 11.2 | 191 | lo | val00 | |
| 2022-10-31T02:00:30Z | 19.2 | 392 | lo | val00 | |
+----------------------+------+-----+-----+-------+-------+
-- InfluxQL: SELECT * FROM m0 ORDER BY time DESC;
+----------------------+------+-----+-----+-------+-------+
| time | f64 | i64 | str | tag0 | tag1 |
+----------------------+------+-----+-----+-------+-------+
| 2022-10-31T02:00:30Z | 19.2 | 392 | lo | val00 | |
| 2022-10-31T02:00:20Z | 11.2 | 191 | lo | val00 | |
| 2022-10-31T02:00:10Z | 21.2 | 211 | hi | val00 | |
| 2022-10-31T02:00:10Z | 18.9 | 211 | lo | val00 | val10 |
| 2022-10-31T02:00:00Z | 10.1 | 101 | hi | val00 | |
| 2022-10-31T02:00:00Z | 11.3 | 211 | lo | val01 | |
| 2022-10-31T02:00:00Z | 10.4 | 101 | lo | val02 | |
+----------------------+------+-----+-----+-------+-------+
-- InfluxQL: SELECT f64, abs(f64 * -1), sin(f64), cos(f64), tan(f64), asin(1/f64), acos(1/f64), atan(f64), atan2(f64, 2), exp(f64), ln(f64), log2(f64), log10(f64), sqrt(f64), pow(f64, 2), floor(f64), ceil(f64), round(f64) FROM m0 LIMIT 1;
+----------------------+------+------+---------------------+---------------------+--------------------+--------------------+-------------------+------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+-------+------+-------+
| time | f64 | abs | sin | cos | tan | asin | acos | atan | atan2 | exp | ln | log2 | log10 | sqrt | pow | floor | ceil | round |
+----------------------+------+------+---------------------+---------------------+--------------------+--------------------+-------------------+------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+-------+------+-------+
| 2022-10-31T02:00:00Z | 10.1 | 10.1 | -0.6250706488928821 | -0.7805681801691837 | 0.8007893029375109 | 0.0991723838059207 | 1.471623942988976 | 1.47210806614649 | 1.3753055265462157 | 24343.00942440838 | 2.312535423847214 | 3.3362833878644325 | 1.0043213737826426 | 3.1780497164141406 | 102.00999999999999 | 10 | 11 | 10 |
+----------------------+------+------+---------------------+---------------------+--------------------+--------------------+-------------------+------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+-------+------+-------+
-- InfluxQL: SELECT i64, abs(i64 * -1), sin(i64), cos(i64), tan(i64), acos(1/i64), atan(i64), atan2(i64, 2), exp(i64), ln(i64), log2(i64), log10(i64), sqrt(i64), pow(i64, 2), floor(i64), ceil(i64), round(i64) FROM m0 LIMIT 1;
+----------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+-------------------+-----------+----------------------------------------------+------------------+-------------------+--------------------+-------------------+-------+-------+------+-------+
| time | i64 | abs | sin | cos | tan | acos | atan | atan2 | exp | ln | log2 | log10 | sqrt | pow | floor | ceil | round |
+----------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+-------------------+-----------+----------------------------------------------+------------------+-------------------+--------------------+-------------------+-------+-------+------+-------+
| 2022-10-31T02:00:00Z | 101 | 101 | 0.45202578717835057 | 0.8920048697881602 | 0.5067526002248183 | 1.5707963267948966 | 1.560895660206908 | 1.5509969 | 73070599793680670000000000000000000000000000 | 4.61512051684126 | 6.658211482751795 | 2.0043213737826426 | 10.04987562112089 | 10201 | 101 | 101 | 101 |
+----------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+-------------------+-----------+----------------------------------------------+------------------+-------------------+--------------------+-------------------+-------+-------+------+-------+
-- InfluxQL: SELECT f64, asin(f64), acos(f64) FROM m0 LIMIT 1;
+----------------------+------+------+------+
| time | f64 | asin | acos |
+----------------------+------+------+------+
| 2022-10-31T02:00:00Z | 10.1 | NaN | NaN |
+----------------------+------+------+------+
-- InfluxQL: SELECT f64, pow(f64, pow(2, 10)) FROM m0 LIMIT 1;
+----------------------+------+-----+
| time | f64 | pow |
+----------------------+------+-----+
| 2022-10-31T02:00:00Z | 10.1 | inf |
+----------------------+------+-----+

View File

@ -28,7 +28,7 @@ iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
mutable_batch = { path = "../mutable_batch"}
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
once_cell = "1"
parking_lot = "0.12"

View File

@ -28,7 +28,7 @@ iox_time = { path = "../iox_time" }
metric = { version = "0.1.0", path = "../metric" }
mutable_batch = { version = "0.1.0", path = "../mutable_batch" }
mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { version = "0.1.0", path = "../observability_deps" }
once_cell = "1.17"
parking_lot = "0.12.1"

View File

@ -459,6 +459,9 @@ pub trait PartitionRepo: Send + Sync {
/// return the partitions by table id
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
/// return the IDs of all partitions
async fn list_ids(&mut self) -> Result<Vec<PartitionId>>;
/// Update the sort key for the partition, setting it to `new_sort_key` iff
/// the current value matches `old_sort_key`.
///
@ -955,6 +958,7 @@ pub(crate) mod test_helpers {
};
use metric::{Attributes, DurationHistogram, Metric};
use std::{
collections::BTreeSet,
ops::{Add, DerefMut},
sync::Arc,
time::Duration,
@ -1645,6 +1649,16 @@ pub(crate) mod test_helpers {
created.insert(other_partition.id, other_partition.clone());
assert_eq!(created, listed);
let listed = repos
.partitions()
.list_ids()
.await
.expect("failed to list partitions")
.into_iter()
.collect::<BTreeSet<_>>();
assert_eq!(created.keys().copied().collect::<BTreeSet<_>>(), listed);
// test list_by_namespace
let namespace2 = repos
.namespaces()

View File

@ -873,6 +873,14 @@ impl PartitionRepo for MemTxn {
Ok(partitions)
}
/// Returns the ID of every partition in the current in-memory stage.
async fn list_ids(&mut self) -> Result<Vec<PartitionId>> {
    Ok(self
        .stage()
        .partitions
        .iter()
        .map(|partition| partition.id)
        .collect())
}
async fn cas_sort_key(
&mut self,
partition_id: PartitionId,

View File

@ -246,6 +246,7 @@ decorate!(
"partition_list_by_shard" = list_by_shard(&mut self, shard_id: ShardId) -> Result<Vec<Partition>>;
"partition_list_by_namespace" = list_by_namespace(&mut self, namespace_id: NamespaceId) -> Result<Vec<Partition>>;
"partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
"partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
"partition_update_sort_key" = cas_sort_key(&mut self, partition_id: PartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;
"partition_record_skipped_compaction" = record_skipped_compaction(&mut self, partition_id: PartitionId, reason: &str, num_files: usize, limit_num_files: usize, limit_num_files_first_in_partition: usize, estimated_bytes: u64, limit_bytes: u64) -> Result<()>;
"partition_list_skipped_compactions" = list_skipped_compactions(&mut self) -> Result<Vec<SkippedCompaction>>;

View File

@ -1287,6 +1287,18 @@ WHERE table_id = $1;
.map_err(|e| Error::SqlxError { source: e })
}
async fn list_ids(&mut self) -> Result<Vec<PartitionId>> {
sqlx::query_as(
r#"
SELECT p.id as partition_id
FROM partition p
"#,
)
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
}
/// Update the sort key for `partition_id` if and only if `old_sort_key`
/// matches the current value in the database.
///

View File

@ -1114,6 +1114,18 @@ WHERE table_id = $1;
.collect())
}
async fn list_ids(&mut self) -> Result<Vec<PartitionId>> {
sqlx::query_as(
r#"
SELECT p.id as partition_id
FROM partition p
"#,
)
.fetch_all(self.inner.get_mut())
.await
.map_err(|e| Error::SqlxError { source: e })
}
/// Update the sort key for `partition_id` if and only if `old_sort_key`
/// matches the current value in the database.
///

View File

@ -27,7 +27,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.92"
snafu = "0.7"
tokio = { version = "1.25", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
toml = "0.7.1"
toml = "0.7.2"
tracing = "0.1"
tracing-subscriber = "0.3"
uuid = { version = "1", default_features = false }

View File

@ -29,7 +29,7 @@ hashbrown = { workspace = true }
influxdb_influxql_parser = { path = "../influxdb_influxql_parser" }
itertools = "0.10.5"
once_cell = "1"
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet_file = { path = "../parquet_file" }

View File

@ -464,7 +464,6 @@ mod test {
physical_plan::displayable,
prelude::{col, lit, lit_timestamp_nano},
scalar::ScalarValue,
sql::TableReference,
};
fn schema() -> Schema {
@ -546,10 +545,9 @@ mod test {
async fn assert_explain(sql: &str, expected: &str) -> Result<()> {
let executor = Executor::new_testing();
let context = executor.new_context(ExecutorType::Query);
context.inner().register_table(
TableReference::Bare { table: "temps" },
Arc::new(EmptyTable::new(Arc::new(schema()))),
)?;
context
.inner()
.register_table("temps", Arc::new(EmptyTable::new(Arc::new(schema()))))?;
let physical_plan = context.prepare_sql(sql).await?;
let actual_plan = displayable(physical_plan.as_ref()).indent().to_string();
let actual_iter = actual_plan.split('\n');

View File

@ -155,9 +155,7 @@ mod tests {
error::Result as ArrowResult,
record_batch::RecordBatch,
};
use datafusion::{
datasource::empty::EmptyTable, error::Result, from_slice::FromSlice, sql::TableReference,
};
use datafusion::{datasource::empty::EmptyTable, error::Result, from_slice::FromSlice};
use crate::exec::{gapfill::GapFillExec, Executor, ExecutorType};
@ -193,10 +191,9 @@ mod tests {
async fn plan_statement_and_get_params(sql: &str) -> Result<GapFillParams> {
let executor = Executor::new_testing();
let context = executor.new_context(ExecutorType::Query);
context.inner().register_table(
TableReference::Bare { table: "t" },
Arc::new(EmptyTable::new(Arc::new(schema()))),
)?;
context
.inner()
.register_table("t", Arc::new(EmptyTable::new(Arc::new(schema()))))?;
let physical_plan = context.prepare_sql(sql).await?;
let gapfill_node = &physical_plan.children()[0];
let gapfill_node = gapfill_node.as_any().downcast_ref::<GapFillExec>().unwrap();

View File

@ -3,16 +3,20 @@ use crate::plan::influxql::rewriter::rewrite_statement;
use crate::plan::influxql::util::binary_operator_to_df_operator;
use crate::{DataFusionError, IOxSessionContext, QueryNamespace};
use arrow::datatypes::DataType;
use datafusion::common::{DFSchema, DFSchemaRef, Result, ScalarValue};
use datafusion::common::{DFSchema, DFSchemaRef, Result, ScalarValue, ToDFSchema};
use datafusion::datasource::provider_as_source;
use datafusion::logical_expr::expr::Sort;
use datafusion::logical_expr::expr_rewriter::{normalize_col, ExprRewritable, ExprRewriter};
use datafusion::logical_expr::logical_plan::builder::project;
use datafusion::logical_expr::logical_plan::Analyze;
use datafusion::logical_expr::{
lit, BinaryExpr, BuiltinScalarFunction, Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder,
Operator,
lit, BinaryExpr, BuiltinScalarFunction, Explain, Expr, ExprSchemable, LogicalPlan,
LogicalPlanBuilder, Operator, PlanType, ToStringifiedPlan,
};
use datafusion::prelude::Column;
use datafusion::sql::TableReference;
use influxdb_influxql_parser::common::OrderByClause;
use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement};
use influxdb_influxql_parser::expression::{
BinaryOperator, ConditionalExpression, ConditionalOperator, VarRefDataType,
};
@ -29,6 +33,7 @@ use once_cell::sync::Lazy;
use query_functions::clean_non_meta_escapes;
use schema::{InfluxColumnType, InfluxFieldType, Schema};
use std::collections::HashSet;
use std::iter;
use std::ops::Deref;
use std::str::FromStr;
use std::sync::Arc;
@ -70,11 +75,8 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
Statement::DropMeasurement(_) => {
Err(DataFusionError::NotImplemented("DROP MEASUREMENT".into()))
}
Statement::Explain(_) => Err(DataFusionError::NotImplemented("EXPLAIN".into())),
Statement::Select(select) => {
let select = rewrite_statement(self.database.as_meta(), &select)?;
self.select_statement_to_plan(select).await
}
Statement::Explain(explain) => self.explain_statement_to_plan(*explain).await,
Statement::Select(select) => self.select_statement_to_plan(*select).await,
Statement::ShowDatabases(_) => {
Err(DataFusionError::NotImplemented("SHOW DATABASES".into()))
}
@ -96,8 +98,41 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}
}
async fn explain_statement_to_plan(&self, explain: ExplainStatement) -> Result<LogicalPlan> {
let plan = self.select_statement_to_plan(*explain.select).await?;
let plan = Arc::new(plan);
let schema = LogicalPlan::explain_schema();
let schema = schema.to_dfschema_ref()?;
let (analyze, verbose) = match explain.options {
Some(ExplainOption::AnalyzeVerbose) => (true, true),
Some(ExplainOption::Analyze) => (true, false),
Some(ExplainOption::Verbose) => (false, true),
None => (false, false),
};
if analyze {
Ok(LogicalPlan::Analyze(Analyze {
verbose,
input: plan,
schema,
}))
} else {
let stringified_plans = vec![plan.to_stringified(PlanType::InitialLogicalPlan)];
Ok(LogicalPlan::Explain(Explain {
verbose,
plan,
stringified_plans,
schema,
logical_optimization_succeeded: false,
}))
}
}
/// Create a [`LogicalPlan`] from the specified InfluxQL `SELECT` statement.
async fn select_statement_to_plan(&self, select: SelectStatement) -> Result<LogicalPlan> {
let select = rewrite_statement(self.database.as_meta(), &select)?;
// Process FROM clause
let plans = self.plan_from_tables(select.from).await?;
@ -114,6 +149,27 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let tz = select.timezone.map(|tz| *tz);
let plan = self.plan_where_clause(select.condition, plan, tz)?;
let plan = if select.group_by.is_none() {
LogicalPlanBuilder::from(plan)
.sort(iter::once(Expr::Sort(Sort {
expr: Box::new(Expr::Column(Column {
relation: None,
name: "time".to_string(),
})),
asc: match select.order_by {
// Default behaviour is to sort by time in ascending order if there is no ORDER BY
None | Some(OrderByClause::Ascending) => true,
Some(OrderByClause::Descending) => false,
},
nulls_first: false,
})))?
.build()
} else {
Err(DataFusionError::NotImplemented(
"GROUP BY not supported".into(),
))
}?;
// Process and validate the field expressions in the SELECT projection list
let select_exprs = self.field_list_to_exprs(&plan, select.fields)?;
@ -647,7 +703,6 @@ mod test {
assert_snapshot!(plan("CREATE DATABASE foo").await);
assert_snapshot!(plan("DELETE FROM foo").await);
assert_snapshot!(plan("DROP MEASUREMENT foo").await);
assert_snapshot!(plan("EXPLAIN SELECT bar FROM foo").await);
assert_snapshot!(plan("SHOW DATABASES").await);
assert_snapshot!(plan("SHOW MEASUREMENTS").await);
assert_snapshot!(plan("SHOW RETENTION POLICIES").await);
@ -702,6 +757,14 @@ mod test {
plan("SELECT foo, f64_field FROM data where non_existent !~ /f/").await
);
}
/// Snapshot the logical plans produced for each supported form of the
/// InfluxQL `EXPLAIN` statement: no option, `VERBOSE`, `ANALYZE` and
/// `ANALYZE VERBOSE`, all wrapping the same `SELECT`.
#[tokio::test]
async fn test_explain() {
// No option and VERBOSE are expected to produce an `Explain` root node.
assert_snapshot!(plan("EXPLAIN SELECT foo, f64_field FROM data").await);
assert_snapshot!(plan("EXPLAIN VERBOSE SELECT foo, f64_field FROM data").await);
// Both ANALYZE forms are expected to produce an `Analyze` root node.
assert_snapshot!(plan("EXPLAIN ANALYZE SELECT foo, f64_field FROM data").await);
assert_snapshot!(plan("EXPLAIN ANALYZE VERBOSE SELECT foo, f64_field FROM data").await);
}
}
/// Tests to validate InfluxQL `SELECT` statements that project columns without specifying

View File

@ -625,6 +625,22 @@ mod test {
"SELECT usage_idle::float AS usage_idle FROM cpu GROUP BY host, region"
);
// Does not include tags in projection when expanded in GROUP BY
let stmt = parse_select("SELECT * FROM cpu GROUP BY *");
let stmt = rewrite_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY host, region"
);
// Does include explicitly listed tags in projection
let stmt = parse_select("SELECT host, * FROM cpu GROUP BY *");
let stmt = rewrite_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY host, region"
);
// Fallible
// Invalid regex

View File

@ -0,0 +1,8 @@
---
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"EXPLAIN VERBOSE SELECT foo, f64_field FROM data\").await"
---
Explain [plan_type:Utf8, plan:Utf8]
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -0,0 +1,8 @@
---
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"EXPLAIN ANALYZE SELECT foo, f64_field FROM data\").await"
---
Analyze [plan_type:Utf8, plan:Utf8]
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -0,0 +1,8 @@
---
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"EXPLAIN ANALYZE VERBOSE SELECT foo, f64_field FROM data\").await"
---
Analyze [plan_type:Utf8, plan:Utf8]
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -0,0 +1,8 @@
---
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"EXPLAIN SELECT foo, f64_field FROM data\").await"
---
Explain [plan_type:Utf8, plan:Utf8]
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where non_existent !~ /f/\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where time > '2004-04-09T02:33:45Z'\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: data.time > TimestampNanosecond(1081478025000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time > TimestampNanosecond(1081478025000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where now() - 10s < time\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: now() - IntervalMonthDayNano("10000000000") < data.time [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: now() - IntervalMonthDayNano("10000000000") < data.time [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where foo =~ /f/\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: CAST(data.foo AS Utf8) ~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: CAST(data.foo AS Utf8) ~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where f64_field =~ /f/\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where non_existent =~ /f/\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where foo !~ /f/\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: CAST(data.foo AS Utf8) !~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: CAST(data.foo AS Utf8) !~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where f64_field !~ /f/\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -3,5 +3,6 @@ source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data where time > now() - 10s\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
Filter: data.time > now() - IntervalMonthDayNano("10000000000") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time > now() - IntervalMonthDayNano("10000000000") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, sin(f64_field) FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, sin(f64_field) FROM data\").await"
---
Projection: data.time, data.foo AS foo, sin(data.f64_field) AS sin [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, sin:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, atan2(f64_field, 2) FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, atan2(f64_field, 2) FROM data\").await"
---
Projection: data.time, data.foo AS foo, atan2(data.f64_field, Int64(2)) AS atan2 [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, atan2:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, f64_field + 0.5 FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field + 0.5 FROM data\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field + Float64(0.5) AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, f64_field + f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field + f64_field FROM data\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field + data.f64_field AS f64_field_f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field_f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT time, f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT time, f64_field FROM data\").await"
---
Projection: data.time AS time, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT time as timestamp, f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT time as timestamp, f64_field FROM data\").await"
---
Projection: data.time AS timestamp, data.f64_field AS f64_field [timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field FROM data\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT foo, f64_field, i64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT foo, f64_field, i64_field FROM data\").await"
---
Projection: data.time, data.foo AS foo, data.f64_field AS f64_field, data.i64_field AS i64_field [time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N, i64_field:Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT /^f/ FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT /^f/ FROM data\").await"
---
Projection: data.time, data.f64_field AS f64_field, data.foo AS foo [time:Timestamp(Nanosecond, None), f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT * FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT * FROM data\").await"
---
Projection: data.time, data.TIME AS TIME, data.bar AS bar, data.bool_field AS bool_field, data.f64_field AS f64_field, data.foo AS foo, data.i64_field AS i64_field, data.mixedCase AS mixedCase, data.str_field AS str_field, data.with space AS with space [time:Timestamp(Nanosecond, None), TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT TIME FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT TIME FROM data\").await"
---
Projection: data.time, data.TIME AS TIME [time:Timestamp(Nanosecond, None), TIME:Boolean;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,6 +1,7 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SELECT f64_field FROM data\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SELECT f64_field FROM data\").await"
---
Projection: data.time, data.f64_field AS f64_field [time:Timestamp(Nanosecond, None), f64_field:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Sort: data.time ASC NULLS LAST [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]

View File

@ -1,5 +0,0 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW FIELD KEYS\")"
---
This feature is not implemented: SHOW FIELD KEYS

View File

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"EXPLAIN SELECT bar FROM foo\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW DATABASES\").await"
---
This feature is not implemented: EXPLAIN
This feature is not implemented: SHOW DATABASES

View File

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW DATABASES\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW MEASUREMENTS\").await"
---
This feature is not implemented: SHOW DATABASES
This feature is not implemented: SHOW MEASUREMENTS

View File

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW MEASUREMENTS\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW RETENTION POLICIES\").await"
---
This feature is not implemented: SHOW MEASUREMENTS
This feature is not implemented: SHOW RETENTION POLICIES

View File

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW RETENTION POLICIES\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW TAG KEYS\").await"
---
This feature is not implemented: SHOW RETENTION POLICIES
This feature is not implemented: SHOW TAG KEYS

View File

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW TAG KEYS\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW TAG VALUES WITH KEY = bar\").await"
---
This feature is not implemented: SHOW TAG KEYS
This feature is not implemented: SHOW TAG VALUES

View File

@ -1,5 +1,5 @@
---
source: iox_query/src/plan/influxql.rs
expression: "plan(\"SHOW TAG VALUES WITH KEY = bar\")"
source: iox_query/src/plan/influxql/planner.rs
expression: "plan(\"SHOW FIELD KEYS\").await"
---
This feature is not implemented: SHOW TAG VALUES
This feature is not implemented: SHOW FIELD KEYS

View File

@ -16,7 +16,7 @@ iox_catalog = { path = "../iox_catalog" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
once_cell = { version = "1.17", features = ["parking_lot"] }
parquet_file = { path = "../parquet_file" }

View File

@ -15,7 +15,7 @@ iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
iox_query = { path = "../iox_query" }
object_store = "0.5.2"
object_store = "0.5.4"
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }

View File

@ -3,7 +3,7 @@ use backoff::BackoffConfig;
use clap_blocks::compactor2::{Compactor2Config, CompactorAlgoVersion};
use compactor2::{
compactor::Compactor2,
config::{AlgoVersion, Config, ShardConfig},
config::{AlgoVersion, Config, PartitionsSourceConfig, ShardConfig},
};
use data_types::{PartitionId, TRANSITION_SHARD_NUMBER};
use hyper::{Body, Request, Response};
@ -163,6 +163,20 @@ pub async fn create_compactor2_server_type(
CompactorAlgoVersion::TargetLevel => AlgoVersion::TargetLevel,
};
let partitions_source = match (
compactor_config.partition_filter,
compactor_config.process_all_partitions,
) {
(None, false) => PartitionsSourceConfig::CatalogRecentWrites,
(None, true) => PartitionsSourceConfig::CatalogAll,
(Some(ids), false) => {
PartitionsSourceConfig::Fixed(ids.into_iter().map(PartitionId::new).collect())
}
(Some(_), true) => panic!(
"provided partition ID filter and specific 'process all', this does not make sense"
),
};
let compactor = Compactor2::start(Config {
shard_id,
metric_registry: Arc::clone(&metric_registry),
@ -183,9 +197,7 @@ pub async fn create_compactor2_server_type(
percentage_max_file_size: compactor_config.percentage_max_file_size,
split_percentage: compactor_config.split_percentage,
partition_timeout: Duration::from_secs(compactor_config.partition_timeout_secs),
partition_filter: compactor_config
.partition_filter
.map(|parts| parts.into_iter().map(PartitionId::new).collect()),
partitions_source,
shadow_mode: compactor_config.shadow_mode,
ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
max_input_files_per_partition: compactor_config.max_input_files_per_partition,
@ -195,6 +207,8 @@ pub async fn create_compactor2_server_type(
compact_version,
min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,
process_once: compactor_config.process_once,
simulate_without_object_store: false,
all_errors_are_fatal: false,
});
Arc::new(Compactor2ServerType::new(

View File

@ -13,7 +13,7 @@ ingester = { path = "../ingester" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
iox_query = { path = "../iox_query" }
trace = { path = "../trace" }
write_buffer = { path = "../write_buffer" }

View File

@ -13,7 +13,7 @@ generated_types = { path = "../generated_types" }
iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
querier = { path = "../querier" }
iox_query = { path = "../iox_query" }
router = { path = "../router" }

View File

@ -13,7 +13,7 @@ iox_catalog = { path = "../iox_catalog" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
mutable_batch = { path = "../mutable_batch" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
router = { path = "../router" }
sharder = { path = "../sharder" }

View File

@ -11,7 +11,7 @@ bytes = "1.4"
futures = "0.3"
iox_time = { version = "0.1.0", path = "../iox_time" }
metric = { version = "0.1.0", path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
pin-project = "1.0.12"
tokio = { version = "1.25", features = ["io-util"] }
workspace-hack = { path = "../workspace-hack" }

View File

@ -15,7 +15,7 @@ datafusion_util = { path = "../datafusion_util" }
futures = "0.3"
generated_types = { path = "../generated_types" }
iox_time = { path = "../iox_time" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet = { workspace = true, features = ["experimental"]}

View File

@ -11,7 +11,7 @@ datafusion_util = { path = "../datafusion_util" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
futures = {version = "0.3"}
num_cpus = "1.15.0"
object_store = { version = "0.5.2" }
object_store = { version = "0.5.4" }
parquet_file = { path = "../parquet_file" }
schema = { path = "../schema" }
tokio = "1.25"

View File

@ -23,7 +23,7 @@ iox_catalog = { path = "../iox_catalog" }
iox_query = { path = "../iox_query" }
iox_time = { path = "../iox_time" }
metric = { path = "../metric" }
object_store = "0.5.2"
object_store = "0.5.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet_file = { path = "../parquet_file" }

Some files were not shown because too many files have changed in this diff Show More