Merge branch 'main' into dom/wal-refs

pull/24376/head
Dom 2023-03-15 14:24:30 +00:00 committed by GitHub
commit 98daccc463
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 5962 additions and 1750 deletions

50
Cargo.lock generated
View File

@ -1407,8 +1407,8 @@ dependencies = [
[[package]]
name = "datafusion"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1437,7 +1437,6 @@ dependencies = [
"object_store",
"parking_lot 0.12.1",
"parquet",
"paste",
"percent-encoding",
"pin-project-lite",
"rand",
@ -1455,8 +1454,8 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"arrow",
"chrono",
@ -1468,8 +1467,8 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"dashmap",
"datafusion-common",
@ -1485,20 +1484,19 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"ahash 0.8.3",
"arrow",
"datafusion-common",
"log",
"sqlparser",
]
[[package]]
name = "datafusion-optimizer"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"arrow",
"async-trait",
@ -1514,8 +1512,8 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1533,7 +1531,6 @@ dependencies = [
"itertools",
"lazy_static",
"md-5",
"num-traits",
"paste",
"petgraph",
"rand",
@ -1545,8 +1542,8 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"arrow",
"chrono",
@ -1554,7 +1551,6 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"object_store",
"parking_lot 0.12.1",
"pbjson-build",
"prost",
"prost-build",
@ -1562,8 +1558,8 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"arrow",
"datafusion-common",
@ -1573,8 +1569,8 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=8c34ca4fa34787b137b48ce4f6ffd41b64a1a633#8c34ca4fa34787b137b48ce4f6ffd41b64a1a633"
version = "20.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=612eb1d0ce338af7980fa906df8796eb47c4be44#612eb1d0ce338af7980fa906df8796eb47c4be44"
dependencies = [
"arrow-schema",
"datafusion-common",
@ -5008,18 +5004,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc"
[[package]]
name = "serde"
version = "1.0.155"
version = "1.0.156"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8"
checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.155"
version = "1.0.156"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630"
checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d"
dependencies = [
"proc-macro2",
"quote",
@ -6690,6 +6686,8 @@ dependencies = [
"crossbeam-utils",
"crypto-common",
"datafusion",
"datafusion-optimizer",
"datafusion-physical-expr",
"digest",
"either",
"fixedbitset",

View File

@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "34.0.0" }
arrow-flight = { version = "34.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="8c34ca4fa34787b137b48ce4f6ffd41b64a1a633", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="8c34ca4fa34787b137b48ce4f6ffd41b64a1a633" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="612eb1d0ce338af7980fa906df8796eb47c4be44", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="612eb1d0ce338af7980fa906df8796eb47c4be44" }
hashbrown = { version = "0.13.2" }
parquet = { version = "34.0.0" }

View File

@ -201,25 +201,6 @@ pub struct Compactor2Config {
)]
pub max_num_files_per_plan: usize,
/// Maximum input bytes (in parquet) per compaction that the
/// compactor will attempt to compact in any one plan.
///
/// In the worst case, if the sum of the sizes of all parquet
/// files in a partition is greater than this value, the compactor
/// may not try to compact this partition. Under normal operation,
/// the compactor compacts a subset of files in a partition but in
/// some cases it may need to compact them all.
///
/// This setting is a self protection mechanism, and it is
/// expected to be removed in future versions
#[clap(
long = "compaction-max-compact-size",
env = "INFLUXDB_IOX_COMPACTION_MAX_COMPACT_SIZE",
default_value = "314572800", // 300MB
action
)]
pub max_compact_size: usize,
/// Number of shards.
///
/// If this is set then the shard ID MUST also be set. If both are not provided, sharding is disabled.

View File

@ -156,10 +156,6 @@ where
.apply(partition_info, files_to_compact, target_level);
files_to_keep.extend(other_files);
// Target level of split files is the same level of the input files all of which are in the same level,
// while target level of compact files is the value of the target_level which is the highest level of the input files
let target_level = files_to_compact_or_split.target_level(target_level);
FileClassification {
target_level,
files_to_compact_or_split,

View File

@ -139,7 +139,7 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
let partition_large_size_tiny_time_range_filter: Vec<Arc<dyn PartitionFilter>> =
vec![Arc::new(UnableToCompactPartitionFilter::new(
config.max_compact_size,
config.max_compact_size_bytes(),
))];
let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.shadow_mode {
@ -281,7 +281,10 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
Arc::clone(&config.catalog),
)),
round_info_source: Arc::new(LoggingRoundInfoWrapper::new(Arc::new(
LevelBasedRoundInfo::new(config.max_num_files_per_plan, config.max_compact_size),
LevelBasedRoundInfo::new(
config.max_num_files_per_plan,
config.max_compact_size_bytes(),
),
))),
partition_filter: Arc::new(LoggingPartitionFilterWrapper::new(
MetricsPartitionFilterWrapper::new(
@ -318,7 +321,10 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
NonOverlapSplit::new(),
UpgradeSplit::new(config.max_desired_file_size_bytes),
LoggingSplitOrCompactWrapper::new(MetricsSplitOrCompactWrapper::new(
SplitCompact::new(config.max_compact_size),
SplitCompact::new(
config.max_compact_size_bytes(),
config.max_desired_file_size_bytes,
),
&config.metric_registry,
)),
),

View File

@ -64,7 +64,7 @@ impl V1IRPlanner {
// 7 = 1 (min_time) + 6 (time range)
// 13 = 7 (previous time) + 6 (time range)
// 19 = 13 (previous time) + 6 (time range)
fn compute_split_time(
pub fn compute_split_time(
chunk_times: Vec<TimestampMinMax>,
min_time: i64,
max_time: i64,
@ -330,7 +330,7 @@ mod tests {
// Imagine a customer is backfilling a large amount of data and for some reason, all the
// times on the data are exactly the same. That means the min_time and max_time will be the
// same, but the total_size will be greater than the desired size.
// We will not split it becasue the split has to stick to non-overlapped time range
// We will not split it because the split has to stick to non-overlapped time range
let min_time = 1;
let max_time = 1;

View File

@ -12,8 +12,8 @@ use super::PartitionFilter;
/// The idea for doing this:
/// 1. Not to compact large input size to avoid hitting OOM/crash.
/// 2. Not to compact too-large input size that leads to unnecessary splits into many files.
/// - Becasue we limit the size of a file. If the compacted result is too large, we will split them into many files.
/// - Becasue Level-1 files do not overlap, it is a waste to compact too-large size and then split.
/// - Because we limit the size of a file. If the compacted result is too large, we will split them into many files.
/// - Because Level-1 files do not overlap, it is a waste to compact too-large size and then split.
#[derive(Debug)]
pub struct GreaterSizeMatchingFilesPartitionFilter<T>
where

View File

@ -30,7 +30,6 @@ pub fn log_config(config: &Config) {
partitions_source,
shadow_mode,
ignore_partition_skip_marker,
max_compact_size,
shard_config,
min_num_l1_files_to_compact,
process_once,
@ -77,7 +76,6 @@ pub fn log_config(config: &Config) {
%partitions_source,
shadow_mode,
ignore_partition_skip_marker,
max_compact_size,
?shard_cfg_n_shards,
?shard_cfg_shard_id,
min_num_l1_files_to_compact,

View File

@ -5,9 +5,13 @@ use crate::components::{
files_split::{target_level_split::TargetLevelSplit, FilesSplit},
};
/// Return (`[files_to_compact]`, `[files_to_keep]`) of given files
/// such that `files_to_compact` are files to compact that under max_compact_size limit
/// and `files_to_keep` are the rest of the files that will be considered to compact in next round
/// Return a struct that holds 2 sets of files:
/// 1. Either files_to_compact or files_to_split
/// - files_to_compact is prioritized first as long as there is a minimum possible compacting
/// set of files that is under max_compact_size limit.
/// - files_to_split is returned when the minimum possible compacting set of files
/// is over max_compact_size limit. files_to_split is that minimum set of files.
/// 2. files_to_keep for next round of compaction
///
/// The input of this function has a constraint that every single file in start-level must overlap
/// with at most one file in target level
@ -23,11 +27,20 @@ use crate::components::{
/// |---L0.1---| |---L0.3---| |---L0.2---| Note that L0.2 is created BEFORE L0.3 but has LATER time range
/// |---L1.1---| |---L1.2---| |---L1.3---|
///
/// Output of files_to_compact: only 3 possible choices:
/// 1. Smallest compacting set: L0.1 + L1.1
/// Output: 4 possible choices:
/// 1. Smallest compacting set, L0.1 + L1.1, is too large to compact:
/// - files_to_split: L0.1 + L1.1
/// - files_to_keep: L0.2 + L0.3 + L1.2 + L1.3
/// 2. Smallest compacting set: L0.1 + L1.1
/// - files_to_compact: L0.1 + L1.1
/// - files_to_keep: L0.2 + L0.3 + L1.2 + L1.3
/// 2. Medium size compacting set: L0.1 + L1.1 + L0.2 + L1.2 + L1.3
/// Note that L1.2 overlaps with the time range of L0.1 + L0.2 and must be included here
/// - files_to_compact: L0.1 + L1.1 + L0.2 + L1.2 + L1.3
/// - files_to_keep: L0.3
/// 3. Largest compacting set: All input files
/// - files_to_compact: All input files
/// - files_to_keep: None
///
/// Example of start level as L1 and target level as L2.
/// Note the difference of the output compared with the previous example
@ -36,16 +49,26 @@ use crate::components::{
/// |---L1.1---| |---L1.3---| |---L1.2---| Note that L1.2 is created BEFORE L1.3 but has LATER time range
/// |---L2.1---| |---L2.2---| |---L2.3---|
///
/// Output of files_to_compact: only 3 possible choices:
/// 1. Smallest compacting set: L1.1 + L2.1
/// 2. Medium size compacting set: L1.1 + L2.1 + L1.3 + L2.2
/// 3. Largest compacting set: All input files
/// Output: 4 possible choices:
/// 1. Smallest compacting set, L1.1 + L2.1, is too large to compact:
/// - files_to_split: L1.1 + L2.1
/// - files_to_keep: L1.2 + L1.3 + L2.2 + L2.3
/// 2. Smallest compacting set: L1.1 + L2.1
/// - files_to_compact: L1.1 + L2.1
/// - files_to_keep: L1.2 + L1.3 + L2.2 + L2.3
/// 3. Medium size compacting set: L1.1 + L2.1 + L1.3 + L2.2
/// Note L1.3 has smaller time range and must be compacted before L1.2
/// - files_to_compact: L1.1 + L2.1 + L1.3 + L2.2
/// - files_to_keep: L1.2 + L2.3
/// 4. Largest compacting set: All input files
/// - files_to_compact: All input files
/// - files_to_keep: None
///
pub fn limit_files_to_compact(
max_compact_size: usize,
files: Vec<ParquetFile>,
target_level: CompactionLevel,
) -> (Vec<ParquetFile>, Vec<ParquetFile>) {
) -> KeepAndCompactSplit {
// panic if not all files are either in target level or start level
let start_level = target_level.prev();
assert!(files
@ -71,6 +94,7 @@ pub fn limit_files_to_compact(
// Go over start-level files and find overlapped files in target level
let mut start_level_files_to_compact = Vec::with_capacity(len);
let mut target_level_files_to_compact = Vec::with_capacity(len);
let mut files_to_further_split = Vec::with_capacity(len);
let mut files_to_keep = Vec::with_capacity(len);
let mut total_size = 0;
@ -103,13 +127,26 @@ pub fn limit_files_to_compact(
total_size += size;
} else {
// Over limit, stop here
files_to_keep.push(file);
if start_level_files_to_compact.is_empty() {
// nothing to compact,
// return this minimum compacting set for further splitting
files_to_further_split.push(file);
// since there is only one start_level file,
// the number of overlapped target_level must be <= 1
assert!(overlapped_files.len() <= 1);
files_to_further_split
.extend(overlapped_files.into_iter().cloned().collect::<Vec<_>>());
} else {
files_to_keep.push(file);
}
break;
}
}
// Remove all files in target_level_files_to_compact from target_level_files
// Remove all files in target_level_files_to_compact
// and files_to_further_split from target_level_files
target_level_files.retain(|f| !target_level_files_to_compact.iter().any(|x| x == f));
target_level_files.retain(|f| !files_to_further_split.iter().any(|x| x == f));
// All files left in start_level_files and target_level_files are kept for next round
target_level_files.extend(start_level_files);
@ -121,9 +158,26 @@ pub fn limit_files_to_compact(
.chain(target_level_files_to_compact.into_iter())
.collect::<Vec<_>>();
assert_eq!(files_to_compact.len() + files_to_keep.len(), len);
// Sanity check
// All files are returned
assert_eq!(
files_to_compact.len() + files_to_further_split.len() + files_to_keep.len(),
len
);
// Either compact or further split has to be empty. This is because if we are able to compact,
// we should not split anything anymore
assert!(files_to_compact.is_empty() || files_to_further_split.is_empty());
(files_to_compact, files_to_keep)
let files_to_compact_or_further_split = if files_to_compact.is_empty() {
CompactOrFurtherSplit::FurtherSplit(files_to_further_split)
} else {
CompactOrFurtherSplit::Compact(files_to_compact)
};
KeepAndCompactSplit {
files_to_compact_or_further_split,
files_to_keep,
}
}
/// Return time range of the given file and the list of given files
@ -138,6 +192,44 @@ fn time_range(file: &ParquetFile, files: &[ParquetFile]) -> (Timestamp, Timestam
(min_time, max_time)
}
/// Holds two sets of files:
/// 1. files that are either small enough to compact, or too large and in need of a further split, and
/// 2. files to keep for the next compaction round
pub struct KeepAndCompactSplit {
// Files are either small enough to compact, or too large and in need of a further split
files_to_compact_or_further_split: CompactOrFurtherSplit,
// Files to keep for next compaction round
files_to_keep: Vec<ParquetFile>,
}
impl KeepAndCompactSplit {
pub fn files_to_compact(&self) -> Vec<ParquetFile> {
match &self.files_to_compact_or_further_split {
CompactOrFurtherSplit::Compact(files) => files.clone(),
CompactOrFurtherSplit::FurtherSplit(_) => vec![],
}
}
pub fn files_to_further_split(&self) -> Vec<ParquetFile> {
match &self.files_to_compact_or_further_split {
CompactOrFurtherSplit::Compact(_) => vec![],
CompactOrFurtherSplit::FurtherSplit(files) => files.clone(),
}
}
pub fn files_to_keep(&self) -> Vec<ParquetFile> {
self.files_to_keep.clone()
}
}
/// Files to either compact or to further split
pub enum CompactOrFurtherSplit {
// These overlapped files are small enough to be compacted
Compact(Vec<ParquetFile>),
// These overlapped files are the minimum set to compact but still too large to do so
FurtherSplit(Vec<ParquetFile>),
}
#[cfg(test)]
mod tests {
use compactor2_test_utils::{
@ -154,9 +246,15 @@ mod tests {
#[test]
fn test_compact_empty() {
let files = vec![];
let (files_to_compact, files_to_keep) =
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::Initial);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert!(files_to_compact.is_empty());
assert!(files_to_further_split.is_empty());
assert!(files_to_keep.is_empty());
}
@ -167,7 +265,7 @@ mod tests {
let files = create_l1_files(1);
// Target is L0 while all files are in L1 --> panic
let (_files_to_compact, _files_to_keep) =
let _keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::Initial);
}
@ -196,7 +294,7 @@ mod tests {
);
// panic because it only handle at most 2 levels next to each other
let (_files_to_compact, _files_to_keep) =
let _keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::FileNonOverlapped);
}
@ -221,7 +319,7 @@ mod tests {
);
// size limit > total size --> files to compact = all L0s and overlapped L1s
let (_files_to_compact, _files_to_keep) =
let _keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 5 + 1, files, CompactionLevel::FileNonOverlapped);
}
@ -244,10 +342,15 @@ mod tests {
);
// size limit > total size --> files to compact = all L0s and overlapped L1s
let (files_to_compact, files_to_keep) =
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 5 + 1, files, CompactionLevel::FileNonOverlapped);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 0);
// See layout of 2 set of files
@ -287,26 +390,33 @@ mod tests {
);
// size limit too small to compact anything
let (files_to_compact, files_to_keep) =
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::FileNonOverlapped);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 0);
assert_eq!(files_to_keep.len(), 5);
assert_eq!(files_to_further_split.len(), 2);
assert_eq!(files_to_keep.len(), 3);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
format_files_split("files to further split:", &files_to_further_split, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "files to further split:"
- "L0, all files 100b "
- "L0.1[450,550] 120s |---------------------------L0.1---------------------------|"
- "L1, all files 100b "
- "L1.12[400,500] 60s |--------------------------L1.12---------------------------| "
- "files to keep:"
- "L0, all files 100b "
- "L0.1[450,550] 120s |------L0.1------| "
- "L0.2[650,750] 180s |------L0.2------| "
- "L0.3[800,900] 300s |------L0.3------|"
- "L0.2[650,750] 180s |------------L0.2------------| "
- "L0.3[800,900] 300s |------------L0.3------------|"
- "L1, all files 100b "
- "L1.13[600,700] 60s |-----L1.13------| "
- "L1.12[400,500] 60s |-----L1.12------| "
- "L1.13[600,700] 60s |-----------L1.13------------| "
"###
);
}
@ -330,10 +440,15 @@ mod tests {
);
// size limit < total size --> only enough to compact L0.1 with L1.12
let (files_to_compact, files_to_keep) =
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 3, files, CompactionLevel::FileNonOverlapped);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 3);
// See layout of 2 set of files
@ -375,10 +490,15 @@ mod tests {
);
// size limit < total size --> only enough to compact L0.1, L0.2 with L1.12 and L1.13
let (files_to_compact, files_to_keep) =
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 4, files, CompactionLevel::FileNonOverlapped);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 4);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 1);
// See layout of 2 set of files
@ -419,20 +539,59 @@ mod tests {
"###
);
// There are only 3 choices for compacting:
// 1. Smallest set: L0.1 with L1.11
// 2. Medium size set: L0.1, L0.2 with L1.11, L1.12, L1.13
// 3. All files: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
// There are only 4 choices:
// 1. Smallest set is still too large to compact. Split the set: L0.1 with L1.11
// 2. Smallest set to compact: L0.1 with L1.11
// 3. Medium size set to compact: L0.1, L0.2 with L1.11, L1.12, L1.13
// 4. All files to compact: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
// --------------------
// size limit = MAX_SIZE * 3 to force the first choice, L0.1 with L1.11
let (files_to_compact, files_to_keep) = limit_files_to_compact(
// size limit = MAX_SIZE to force the first choice: splitting L0.1 with L1.11
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE, files.clone(), CompactionLevel::FileNonOverlapped);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 0);
assert_eq!(files_to_further_split.len(), 2);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to further split:", &files_to_further_split, "files to keep:", &files_to_keep),
@r###"
---
- "files to further split:"
- "L0, all files 100b "
- "L0.1[150,250] 120s |---------------------------L0.1---------------------------|"
- "L1, all files 100b "
- "L1.11[100,200] 60s |--------------------------L1.11---------------------------| "
- "files to keep:"
- "L0, all files 100b "
- "L0.2[550,650] 180s |---------L0.2----------| "
- "L0.3[350,450] 300s |---------L0.3----------| "
- "L1, all files 100b "
- "L1.12[300,400] 60s |---------L1.12---------| "
- "L1.13[500,600] 60s |---------L1.13---------| "
"###
);
// --------------------
// size limit = MAX_SIZE * 3 to force the second choice, L0.1 with L1.11
let keep_and_compact_or_split = limit_files_to_compact(
MAX_SIZE * 3,
files.clone(),
CompactionLevel::FileNonOverlapped,
);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
@ -456,14 +615,20 @@ mod tests {
);
// --------------------
// size limit = MAX_SIZE * 4 to force the first choice, L0.1 with L1.11, becasue it still not enough to for second choice
let (files_to_compact, files_to_keep) = limit_files_to_compact(
// size limit = MAX_SIZE * 4 to force the second choice, L0.1 with L1.11, because it is still not enough for the third choice
let keep_and_compact_or_split = limit_files_to_compact(
MAX_SIZE * 4,
files.clone(),
CompactionLevel::FileNonOverlapped,
);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
@ -487,14 +652,19 @@ mod tests {
);
// --------------------
// size limit = MAX_SIZE * 5 to force the second choice, L0.1, L0.2 with L1.11, L1.12, L1.13
let (files_to_compact, files_to_keep) = limit_files_to_compact(
// size limit = MAX_SIZE * 5 to force the third choice, L0.1, L0.2 with L1.11, L1.12, L1.13
let keep_and_compact_or_split = limit_files_to_compact(
MAX_SIZE * 5,
files.clone(),
CompactionLevel::FileNonOverlapped,
);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 1);
// See layout of 2 set of files
@ -517,11 +687,16 @@ mod tests {
);
// --------------------
// size limit >= total size to force the third choice compacting everything: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
let (files_to_compact, files_to_keep) =
// size limit >= total size to force the fourth choice compacting everything: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 6, files, CompactionLevel::FileNonOverlapped);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 6);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 0);
// See layout of 2 set of files
@ -565,17 +740,56 @@ mod tests {
"###
);
// There are only 3 choices for compacting:
// 1. Smallest set: L1.1 with L2.11
// 2. Medium size set: L1.1, L1.3 with L1.11, L1.12,
// 3. All files: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13
// There are only 4 choices:
// 1. Smallest set is still too large to compact. Split the set: L1.1 with L2.11
// 2. Smallest set to compact: L1.1 with L2.11
// 3. Medium size set to compact: L1.1, L1.3 with L2.11, L2.12
// 4. All files to compact: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13
// --------------------
// size limit = MAX_SIZE * 3 to force the first choice, L0.1 with L1.11
let (files_to_compact, files_to_keep) =
// size limit = MAX_SIZE to force the first choice: splitting L1.1 & L2.11
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE, files.clone(), CompactionLevel::Final);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 0);
assert_eq!(files_to_further_split.len(), 2);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to further split:", &files_to_further_split , "files to keep:", &files_to_keep),
@r###"
---
- "files to further split:"
- "L1, all files 100b "
- "L1.1[150,250] 120s |---------------------------L1.1---------------------------|"
- "L2, all files 100b "
- "L2.11[100,200] 60s |--------------------------L2.11---------------------------| "
- "files to keep:"
- "L1, all files 100b "
- "L1.3[350,450] 300s |---------L1.3----------| "
- "L1.2[550,650] 180s |---------L1.2----------| "
- "L2, all files 100b "
- "L2.12[300,400] 60s |---------L2.12---------| "
- "L2.13[500,600] 60s |---------L2.13---------| "
"###
);
// --------------------
// size limit = MAX_SIZE * 3 to force the second choice: compact L1.1 with L2.11
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 3, files.clone(), CompactionLevel::Final);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
@ -599,11 +813,16 @@ mod tests {
);
// --------------------
// size limit = MAX_SIZE * 3 to force the first choice, L0.1 with L1.11, becasue it still not enough to for second choice
let (files_to_compact, files_to_keep) =
// size limit = MAX_SIZE * 3 to force the second choice, compact L1.1 with L2.11, because it is still not enough for the third choice
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 3, files.clone(), CompactionLevel::Final);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
@ -627,11 +846,16 @@ mod tests {
);
// --------------------
// size limit = MAX_SIZE * 5 to force the second choice, L0.1, L0.2 with L1.11, L1.12, L1.13
let (files_to_compact, files_to_keep) =
// size limit = MAX_SIZE * 5 to force the third choice, L1.1, L1.3 with L2.11, L2.12
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 5, files.clone(), CompactionLevel::Final);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 4);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 2);
// See layout of 2 set of files
@ -655,11 +879,16 @@ mod tests {
);
// --------------------
// size limit >= total size to force the third choice compacting everything: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
let (files_to_compact, files_to_keep) =
// size limit >= total size to force the fourth choice compacting everything: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13
let keep_and_compact_or_split =
limit_files_to_compact(MAX_SIZE * 6, files, CompactionLevel::Final);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let files_to_keep = keep_and_compact_or_split.files_to_keep();
assert_eq!(files_to_compact.len(), 6);
assert_eq!(files_to_further_split.len(), 0);
assert_eq!(files_to_keep.len(), 0);
// See layout of 2 set of files

View File

@ -0,0 +1,467 @@
use data_types::{ParquetFile, TimestampMinMax};
use crate::{components::ir_planner::planner_v1::V1IRPlanner, file_classification::FileToSplit};
// Maximum number of files in a minimum possible compacting set (one start-level file plus
// the single target-level file it overlaps).
const MAX_FILE_NUM: usize = 2;
// Percentage by which the soft limit `max_desired_file_size` may be exceeded before a file
// is considered large enough to be worth splitting.
pub const PERCENTAGE_OF_SOFT_EXCEEDED: f64 = 0.1;
/// Return `(files_to_split, files_not_to_split)` for the given files (at most
/// [`MAX_FILE_NUM`] of them).
///
/// `files_to_split` are the files larger than the soft limit — `max_desired_file_size`
/// plus [`PERCENTAGE_OF_SOFT_EXCEEDED`] — whose time range is wide enough to hold a split
/// time; each is returned together with its computed split times.
/// `files_not_to_split` are all remaining files: those under the soft limit, and large
/// files whose time range is too narrow to split.
///
/// # Panics
///
/// Panics if `files` is empty or holds more than [`MAX_FILE_NUM`] files, if
/// `max_compact_size < MAX_FILE_NUM * max_desired_file_size`, or if the total size of
/// `files` does not exceed `max_compact_size`.
pub fn compute_split_times_for_large_files(
    files: Vec<ParquetFile>,
    max_desired_file_size: u64,
    max_compact_size: usize,
) -> (Vec<FileToSplit>, Vec<ParquetFile>) {
    // Sanity checks
    // There must be between 1 and MAX_FILE_NUM files
    assert!(
        files.len() <= MAX_FILE_NUM && !files.is_empty(),
        "There must be at least one file and at most {MAX_FILE_NUM} files",
    );

    // max compact size must be at least MAX_FILE_NUM times larger than max desired file
    // size to ensure the split works
    assert!(
        max_compact_size >= MAX_FILE_NUM * max_desired_file_size as usize,
        "max_compact_size {max_compact_size} must be at least {MAX_FILE_NUM} times larger than max_desired_file_size {max_desired_file_size}",
    );

    // Total size of files must be larger than max_compact_size
    let total_size: i64 = files.iter().map(|f| f.file_size_bytes).sum();
    assert!(
        total_size as usize > max_compact_size,
        "Total size of files {total_size} must be larger than max_compact_size {max_compact_size}",
    );

    // Soft upper bound: files at or under this size are not worth splitting.
    // This is loop-invariant, so compute it once instead of per file.
    let max_file_size =
        (max_desired_file_size as f64 * (1.0 + PERCENTAGE_OF_SOFT_EXCEEDED)) as u64;

    // Split files over the soft limit into multiple files, each softly around
    // max_desired_file_size
    let mut files_to_split = Vec::with_capacity(files.len());
    let mut files_not_to_split = Vec::with_capacity(files.len());

    for file in files.into_iter() {
        let file_size = file.file_size_bytes as u64;
        let min_time = file.min_time.get();
        let max_time = file.max_time.get();

        // Only split files that are larger than the soft limit AND whose time range spans
        // at least 3 distinct timestamps — otherwise there is no interior point to split at.
        // (Comparing the extracted i64s is equivalent to comparing the Timestamp wrappers.)
        if file_size > max_file_size && min_time < max_time - 1 {
            let file_times = vec![TimestampMinMax {
                min: min_time,
                max: max_time,
            }];

            let split_times = V1IRPlanner::compute_split_time(
                file_times,
                min_time,
                max_time,
                file_size,
                max_desired_file_size,
            );

            files_to_split.push(FileToSplit { file, split_times });
        } else {
            files_not_to_split.push(file);
        }
    }

    (files_to_split, files_not_to_split)
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use compactor2_test_utils::{
create_overlapped_l0_l1_files_3, create_overlapped_two_overlapped_files, format_files,
format_files_split, TestTimes,
};
use data_types::CompactionLevel;
use iox_tests::ParquetFileBuilder;
use iox_time::{MockProvider, Time};
use crate::components::split_or_compact::large_files_to_split::compute_split_times_for_large_files;
// base file size (bytes) used throughout these tests
const FILE_SIZE: i64 = 100;
// empty input must trip the "at least one file" sanity check
#[test]
#[should_panic(expected = "There must be at least one file and at most 2 files")]
fn test_empty_input() {
let (_files_to_split, _files_not_to_split) = compute_split_times_for_large_files(
vec![],
(FILE_SIZE + 1) as u64,
((FILE_SIZE + 1) * 3) as usize,
);
}
// more than MAX_FILE_NUM (2) files must trip the same sanity check
#[test]
#[should_panic(expected = "There must be at least one file and at most 2 files")]
fn test_too_many_files() {
let files = create_overlapped_l0_l1_files_3(FILE_SIZE);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] 180s |------L0.2------| "
- "L0.1[450,550] 120s |------L0.1------| "
- "L0.3[800,900] 300s |------L0.3------|"
- "L1, all files 100b "
- "L1.13[600,700] 60s |-----L1.13------| "
- "L1.12[400,500] 60s |-----L1.12------| "
"###
);
let (_files_to_split, _files_not_to_split) = compute_split_times_for_large_files(
files,
(FILE_SIZE + 1) as u64,
((FILE_SIZE + 1) * 3) as usize,
);
}
// max_compact_size smaller than 2 * max_desired_file_size is rejected
#[test]
#[should_panic(
expected = "max_compact_size 111 must be at least 2 times larger than max_desired_file_size 101"
)]
fn test_invalid_max_compact_size() {
let files = create_overlapped_two_overlapped_files(FILE_SIZE);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.1[450,620] 120s |-------------------------------L0.1--------------------------------| "
- "L1, all files 100b "
- "L1.11[400,500] 60s |----------------L1.11-----------------| "
"###
);
let (_files_to_split, _files_not_to_split) = compute_split_times_for_large_files(
files,
(FILE_SIZE + 1) as u64,
((FILE_SIZE + 1) + 10) as usize,
);
}
// total size not exceeding max_compact_size is rejected
// NOTE(review): fn name has a typo ("inavlid"); renaming is a code change, left as-is
#[test]
#[should_panic(expected = "Total size of files 200 must be larger than max_compact_size 300")]
fn test_inavlid_total_size() {
let files = create_overlapped_two_overlapped_files(FILE_SIZE);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.1[450,620] 120s |-------------------------------L0.1--------------------------------| "
- "L1, all files 100b "
- "L1.11[400,500] 60s |----------------L1.11-----------------| "
"###
);
let (_files_to_split, _files_not_to_split) =
compute_split_times_for_large_files(files, FILE_SIZE as u64, (FILE_SIZE * 3) as usize);
}
// both files exceed the soft limit and have wide time ranges -> both are split
#[test]
fn test_split_both_large_files() {
let file_size = FILE_SIZE;
let max_desired_file_size = (FILE_SIZE / 4) as u64;
let max_compact_size = (max_desired_file_size * 3) as usize;
let files = create_overlapped_two_overlapped_files(file_size);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.1[450,620] 120s |-------------------------------L0.1--------------------------------| "
- "L1, all files 100b "
- "L1.11[400,500] 60s |----------------L1.11-----------------| "
"###
);
let (files_to_split, files_not_to_split) =
compute_split_times_for_large_files(files, max_desired_file_size, max_compact_size);
// See layout of the 2 sets of files
let files_to_split = files_to_split
.into_iter()
.map(|f| f.file)
.collect::<Vec<_>>();
insta::assert_yaml_snapshot!(
format_files_split("files to split", &files_to_split , "files not to split:", &files_not_to_split),
@r###"
---
- files to split
- "L0, all files 100b "
- "L0.1[450,620] 120s |-------------------------------L0.1--------------------------------| "
- "L1, all files 100b "
- "L1.11[400,500] 60s |----------------L1.11-----------------| "
- "files not to split:"
"###
);
}
// split only the large start-level file; the small target-level file is kept whole
#[test]
fn test_split_large_start_level() {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let large_size = FILE_SIZE * 3;
let small_size = FILE_SIZE / 2;
let max_desired_file_size = FILE_SIZE as u64;
let max_compact_size = (max_desired_file_size * 3) as usize;
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 500)
.with_file_size_bytes(small_size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(450, 620)
.with_file_size_bytes(large_size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
let files = vec![l1_1, l0_1];
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.1[450,620] 120s 300b |-------------------------------L0.1--------------------------------| "
- "L1 "
- "L1.11[400,500] 60s 50b |----------------L1.11-----------------| "
"###
);
let (files_to_split, files_not_to_split) =
compute_split_times_for_large_files(files, max_desired_file_size, max_compact_size);
// The split files should be L0_1 with 2 split times to split the file into 3 smaller files
assert_eq!(files_to_split.len(), 1);
assert_eq!(files_to_split[0].split_times.len(), 2);
// See layout of the 2 sets of files
let files_to_split = files_to_split
.into_iter()
.map(|f| f.file)
.collect::<Vec<_>>();
insta::assert_yaml_snapshot!(
format_files_split("files to split", &files_to_split , "files not to split:", &files_not_to_split),
@r###"
---
- files to split
- "L0, all files 300b "
- "L0.1[450,620] 120s |------------------------------------------L0.1------------------------------------------|"
- "files not to split:"
- "L1, all files 50b "
- "L1.11[400,500] 60s |-----------------------------------------L1.11------------------------------------------|"
"###
);
}
// split only the large target-level file; the small start-level file is kept whole
#[test]
fn test_split_large_target_level() {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let large_size = FILE_SIZE * 3;
let small_size = FILE_SIZE / 2;
let max_desired_file_size = FILE_SIZE as u64;
let max_compact_size = (max_desired_file_size * 3) as usize;
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 500)
.with_file_size_bytes(large_size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(450, 620)
.with_file_size_bytes(small_size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
let files = vec![l1_1, l0_1];
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.1[450,620] 120s 50b |-------------------------------L0.1--------------------------------| "
- "L1 "
- "L1.11[400,500] 60s 300b |----------------L1.11-----------------| "
"###
);
let (files_to_split, files_not_to_split) =
compute_split_times_for_large_files(files, max_desired_file_size, max_compact_size);
// The split files should be L1_1 with 2 split times to split the file into 3 smaller files
assert_eq!(files_to_split.len(), 1);
assert_eq!(files_to_split[0].split_times.len(), 2);
// See layout of the 2 sets of files
let files_to_split = files_to_split
.into_iter()
.map(|f| f.file)
.collect::<Vec<_>>();
insta::assert_yaml_snapshot!(
format_files_split("files to split", &files_to_split , "files not to split:", &files_not_to_split),
@r###"
---
- files to split
- "L1, all files 300b "
- "L1.11[400,500] 60s |-----------------------------------------L1.11------------------------------------------|"
- "files not to split:"
- "L0, all files 50b "
- "L0.1[450,620] 120s |------------------------------------------L0.1------------------------------------------|"
"###
);
}
// tiny time-range on one file: that file cannot be split even though it is large
#[test]
fn test_one_file_with_tiny_time_range() {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let large_size = FILE_SIZE * 3;
let max_desired_file_size = FILE_SIZE as u64;
let max_compact_size = (max_desired_file_size * 3) as usize;
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 401)
.with_file_size_bytes(large_size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(400, 620)
.with_file_size_bytes(large_size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
let files = vec![l1_1, l0_1];
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 300b "
- "L0.1[400,620] 120s |-----------------------------------------L0.1------------------------------------------| "
- "L1, all files 300b "
- "L1.11[400,401] 60s |L1.11| "
"###
);
let (files_to_split, files_not_to_split) =
compute_split_times_for_large_files(files, max_desired_file_size, max_compact_size);
// Only L0_1 is split (large with a wide time range, 2 split times -> 3 smaller files);
// L1_1 is large too, but its [400,401] range is too narrow to split
assert_eq!(files_to_split.len(), 1);
assert_eq!(files_to_split[0].split_times.len(), 2);
// See layout of the 2 sets of files
let files_to_split = files_to_split
.into_iter()
.map(|f| f.file)
.collect::<Vec<_>>();
insta::assert_yaml_snapshot!(
format_files_split("files to split", &files_to_split , "files not to split:", &files_not_to_split),
@r###"
---
- files to split
- "L0, all files 300b "
- "L0.1[400,620] 120s |-----------------------------------------L0.1------------------------------------------| "
- "files not to split:"
- "L1, all files 300b "
- "L1.11[400,401] 60s |-----------------------------------------L1.11------------------------------------------|"
"###
);
}
// tiny time-range on both files: nothing can be split
#[test]
fn test_two_files_with_tiny_time_range() {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let large_size = FILE_SIZE * 3;
let max_desired_file_size = FILE_SIZE as u64;
let max_compact_size = (max_desired_file_size * 3) as usize;
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 401)
.with_file_size_bytes(large_size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(400, 400)
.with_file_size_bytes(large_size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
let files = vec![l1_1, l0_1];
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 300b "
- "L0.1[400,400] 120s |L0.1| "
- "L1, all files 300b "
- "L1.11[400,401] 60s |-----------------------------------------L1.11------------------------------------------|"
"###
);
let (files_to_split, files_not_to_split) =
compute_split_times_for_large_files(files, max_desired_file_size, max_compact_size);
// Nothing is split: both files are large, but both time ranges are too narrow
// to hold a split time
assert_eq!(files_to_split.len(), 0);
// See layout of the 2 sets of files
let files_to_split = files_to_split
.into_iter()
.map(|f| f.file)
.collect::<Vec<_>>();
insta::assert_yaml_snapshot!(
format_files_split("files to split", &files_to_split , "files not to split:", &files_not_to_split),
@r###"
---
- files to split
- "files not to split:"
- "L0, all files 300b "
- "L0.1[400,400] 120s |L0.1| "
- "L1, all files 300b "
- "L1.11[400,401] 60s |-----------------------------------------L1.11------------------------------------------|"
"###
);
}
}

View File

@ -113,7 +113,7 @@ mod tests {
test_utils::PartitionInfoBuilder,
};
const MAX_SIZE: usize = 100;
const MAX_FILE: usize = 100;
#[test]
fn empty_records_nothing() {
@ -121,8 +121,10 @@ mod tests {
let files = vec![];
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact =
MetricsSplitOrCompactWrapper::new(SplitCompact::new(MAX_SIZE), &registry);
let split_compact = MetricsSplitOrCompactWrapper::new(
SplitCompact::new(MAX_FILE, MAX_FILE as u64),
&registry,
);
let (files_to_compact_or_split, _files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Initial);
@ -152,10 +154,12 @@ mod tests {
fn files_to_split_get_recorded() {
let registry = Registry::new();
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
let files = create_overlapped_l0_l1_files_2(MAX_FILE as i64);
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact =
MetricsSplitOrCompactWrapper::new(SplitCompact::new(MAX_SIZE), &registry);
let split_compact = MetricsSplitOrCompactWrapper::new(
SplitCompact::new(MAX_FILE, MAX_FILE as u64),
&registry,
);
let (files_to_compact_or_split, _files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
@ -186,10 +190,12 @@ mod tests {
fn files_to_compact_get_recorded() {
let registry = Registry::new();
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
let files = create_overlapped_l1_l2_files_2(MAX_FILE as i64);
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact =
MetricsSplitOrCompactWrapper::new(SplitCompact::new(MAX_SIZE * 3), &registry);
let split_compact = MetricsSplitOrCompactWrapper::new(
SplitCompact::new(MAX_FILE * 3, MAX_FILE as u64),
&registry,
);
let (files_to_compact_or_split, _files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Final);

View File

@ -5,10 +5,11 @@ use data_types::{CompactionLevel, ParquetFile};
use crate::{file_classification::FilesToCompactOrSplit, PartitionInfo};
pub mod files_to_compact;
pub mod files_to_split;
pub mod large_files_to_split;
pub mod logging;
pub mod metrics;
pub mod split_compact;
pub mod start_level_files_to_split;
pub trait SplitOrCompact: Debug + Display + Send + Sync {
/// Return (`[files_to_split_or_compact]`, `[files_to_keep]`) of given files

View File

@ -5,24 +5,33 @@ use data_types::{CompactionLevel, ParquetFile};
use crate::{file_classification::FilesToCompactOrSplit, partition_info::PartitionInfo};
use super::{
files_to_compact::limit_files_to_compact, files_to_split::identify_files_to_split,
SplitOrCompact,
files_to_compact::limit_files_to_compact,
large_files_to_split::compute_split_times_for_large_files,
start_level_files_to_split::identify_start_level_files_to_split, SplitOrCompact,
};
#[derive(Debug)]
pub struct SplitCompact {
max_compact_size: usize,
max_desired_file_size: u64,
}
impl SplitCompact {
pub fn new(max_compact_size: usize) -> Self {
Self { max_compact_size }
pub fn new(max_compact_size: usize, max_desired_file_size: u64) -> Self {
Self {
max_compact_size,
max_desired_file_size,
}
}
}
impl Display for SplitCompact {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "split_or_compact({})", self.max_compact_size)
write!(
f,
"split_or_compact({}, {})",
self.max_compact_size, self.max_desired_file_size
)
}
}
@ -45,25 +54,45 @@ impl SplitOrCompact for SplitCompact {
}
// This function identifies all start-level files that overlap with more than one target-level files
let (files_to_split, files_not_to_split) = identify_files_to_split(files, target_level);
let (files_to_split, files_not_to_split) =
identify_start_level_files_to_split(files, target_level);
if !files_to_split.is_empty() {
// These files must be split before further compaction
(
return (
FilesToCompactOrSplit::FilesToSplit(files_to_split),
files_not_to_split,
)
} else {
// No split is needed, which means every start-level file overlaps with at most one target-level file
// Need to limit number of files to compact to stay under compact size limit
let (files_to_compact, files_to_keep) =
limit_files_to_compact(self.max_compact_size, files_not_to_split, target_level);
);
}
(
// No start level split is needed, which means every start-level file overlaps with at most one target-level file
// Need to limit number of files to compact to stay under compact size limit
let keep_and_compact_or_split =
limit_files_to_compact(self.max_compact_size, files_not_to_split, target_level);
let files_to_compact = keep_and_compact_or_split.files_to_compact();
let files_to_further_split = keep_and_compact_or_split.files_to_further_split();
let mut files_to_keep = keep_and_compact_or_split.files_to_keep();
if !files_to_compact.is_empty() {
return (
FilesToCompactOrSplit::FilesToCompact(files_to_compact),
files_to_keep,
)
);
}
let (files_to_split, files_not_to_split) = compute_split_times_for_large_files(
files_to_further_split,
self.max_desired_file_size,
self.max_compact_size,
);
files_to_keep.extend(files_not_to_split);
(
FilesToCompactOrSplit::FilesToSplit(files_to_split),
files_to_keep,
)
}
}
@ -78,17 +107,20 @@ mod tests {
use data_types::CompactionLevel;
use crate::{
components::split_or_compact::{split_compact::SplitCompact, SplitOrCompact},
components::split_or_compact::{
large_files_to_split::PERCENTAGE_OF_SOFT_EXCEEDED, split_compact::SplitCompact,
SplitOrCompact,
},
test_utils::PartitionInfoBuilder,
};
const MAX_SIZE: usize = 100;
const FILE_SIZE: usize = 100;
#[test]
fn test_empty() {
let files = vec![];
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE);
let split_compact = SplitCompact::new(FILE_SIZE, FILE_SIZE as u64);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Initial);
@ -97,8 +129,11 @@ mod tests {
}
#[test]
fn test_compact_too_large_to_compact() {
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
#[should_panic(
expected = "max_compact_size 100 must be at least 2 times larger than max_desired_file_size 100"
)]
fn test_compact_invalid_max_compact_size() {
let files = create_overlapped_l1_l2_files_2(FILE_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
@ -114,18 +149,70 @@ mod tests {
);
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE);
let split_compact = SplitCompact::new(FILE_SIZE, FILE_SIZE as u64);
let (_files_to_compact_or_split, _files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Final);
}
#[test]
fn test_compact_too_large_to_compact() {
let files = create_overlapped_l1_l2_files_2(FILE_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 100b "
- "L1.13[600,700] 4ns |-----L1.13------|"
- "L1.12[400,500] 3ns |-----L1.12------| "
- "L1.11[250,350] 2ns |-----L1.11------| "
- "L2, all files 100b "
- "L2.22[200,300] 1ns |-----L2.22------| "
"###
);
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let max_desired_file_size =
FILE_SIZE - (FILE_SIZE as f64 * PERCENTAGE_OF_SOFT_EXCEEDED) as usize - 30;
let max_compact_size = 3 * max_desired_file_size;
let split_compact = SplitCompact::new(max_compact_size, max_desired_file_size as u64);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Final);
// nothing to compact or split
// after https://github.com/influxdata/idpe/issues/17246, this list won't be empty
assert!(files_to_compact_or_split.is_empty());
assert_eq!(files_to_keep.len(), 4);
// need to split files
let files_to_compact = files_to_compact_or_split.files_to_compact();
let files_to_split = files_to_compact_or_split.files_to_split();
let split_times = files_to_compact_or_split.split_times();
assert!(files_to_compact.is_empty());
assert_eq!(files_to_split.len(), 2);
assert_eq!(files_to_keep.len(), 2);
// both L1.11 and L2.22 are just a bit larger than max_desired_file_size
// so they are split into 2 files each. This means the split_times of each includes one time where it is split into 2 files
for times in split_times {
assert_eq!(times.len(), 1);
}
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to split", &files_to_compact_or_split.files_to_split() , "files to keep:", &files_to_keep),
@r###"
---
- files to split
- "L1, all files 100b "
- "L1.11[250,350] 2ns |--------------------------L1.11---------------------------|"
- "L2, all files 100b "
- "L2.22[200,300] 1ns |--------------------------L2.22---------------------------| "
- "files to keep:"
- "L1, all files 100b "
- "L1.12[400,500] 3ns |-----------L1.12------------| "
- "L1.13[600,700] 4ns |-----------L1.13------------|"
"###
);
}
#[test]
fn test_compact_files_no_limit() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
let files = create_overlapped_l0_l1_files_2(FILE_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
@ -143,7 +230,7 @@ mod tests {
// size limit > total size --> compact all
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE * 6 + 1);
let split_compact = SplitCompact::new(FILE_SIZE * 6 + 1, FILE_SIZE as u64);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
@ -170,7 +257,7 @@ mod tests {
#[test]
fn test_split_files() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
let files = create_overlapped_l0_l1_files_2(FILE_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
@ -188,7 +275,7 @@ mod tests {
// hit size limit -> split start_level files that overlap with more than 1 target_level files
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE);
let split_compact = SplitCompact::new(FILE_SIZE, FILE_SIZE as u64);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
@ -216,7 +303,7 @@ mod tests {
#[test]
fn test_compact_files() {
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
let files = create_overlapped_l1_l2_files_2(FILE_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
@ -233,7 +320,7 @@ mod tests {
// hit size limit and nothing to split --> limit number of files to compact
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE * 3);
let split_compact = SplitCompact::new(FILE_SIZE * 3, FILE_SIZE as u64);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Final);

View File

@ -32,7 +32,7 @@ use crate::{
/// To achieve this goal, a start-level file should be split to overlap with at most one target-level file. This enables the
/// minimum set of compacting files to 2 files: a start-level file and an overlapped target-level file.
///
pub fn identify_files_to_split(
pub fn identify_start_level_files_to_split(
files: Vec<ParquetFile>,
target_level: CompactionLevel,
) -> (Vec<FileToSplit>, Vec<ParquetFile>) {
@ -110,7 +110,7 @@ mod tests {
fn test_split_empty() {
let files = vec![];
let (files_to_split, files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::Initial);
super::identify_start_level_files_to_split(files, CompactionLevel::Initial);
assert!(files_to_split.is_empty());
assert!(files_not_to_split.is_empty());
}
@ -123,7 +123,7 @@ mod tests {
// Target is L0 while all files are in L1 --> panic
let (_files_to_split, _files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::Initial);
super::identify_start_level_files_to_split(files, CompactionLevel::Initial);
}
#[test]
@ -152,7 +152,7 @@ mod tests {
// panic because it only handle at most 2 levels next to each other
let (_files_to_split, _files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
super::identify_start_level_files_to_split(files, CompactionLevel::FileNonOverlapped);
}
#[test]
@ -171,7 +171,7 @@ mod tests {
);
let (files_to_split, files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
super::identify_start_level_files_to_split(files, CompactionLevel::FileNonOverlapped);
assert!(files_to_split.is_empty());
assert_eq!(files_not_to_split.len(), 3);
}
@ -195,7 +195,7 @@ mod tests {
);
let (files_to_split, files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
super::identify_start_level_files_to_split(files, CompactionLevel::FileNonOverlapped);
// L0.1 that overlaps with 2 level-1 files will be split
assert_eq!(files_to_split.len(), 1);

View File

@ -10,6 +10,12 @@ use parquet_file::storage::ParquetStorage;
use crate::components::{commit::CommitWrapper, parquet_files_sink::ParquetFilesSink};
/// Multiple from `max_desired_file_size_bytes` to compute the minimum value for
/// `max_compact_size_bytes`. Since `max_desired_file_size_bytes` is softly enforced, actual file
/// sizes can exceed it. A single compaction job must be able to compact > 1 max sized file, so the
/// multiple should be at least 3.
const MIN_COMPACT_SIZE_MULTIPLE: usize = 3;
/// Config to set up a compactor.
#[derive(Debug, Clone)]
pub struct Config {
@ -91,10 +97,6 @@ pub struct Config {
/// This is mostly useful for debugging.
pub ignore_partition_skip_marker: bool,
/// Maximum input bytes (from parquet files) per compaction. If there is more data, we ignore
/// the partition (for now) as a self-protection mechanism.
pub max_compact_size: usize,
/// Shard config (if sharding should be enabled).
pub shard_config: Option<ShardConfig>,
@ -137,6 +139,12 @@ pub struct Config {
}
impl Config {
/// Maximum input bytes (from parquet files) per compaction. If there is more data, we ignore
/// the partition (for now) as a self-protection mechanism.
pub fn max_compact_size_bytes(&self) -> usize {
self.max_desired_file_size_bytes as usize * MIN_COMPACT_SIZE_MULTIPLE
}
/// Fetch shard ID.
///
/// This is likely required to construct a [`Config`] object.

View File

@ -286,7 +286,6 @@ async fn run_plans(
files,
partition_info,
components,
target_level,
job_semaphore,
scratchpad_ctx,
)
@ -340,7 +339,6 @@ async fn run_split_plans(
files_to_split: &[FileToSplit],
partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>,
target_level: CompactionLevel,
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: &mut dyn Scratchpad,
) -> Result<Vec<ParquetFileParams>, DynError> {
@ -354,7 +352,6 @@ async fn run_split_plans(
file_to_split,
partition_info,
components,
target_level,
Arc::clone(&job_semaphore),
scratchpad_ctx,
)
@ -370,7 +367,6 @@ async fn run_split_plan(
file_to_split: &FileToSplit,
partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>,
target_level: CompactionLevel,
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: &mut dyn Scratchpad,
) -> Result<Vec<ParquetFileParams>, DynError> {
@ -382,6 +378,9 @@ async fn run_split_plan(
..file_to_split.file.clone()
};
// target level of a split file is the same as its level
let target_level = file_to_split.file.compaction_level;
let plan_ir = components.ir_planner.split_plan(
file_inpad,
file_to_split.split_times.clone(),

View File

@ -100,6 +100,14 @@ impl FilesToCompactOrSplit {
}
}
// return split times of files to split
pub fn split_times(&self) -> Vec<Vec<i64>> {
match self {
Self::FilesToCompact(_) => vec![],
Self::FilesToSplit(files) => files.iter().map(|f| f.split_times.clone()).collect(),
}
}
/// Return files of either type
pub fn files(&self) -> Vec<ParquetFile> {
match self {
@ -107,15 +115,6 @@ impl FilesToCompactOrSplit {
Self::FilesToSplit(files) => files.iter().map(|f| f.file.clone()).collect(),
}
}
// Returns target level of the files which the compaction level of spit files if any
// or the given target level
pub fn target_level(&self, target_level: CompactionLevel) -> CompactionLevel {
match self {
Self::FilesToCompact(_) => target_level,
Self::FilesToSplit(files) => files[0].file.compaction_level,
}
}
}
/// File to split and their split times

View File

@ -34,7 +34,7 @@ async fn test_num_files_over_limit() {
.await
// Set max num file to 4 (< num files) --> many L0s files, compact 4 L0s into 2 L0s
.with_max_num_files_per_plan(4)
// Not compact L1s into L2s becasue tnumber of L1s < 5
// Do not compact L1s into L2s because the number of L1s < 5
.with_min_num_l1_files_to_compact(5)
.build()
.await;
@ -86,7 +86,6 @@ async fn test_compact_target_level() {
.await
// Ensure we have enough resource to compact the files
.with_max_num_files_per_plan(10)
.with_max_compact_size_relative_to_total_size(1000)
.with_min_num_l1_files_to_compact(2)
.build()
.await;
@ -198,10 +197,9 @@ async fn test_compact_large_overlapes() {
.await
// the test setup does not exceed number of files limit
.with_max_num_files_per_plan(10)
// the test setup to have total file size exceed max compact size limit
.with_max_compact_size_relative_to_total_size(-1)
.with_min_num_l1_files_to_compact(2)
.with_max_desired_file_size_bytes(100 * 1024 * 1024)
// the test setup to have total file size exceed max compact size limit
.with_max_desired_file_size_bytes((4 * 1024) as u64)
.build()
.await;
@ -232,9 +230,9 @@ async fn test_compact_large_overlapes() {
---
- initial
- "L2 "
- "L2.3[36000,36000] 180s 2.17kb |L2.3| "
- "L2.9[6000,30000] 240s 2.68kb|-----L2.9-----| "
- "L2.10[68000,136000] 300s 2.62kb |--------------------L2.10--------------------| "
- "L2.6[6000,36000] 300s 2.71kb|-------L2.6-------| "
- "L2.7[68000,68000] 300s 2.51kb |L2.7| "
- "L2.8[136000,136000] 300s 2.55kb |L2.8|"
"###
);
@ -243,6 +241,7 @@ async fn test_compact_large_overlapes() {
// order files on their min_time
files.sort_by_key(|f| f.min_time);
// time range: [6000,36000]
let file = files[0].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
@ -255,6 +254,7 @@ async fn test_compact_large_overlapes() {
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
"| 21 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
@ -263,27 +263,28 @@ async fn test_compact_large_overlapes() {
&batches
);
// time range: [68000,68000]
let file = files[1].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+-----------------------------+",
"| field_int | tag2 | tag3 | time |",
"+-----------+------+------+-----------------------------+",
"| 21 | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"+-----------+------+------+-----------------------------+",
],
&batches
);
let file = files[2].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
// time range: [136000,136000]
let file = files[2].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
"+-----------+------+------+------+-----------------------------+",
],
@ -306,10 +307,9 @@ async fn test_compact_large_overlape_2() {
.await
// the test setup does not exceed number of files limit
.with_max_num_files_per_plan(10)
// the test setup exceed max compact size limit
.with_max_compact_size_relative_to_total_size(-1)
.with_min_num_l1_files_to_compact(2)
.with_max_desired_file_size_bytes(100 * 1024 * 1024)
// the test setup to have total file size exceed max compact size limit
.with_max_desired_file_size_bytes((4 * 1024) as u64)
.build()
.await;
@ -339,11 +339,10 @@ async fn test_compact_large_overlape_2() {
@r###"
---
- initial
- "L1 "
- "L1.9[68000,136000] 300s 2.62kb |--------------------L1.9---------------------| "
- "L2 "
- "L2.3[36000,36000] 180s 2.17kb |L2.3| "
- "L2.10[6000,30000] 300s 2.68kb|----L2.10-----| "
- "L2.6[6000,36000] 300s 2.71kb|-------L2.6-------| "
- "L2.7[68000,68000] 300s 2.51kb |L2.7| "
- "L2.8[136000,136000] 300s 2.55kb |L2.8|"
"###
);
@ -352,6 +351,7 @@ async fn test_compact_large_overlape_2() {
// order files on their min_time
files.sort_by_key(|f| f.min_time);
// time range: [6000,36000]
let file = files[0].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
@ -364,6 +364,7 @@ async fn test_compact_large_overlape_2() {
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
"| 21 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
@ -372,27 +373,28 @@ async fn test_compact_large_overlape_2() {
&batches
);
// time range: [68000,68000]
let file = files[1].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+-----------------------------+",
"| field_int | tag2 | tag3 | time |",
"+-----------+------+------+-----------------------------+",
"| 21 | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"+-----------+------+------+-----------------------------+",
],
&batches
);
let file = files[2].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
// time range: [136000,136000]
let file = files[2].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
"+-----------+------+------+------+-----------------------------+",
],

View File

@ -0,0 +1,822 @@
//! layout tests for common scenarios for compactor.
//!
//! These scenarios are the "best case" for the compactor and the ones for which its algorithm
//! is designed to work best with.
//!
//! See [crate::layout] module for detailed documentation
use compactor2_test_utils::format_files;
use data_types::CompactionLevel;
use iox_time::Time;
use crate::layouts::{layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB};
// Each L0 file overlaps around 20% time range with its previously created L0 file.
// The setup has final files of level 2 only
#[tokio::test]
async fn test_keep_ingesting_l0_files_20_percent_overlap() {
    test_helpers::maybe_start_logging();
    let setup = layout_setup_builder().await.build().await;
    // This test simulates the case where the ingester creates new L0 files
    // with the most recent data and the compactor runs continuously keeping
    // the partition ideally configured
    //
    // The tests compacts N times, each time new M number of L0 files are created.
    // Each L0 file is 5MB and only overlaps 20% with the previously created L0 file.
    let n = 100;
    let m = 5;
    let mut idx = 0;
    for _i in 0..n {
        for _j in 0..m {
            // File `idx` covers [idx*10, idx*10 + 11]: consecutive files share a
            // small tail of their time range (the ~20% overlap described above).
            let min = idx * 10;
            let max = min + 11;
            setup
                .partition
                .create_parquet_file(
                    parquet_builder()
                        .with_min_time(min)
                        .with_max_time(max)
                        .with_file_size_bytes(5 * ONE_MB)
                        .with_max_l0_created_at(Time::from_timestamp_nanos(idx))
                        .with_compaction_level(CompactionLevel::Initial),
                )
                .await;
            idx += 1;
        }
        // Compact after each batch of m newly ingested L0 files, as the real
        // compactor would run continuously between ingest batches.
        run_layout_scenario(&setup).await;
    }
    // Add three L0 file during last compacting
    for _ in 0..3 {
        let min = idx * 10;
        let max = min + 14;
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(min)
                    .with_max_time(max)
                    .with_file_size_bytes(5 * ONE_MB)
                    .with_max_l0_created_at(Time::from_timestamp_nanos(idx))
                    .with_compaction_level(CompactionLevel::Initial),
            )
            .await;
        idx += 1;
    }
    let files = setup.list_by_table_not_to_delete().await;
    // Only the earliest available L0 overlaps with the latest L2 file
    insta::assert_yaml_snapshot!(
        format_files("final output", &files),
        @r###"
    ---
    - final output
    - "L0 "
    - "L0.751[5000,5014] 500ns 5mb |L0.751|"
    - "L0.752[5010,5024] 501ns 5mb |L0.752|"
    - "L0.753[5020,5034] 502ns 5mb |L0.753|"
    - "L2 "
    - "L2.29[0,160] 19ns 79.6mb |L2.29| "
    - "L2.59[161,361] 39ns 100.33mb |L2.59| "
    - "L2.89[362,562] 59ns 100.47mb |L2.89| "
    - "L2.119[563,753] 79ns 95.47mb |L2.119| "
    - "L2.149[754,954] 99ns 100.5mb |L2.149| "
    - "L2.179[955,1155] 119ns 100.5mb |L2.179| "
    - "L2.209[1156,1356] 139ns 100.5mb |L2.209| "
    - "L2.239[1357,1557] 159ns 100.5mb |L2.239| "
    - "L2.269[1558,1758] 179ns 100.5mb |L2.269| "
    - "L2.299[1759,1958] 199ns 100mb |L2.299| "
    - "L2.329[1959,2158] 219ns 100mb |L2.329| "
    - "L2.359[2159,2358] 239ns 100mb |L2.359| "
    - "L2.389[2359,2558] 259ns 100mb |L2.389| "
    - "L2.419[2559,2758] 279ns 100mb |L2.419| "
    - "L2.449[2759,2958] 299ns 100mb |L2.449| "
    - "L2.479[2959,3158] 319ns 100mb |L2.479| "
    - "L2.509[3159,3358] 339ns 100mb |L2.509| "
    - "L2.539[3359,3558] 359ns 100mb |L2.539| "
    - "L2.569[3559,3758] 379ns 100mb |L2.569| "
    - "L2.599[3759,3958] 399ns 100mb |L2.599| "
    - "L2.629[3959,4158] 419ns 100mb |L2.629| "
    - "L2.659[4159,4358] 439ns 100mb |L2.659| "
    - "L2.689[4359,4558] 459ns 100mb |L2.689| "
    - "L2.719[4559,4758] 479ns 100mb |L2.719| "
    - "L2.749[4759,4958] 499ns 100mb |L2.749|"
    - "L2.750[4959,5001] 499ns 21.61mb |L2.750|"
    "###
    );
}
// Each L0 file overlaps ~40% time range with its previously created L0 file.
// The setup has final files of level 2 only
#[tokio::test]
async fn test_keep_ingesting_l0_files_40_percent_overlap() {
    test_helpers::maybe_start_logging();
    let setup = layout_setup_builder().await.build().await;
    // This test simulates the case where the ingester creates new L0 files
    // with the most recent data but there is a larger delay in new data arriving
    // and thus there is more overlap (40%) with the existing files
    //
    // This test simulates the case where we loop to compact N times, each time new M number of L0 files are created.
    // Each L0 file is 5MB and only overlaps 40% with the previously created L0 file.
    let n = 100;
    let m = 5;
    let mut idx = 0;
    for _i in 0..n {
        for _j in 0..m {
            // File `idx` covers [idx*10, idx*10 + 14]: the wider max (+14 vs the
            // 20%-overlap test's +11) yields the larger overlap between neighbors.
            let min = idx * 10;
            let max = min + 14;
            setup
                .partition
                .create_parquet_file(
                    parquet_builder()
                        .with_min_time(min)
                        .with_max_time(max)
                        .with_file_size_bytes(5 * ONE_MB)
                        .with_max_l0_created_at(Time::from_timestamp_nanos(idx))
                        .with_compaction_level(CompactionLevel::Initial),
                )
                .await;
            idx += 1;
        }
        // Compact after each batch of m newly ingested L0 files.
        run_layout_scenario(&setup).await;
    }
    // Add three L0 file during last compacting
    for _ in 0..3 {
        let min = idx * 10;
        let max = min + 14;
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(min)
                    .with_max_time(max)
                    .with_file_size_bytes(5 * ONE_MB)
                    .with_max_l0_created_at(Time::from_timestamp_nanos(idx))
                    .with_compaction_level(CompactionLevel::Initial),
            )
            .await;
        idx += 1;
    }
    let files = setup.list_by_table_not_to_delete().await;
    // Only the earliest available L0 overlaps with the latest L2 file
    insta::assert_yaml_snapshot!(
        format_files("final output", &files),
        @r###"
    ---
    - final output
    - "L0 "
    - "L0.751[5000,5014] 500ns 5mb |L0.751|"
    - "L0.752[5010,5024] 501ns 5mb |L0.752|"
    - "L0.753[5020,5034] 502ns 5mb |L0.753|"
    - "L2 "
    - "L2.29[0,163] 19ns 79.9mb |L2.29| "
    - "L2.59[164,364] 39ns 100.08mb |L2.59| "
    - "L2.89[365,565] 59ns 100.43mb |L2.89| "
    - "L2.119[566,756] 79ns 95.47mb |L2.119| "
    - "L2.149[757,957] 99ns 100.5mb |L2.149| "
    - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
    - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
    - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
    - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
    - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
    - "L2.329[1963,2162] 219ns 100mb |L2.329| "
    - "L2.359[2163,2362] 239ns 100mb |L2.359| "
    - "L2.389[2363,2562] 259ns 100mb |L2.389| "
    - "L2.419[2563,2762] 279ns 100mb |L2.419| "
    - "L2.449[2763,2962] 299ns 100mb |L2.449| "
    - "L2.479[2963,3162] 319ns 100mb |L2.479| "
    - "L2.509[3163,3362] 339ns 100mb |L2.509| "
    - "L2.539[3363,3562] 359ns 100mb |L2.539| "
    - "L2.569[3563,3762] 379ns 100mb |L2.569| "
    - "L2.599[3763,3962] 399ns 100mb |L2.599| "
    - "L2.629[3963,4162] 419ns 100mb |L2.629| "
    - "L2.659[4163,4362] 439ns 100mb |L2.659| "
    - "L2.689[4363,4562] 459ns 100mb |L2.689| "
    - "L2.719[4563,4762] 479ns 100mb |L2.719| "
    - "L2.749[4763,4962] 499ns 100mb |L2.749|"
    - "L2.750[4963,5004] 499ns 21.11mb |L2.750|"
    "###
    );
}
// Each L0 file overlaps ~40% time range with its previously created L0 file.
// The setup has final files of level 2, level 1, and level 0.
// The level-1 files are not large enough to get compacted into L2 files
// The level-0 files are ingested during the last compaction
#[tokio::test]
async fn test_keep_ingesting_l0_files_40_percent_overlap_l1_left() {
    test_helpers::maybe_start_logging();
    let setup = layout_setup_builder().await.build().await;
    // This test simulates the case where we loop to compact N times, each time new M number of L0 files are created.
    // Each L0 file is 5MB and only overlaps 40% with the previously created L0 file.
    let n = 101;
    let m = 5;
    let mut idx = 0;
    // Iterations at which the full compaction trace (input files, simulation
    // runs, output files) is snapshotted; all other iterations just compact.
    let show_intermediate_result_runs = [0, 28, 45, 67, 89, 99];
    for i in 0..n {
        for _ in 0..m {
            let min = idx * 10;
            let max = min + 14;
            setup
                .partition
                .create_parquet_file(
                    parquet_builder()
                        .with_min_time(min)
                        .with_max_time(max)
                        .with_file_size_bytes(5 * ONE_MB)
                        .with_max_l0_created_at(Time::from_timestamp_nanos(idx))
                        .with_compaction_level(CompactionLevel::Initial),
                )
                .await;
            idx += 1;
        }
        // show intermediate results for index i in show_intermediate_result_runs
        if i == show_intermediate_result_runs[0] {
            insta::assert_yaml_snapshot!(
                run_layout_scenario(&setup).await,
                @r###"
            ---
            - "**** Input Files "
            - "L0, all files 5mb "
            - "L0.1[0,14] 0ns |--------L0.1---------| "
            - "L0.2[10,24] 1ns |--------L0.2---------| "
            - "L0.3[20,34] 2ns |--------L0.3---------| "
            - "L0.4[30,44] 3ns |--------L0.4---------| "
            - "L0.5[40,54] 4ns |--------L0.5---------| "
            - "**** Simulation run 0, type=split(split_times=[43]). 5 Input Files, 25mb total:"
            - "L0, all files 5mb "
            - "L0.5[40,54] 4ns |--------L0.5---------| "
            - "L0.4[30,44] 3ns |--------L0.4---------| "
            - "L0.3[20,34] 2ns |--------L0.3---------| "
            - "L0.2[10,24] 1ns |--------L0.2---------| "
            - "L0.1[0,14] 0ns |--------L0.1---------| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 25mb total:"
            - "L1 "
            - "L1.?[0,43] 4ns 19.91mb |--------------------------------L1.?---------------------------------| "
            - "L1.?[44,54] 4ns 5.09mb |-----L1.?-----| "
            - "Committing partition 1:"
            - " Soft Deleting 5 files: L0.1, L0.2, L0.3, L0.4, L0.5"
            - " Creating 2 files"
            - "**** Final Output Files "
            - "L1 "
            - "L1.6[0,43] 4ns 19.91mb |--------------------------------L1.6---------------------------------| "
            - "L1.7[44,54] 4ns 5.09mb |-----L1.7-----| "
            "###
            );
        } else if i == show_intermediate_result_runs[1] {
            insta::assert_yaml_snapshot!(
                run_layout_scenario(&setup).await,
                @r###"
            ---
            - "**** Input Files "
            - "L0 "
            - "L0.211[1400,1414] 140ns 5mb |L0.211|"
            - "L0.212[1410,1424] 141ns 5mb |L0.212|"
            - "L0.213[1420,1434] 142ns 5mb |L0.213|"
            - "L0.214[1430,1444] 143ns 5mb |L0.214|"
            - "L0.215[1440,1454] 144ns 5mb |L0.215|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |-L2.29--| "
            - "L2.59[164,364] 39ns 100.08mb |--L2.59---| "
            - "L2.89[365,565] 59ns 100.43mb |--L2.89---| "
            - "L2.119[566,756] 79ns 95.47mb |-L2.119--| "
            - "L2.149[757,957] 99ns 100.5mb |--L2.149--| "
            - "L2.179[958,1158] 119ns 100.5mb |--L2.179--| "
            - "L2.209[1159,1359] 139ns 100.5mb |--L2.209--| "
            - "L2.210[1360,1404] 139ns 22.61mb |L2.210|"
            - "**** Simulation run 35, type=split(split_times=[1443]). 5 Input Files, 25mb total:"
            - "L0, all files 5mb "
            - "L0.215[1440,1454] 144ns |-------L0.215--------| "
            - "L0.214[1430,1444] 143ns |-------L0.214--------| "
            - "L0.213[1420,1434] 142ns |-------L0.213--------| "
            - "L0.212[1410,1424] 141ns |-------L0.212--------| "
            - "L0.211[1400,1414] 140ns |-------L0.211--------| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 25mb total:"
            - "L1 "
            - "L1.?[1400,1443] 144ns 19.91mb|--------------------------------L1.?---------------------------------| "
            - "L1.?[1444,1454] 144ns 5.09mb |-----L1.?-----| "
            - "Committing partition 1:"
            - " Soft Deleting 5 files: L0.211, L0.212, L0.213, L0.214, L0.215"
            - " Creating 2 files"
            - "**** Final Output Files "
            - "L1 "
            - "L1.216[1400,1443] 144ns 19.91mb |L1.216|"
            - "L1.217[1444,1454] 144ns 5.09mb |L1.217|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |-L2.29--| "
            - "L2.59[164,364] 39ns 100.08mb |--L2.59---| "
            - "L2.89[365,565] 59ns 100.43mb |--L2.89---| "
            - "L2.119[566,756] 79ns 95.47mb |-L2.119--| "
            - "L2.149[757,957] 99ns 100.5mb |--L2.149--| "
            - "L2.179[958,1158] 119ns 100.5mb |--L2.179--| "
            - "L2.209[1159,1359] 139ns 100.5mb |--L2.209--| "
            - "L2.210[1360,1404] 139ns 22.61mb |L2.210|"
            "###
            );
        } else if i == show_intermediate_result_runs[2] {
            insta::assert_yaml_snapshot!(
                run_layout_scenario(&setup).await,
                @r###"
            ---
            - "**** Input Files "
            - "L0 "
            - "L0.338[2250,2264] 225ns 5mb |L0.338|"
            - "L0.339[2260,2274] 226ns 5mb |L0.339|"
            - "L0.340[2270,2284] 227ns 5mb |L0.340|"
            - "L0.341[2280,2294] 228ns 5mb |L0.341|"
            - "L0.342[2290,2304] 229ns 5mb |L0.342|"
            - "L1 "
            - "L1.336[2200,2243] 224ns 19.91mb |L1.336|"
            - "L1.337[2244,2254] 224ns 5.09mb |L1.337|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.330[2163,2204] 219ns 21.11mb |L2.330|"
            - "**** Simulation run 56, type=split(split_times=[2292]). 6 Input Files, 30.09mb total:"
            - "L0 "
            - "L0.342[2290,2304] 229ns 5mb |------L0.342-------|"
            - "L0.341[2280,2294] 228ns 5mb |------L0.341-------| "
            - "L0.340[2270,2284] 227ns 5mb |------L0.340-------| "
            - "L0.339[2260,2274] 226ns 5mb |------L0.339-------| "
            - "L0.338[2250,2264] 225ns 5mb |------L0.338-------| "
            - "L1 "
            - "L1.337[2244,2254] 224ns 5.09mb|---L1.337----| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 30.09mb total:"
            - "L1 "
            - "L1.?[2244,2292] 229ns 24.07mb|---------------------------------L1.?---------------------------------| "
            - "L1.?[2293,2304] 229ns 6.02mb |-----L1.?-----| "
            - "Committing partition 1:"
            - " Soft Deleting 6 files: L1.337, L0.338, L0.339, L0.340, L0.341, L0.342"
            - " Creating 2 files"
            - "**** Final Output Files "
            - "L1 "
            - "L1.336[2200,2243] 224ns 19.91mb |L1.336|"
            - "L1.343[2244,2292] 229ns 24.07mb |L1.343|"
            - "L1.344[2293,2304] 229ns 6.02mb |L1.344|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.330[2163,2204] 219ns 21.11mb |L2.330|"
            "###
            );
        } else if i == show_intermediate_result_runs[3] {
            insta::assert_yaml_snapshot!(
                run_layout_scenario(&setup).await,
                @r###"
            ---
            - "**** Input Files "
            - "L0 "
            - "L0.502[3350,3364] 335ns 5mb |L0.502|"
            - "L0.503[3360,3374] 336ns 5mb |L0.503|"
            - "L0.504[3370,3384] 337ns 5mb |L0.504|"
            - "L0.505[3380,3394] 338ns 5mb |L0.505|"
            - "L0.506[3390,3404] 339ns 5mb |L0.506|"
            - "L1 "
            - "L1.486[3200,3243] 324ns 19.91mb |L1.486|"
            - "L1.493[3244,3292] 329ns 24.07mb |L1.493|"
            - "L1.500[3293,3341] 334ns 24.41mb |L1.500|"
            - "L1.501[3342,3354] 334ns 6.61mb |L1.501|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.359[2163,2362] 239ns 100mb |L2.359| "
            - "L2.389[2363,2562] 259ns 100mb |L2.389| "
            - "L2.419[2563,2762] 279ns 100mb |L2.419| "
            - "L2.449[2763,2962] 299ns 100mb |L2.449| "
            - "L2.479[2963,3162] 319ns 100mb |L2.479| "
            - "L2.480[3163,3204] 319ns 21.11mb |L2.480|"
            - "**** Simulation run 83, type=split(split_times=[3391]). 6 Input Files, 31.61mb total:"
            - "L0 "
            - "L0.506[3390,3404] 339ns 5mb |------L0.506------| "
            - "L0.505[3380,3394] 338ns 5mb |------L0.505------| "
            - "L0.504[3370,3384] 337ns 5mb |------L0.504------| "
            - "L0.503[3360,3374] 336ns 5mb |------L0.503------| "
            - "L0.502[3350,3364] 335ns 5mb |------L0.502------| "
            - "L1 "
            - "L1.501[3342,3354] 334ns 6.61mb|----L1.501-----| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 31.61mb total:"
            - "L1 "
            - "L1.?[3342,3391] 339ns 24.98mb|--------------------------------L1.?---------------------------------| "
            - "L1.?[3392,3404] 339ns 6.63mb |-----L1.?------| "
            - "Committing partition 1:"
            - " Soft Deleting 6 files: L1.501, L0.502, L0.503, L0.504, L0.505, L0.506"
            - " Creating 2 files"
            - "**** Simulation run 84, type=split(split_times=[3362]). 6 Input Files, 121.11mb total:"
            - "L1 "
            - "L1.500[3293,3341] 334ns 24.41mb |----L1.500-----| "
            - "L1.493[3244,3292] 329ns 24.07mb |----L1.493-----| "
            - "L1.486[3200,3243] 324ns 19.91mb |----L1.486----| "
            - "L1.508[3392,3404] 339ns 6.63mb |L1.508|"
            - "L1.507[3342,3391] 339ns 24.98mb |-----L1.507-----| "
            - "L2 "
            - "L2.480[3163,3204] 319ns 21.11mb|---L2.480----| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 121.11mb total:"
            - "L2 "
            - "L2.?[3163,3362] 339ns 100mb|----------------------------------L2.?----------------------------------| "
            - "L2.?[3363,3404] 339ns 21.11mb |----L2.?-----| "
            - "Committing partition 1:"
            - " Soft Deleting 6 files: L2.480, L1.486, L1.493, L1.500, L1.507, L1.508"
            - " Creating 2 files"
            - "**** Final Output Files "
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.359[2163,2362] 239ns 100mb |L2.359| "
            - "L2.389[2363,2562] 259ns 100mb |L2.389| "
            - "L2.419[2563,2762] 279ns 100mb |L2.419| "
            - "L2.449[2763,2962] 299ns 100mb |L2.449| "
            - "L2.479[2963,3162] 319ns 100mb |L2.479| "
            - "L2.509[3163,3362] 339ns 100mb |L2.509|"
            - "L2.510[3363,3404] 339ns 21.11mb |L2.510|"
            "###
            );
        } else if i == show_intermediate_result_runs[4] {
            insta::assert_yaml_snapshot!(
                run_layout_scenario(&setup).await,
                @r###"
            ---
            - "**** Input Files "
            - "L0 "
            - "L0.668[4450,4464] 445ns 5mb |L0.668|"
            - "L0.669[4460,4474] 446ns 5mb |L0.669|"
            - "L0.670[4470,4484] 447ns 5mb |L0.670|"
            - "L0.671[4480,4494] 448ns 5mb |L0.671|"
            - "L0.672[4490,4504] 449ns 5mb |L0.672|"
            - "L1 "
            - "L1.666[4400,4443] 444ns 19.91mb |L1.666|"
            - "L1.667[4444,4454] 444ns 5.09mb |L1.667|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.359[2163,2362] 239ns 100mb |L2.359| "
            - "L2.389[2363,2562] 259ns 100mb |L2.389| "
            - "L2.419[2563,2762] 279ns 100mb |L2.419| "
            - "L2.449[2763,2962] 299ns 100mb |L2.449| "
            - "L2.479[2963,3162] 319ns 100mb |L2.479| "
            - "L2.509[3163,3362] 339ns 100mb |L2.509| "
            - "L2.539[3363,3562] 359ns 100mb |L2.539| "
            - "L2.569[3563,3762] 379ns 100mb |L2.569| "
            - "L2.599[3763,3962] 399ns 100mb |L2.599| "
            - "L2.629[3963,4162] 419ns 100mb |L2.629| "
            - "L2.659[4163,4362] 439ns 100mb |L2.659|"
            - "L2.660[4363,4404] 439ns 21.11mb |L2.660|"
            - "**** Simulation run 111, type=split(split_times=[4492]). 6 Input Files, 30.09mb total:"
            - "L0 "
            - "L0.672[4490,4504] 449ns 5mb |------L0.672-------|"
            - "L0.671[4480,4494] 448ns 5mb |------L0.671-------| "
            - "L0.670[4470,4484] 447ns 5mb |------L0.670-------| "
            - "L0.669[4460,4474] 446ns 5mb |------L0.669-------| "
            - "L0.668[4450,4464] 445ns 5mb |------L0.668-------| "
            - "L1 "
            - "L1.667[4444,4454] 444ns 5.09mb|---L1.667----| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 30.09mb total:"
            - "L1 "
            - "L1.?[4444,4492] 449ns 24.07mb|---------------------------------L1.?---------------------------------| "
            - "L1.?[4493,4504] 449ns 6.02mb |-----L1.?-----| "
            - "Committing partition 1:"
            - " Soft Deleting 6 files: L1.667, L0.668, L0.669, L0.670, L0.671, L0.672"
            - " Creating 2 files"
            - "**** Final Output Files "
            - "L1 "
            - "L1.666[4400,4443] 444ns 19.91mb |L1.666|"
            - "L1.673[4444,4492] 449ns 24.07mb |L1.673|"
            - "L1.674[4493,4504] 449ns 6.02mb |L1.674|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.359[2163,2362] 239ns 100mb |L2.359| "
            - "L2.389[2363,2562] 259ns 100mb |L2.389| "
            - "L2.419[2563,2762] 279ns 100mb |L2.419| "
            - "L2.449[2763,2962] 299ns 100mb |L2.449| "
            - "L2.479[2963,3162] 319ns 100mb |L2.479| "
            - "L2.509[3163,3362] 339ns 100mb |L2.509| "
            - "L2.539[3363,3562] 359ns 100mb |L2.539| "
            - "L2.569[3563,3762] 379ns 100mb |L2.569| "
            - "L2.599[3763,3962] 399ns 100mb |L2.599| "
            - "L2.629[3963,4162] 419ns 100mb |L2.629| "
            - "L2.659[4163,4362] 439ns 100mb |L2.659|"
            - "L2.660[4363,4404] 439ns 21.11mb |L2.660|"
            "###
            );
        } else if i == show_intermediate_result_runs[5] {
            insta::assert_yaml_snapshot!(
                run_layout_scenario(&setup).await,
                @r###"
            ---
            - "**** Input Files "
            - "L0 "
            - "L0.742[4950,4964] 495ns 5mb |L0.742|"
            - "L0.743[4960,4974] 496ns 5mb |L0.743|"
            - "L0.744[4970,4984] 497ns 5mb |L0.744|"
            - "L0.745[4980,4994] 498ns 5mb |L0.745|"
            - "L0.746[4990,5004] 499ns 5mb |L0.746|"
            - "L1 "
            - "L1.726[4800,4843] 484ns 19.91mb |L1.726|"
            - "L1.733[4844,4892] 489ns 24.07mb |L1.733|"
            - "L1.740[4893,4941] 494ns 24.41mb |L1.740|"
            - "L1.741[4942,4954] 494ns 6.61mb |L1.741|"
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.359[2163,2362] 239ns 100mb |L2.359| "
            - "L2.389[2363,2562] 259ns 100mb |L2.389| "
            - "L2.419[2563,2762] 279ns 100mb |L2.419| "
            - "L2.449[2763,2962] 299ns 100mb |L2.449| "
            - "L2.479[2963,3162] 319ns 100mb |L2.479| "
            - "L2.509[3163,3362] 339ns 100mb |L2.509| "
            - "L2.539[3363,3562] 359ns 100mb |L2.539| "
            - "L2.569[3563,3762] 379ns 100mb |L2.569| "
            - "L2.599[3763,3962] 399ns 100mb |L2.599| "
            - "L2.629[3963,4162] 419ns 100mb |L2.629| "
            - "L2.659[4163,4362] 439ns 100mb |L2.659| "
            - "L2.689[4363,4562] 459ns 100mb |L2.689| "
            - "L2.719[4563,4762] 479ns 100mb |L2.719|"
            - "L2.720[4763,4804] 479ns 21.11mb |L2.720|"
            - "**** Simulation run 123, type=split(split_times=[4991]). 6 Input Files, 31.61mb total:"
            - "L0 "
            - "L0.746[4990,5004] 499ns 5mb |------L0.746------| "
            - "L0.745[4980,4994] 498ns 5mb |------L0.745------| "
            - "L0.744[4970,4984] 497ns 5mb |------L0.744------| "
            - "L0.743[4960,4974] 496ns 5mb |------L0.743------| "
            - "L0.742[4950,4964] 495ns 5mb |------L0.742------| "
            - "L1 "
            - "L1.741[4942,4954] 494ns 6.61mb|----L1.741-----| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 31.61mb total:"
            - "L1 "
            - "L1.?[4942,4991] 499ns 24.98mb|--------------------------------L1.?---------------------------------| "
            - "L1.?[4992,5004] 499ns 6.63mb |-----L1.?------| "
            - "Committing partition 1:"
            - " Soft Deleting 6 files: L1.741, L0.742, L0.743, L0.744, L0.745, L0.746"
            - " Creating 2 files"
            - "**** Simulation run 124, type=split(split_times=[4962]). 6 Input Files, 121.11mb total:"
            - "L1 "
            - "L1.740[4893,4941] 494ns 24.41mb |----L1.740-----| "
            - "L1.733[4844,4892] 489ns 24.07mb |----L1.733-----| "
            - "L1.726[4800,4843] 484ns 19.91mb |----L1.726----| "
            - "L1.748[4992,5004] 499ns 6.63mb |L1.748|"
            - "L1.747[4942,4991] 499ns 24.98mb |-----L1.747-----| "
            - "L2 "
            - "L2.720[4763,4804] 479ns 21.11mb|---L2.720----| "
            - "**** 2 Output Files (parquet_file_id not yet assigned), 121.11mb total:"
            - "L2 "
            - "L2.?[4763,4962] 499ns 100mb|----------------------------------L2.?----------------------------------| "
            - "L2.?[4963,5004] 499ns 21.11mb |----L2.?-----| "
            - "Committing partition 1:"
            - " Soft Deleting 6 files: L2.720, L1.726, L1.733, L1.740, L1.747, L1.748"
            - " Creating 2 files"
            - "**** Final Output Files "
            - "L2 "
            - "L2.29[0,163] 19ns 79.9mb |L2.29| "
            - "L2.59[164,364] 39ns 100.08mb |L2.59| "
            - "L2.89[365,565] 59ns 100.43mb |L2.89| "
            - "L2.119[566,756] 79ns 95.47mb |L2.119| "
            - "L2.149[757,957] 99ns 100.5mb |L2.149| "
            - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
            - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
            - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
            - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
            - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
            - "L2.329[1963,2162] 219ns 100mb |L2.329| "
            - "L2.359[2163,2362] 239ns 100mb |L2.359| "
            - "L2.389[2363,2562] 259ns 100mb |L2.389| "
            - "L2.419[2563,2762] 279ns 100mb |L2.419| "
            - "L2.449[2763,2962] 299ns 100mb |L2.449| "
            - "L2.479[2963,3162] 319ns 100mb |L2.479| "
            - "L2.509[3163,3362] 339ns 100mb |L2.509| "
            - "L2.539[3363,3562] 359ns 100mb |L2.539| "
            - "L2.569[3563,3762] 379ns 100mb |L2.569| "
            - "L2.599[3763,3962] 399ns 100mb |L2.599| "
            - "L2.629[3963,4162] 419ns 100mb |L2.629| "
            - "L2.659[4163,4362] 439ns 100mb |L2.659| "
            - "L2.689[4363,4562] 459ns 100mb |L2.689| "
            - "L2.719[4563,4762] 479ns 100mb |L2.719|"
            - "L2.749[4763,4962] 499ns 100mb |L2.749|"
            - "L2.750[4963,5004] 499ns 21.11mb |L2.750|"
            "###
            );
        } else {
            // No snapshot requested for this iteration — just run the compactor.
            run_layout_scenario(&setup).await;
        }
    }
    // Add three L0 file during last compacting
    for _ in 0..3 {
        let min = idx * 10;
        let max = min + 14;
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(min)
                    .with_max_time(max)
                    .with_file_size_bytes(5 * ONE_MB)
                    .with_max_l0_created_at(Time::from_timestamp_nanos(idx))
                    .with_compaction_level(CompactionLevel::Initial),
            )
            .await;
        idx += 1;
    }
    let files = setup.list_by_table_not_to_delete().await;
    // Final results
    // With time overlapped setup (common use case), there is always:
    // . Only the earliest available L0 overlaps with the latest L1 file
    // . Only the earliest available L1 overlaps with the latest L2 file
    insta::assert_yaml_snapshot!(
        format_files("final output", &files),
        @r###"
    ---
    - final output
    - "L0 "
    - "L0.758[5050,5064] 505ns 5mb |L0.758|"
    - "L0.759[5060,5074] 506ns 5mb |L0.759|"
    - "L0.760[5070,5084] 507ns 5mb |L0.760|"
    - "L1 "
    - "L1.756[5000,5043] 504ns 19.91mb |L1.756|"
    - "L1.757[5044,5054] 504ns 5.09mb |L1.757|"
    - "L2 "
    - "L2.29[0,163] 19ns 79.9mb |L2.29| "
    - "L2.59[164,364] 39ns 100.08mb |L2.59| "
    - "L2.89[365,565] 59ns 100.43mb |L2.89| "
    - "L2.119[566,756] 79ns 95.47mb |L2.119| "
    - "L2.149[757,957] 99ns 100.5mb |L2.149| "
    - "L2.179[958,1158] 119ns 100.5mb |L2.179| "
    - "L2.209[1159,1359] 139ns 100.5mb |L2.209| "
    - "L2.239[1360,1560] 159ns 100.5mb |L2.239| "
    - "L2.269[1561,1761] 179ns 100.5mb |L2.269| "
    - "L2.299[1762,1962] 199ns 100.5mb |L2.299| "
    - "L2.329[1963,2162] 219ns 100mb |L2.329| "
    - "L2.359[2163,2362] 239ns 100mb |L2.359| "
    - "L2.389[2363,2562] 259ns 100mb |L2.389| "
    - "L2.419[2563,2762] 279ns 100mb |L2.419| "
    - "L2.449[2763,2962] 299ns 100mb |L2.449| "
    - "L2.479[2963,3162] 319ns 100mb |L2.479| "
    - "L2.509[3163,3362] 339ns 100mb |L2.509| "
    - "L2.539[3363,3562] 359ns 100mb |L2.539| "
    - "L2.569[3563,3762] 379ns 100mb |L2.569| "
    - "L2.599[3763,3962] 399ns 100mb |L2.599| "
    - "L2.629[3963,4162] 419ns 100mb |L2.629| "
    - "L2.659[4163,4362] 439ns 100mb |L2.659| "
    - "L2.689[4363,4562] 459ns 100mb |L2.689| "
    - "L2.719[4563,4762] 479ns 100mb |L2.719| "
    - "L2.749[4763,4962] 499ns 100mb |L2.749|"
    - "L2.750[4963,5004] 499ns 21.11mb |L2.750|"
    "###
    );
}
// Each L0 file overlaps ~40% time range with its previously created L0 file.
// The setup has final files of level 2, level 1, and level 0.
// The level-1 files are not large enough to get compacted into L2 files
// The level-0 files are ingested during the last compaction
#[tokio::test]
async fn test_keep_ingesting_l0_files_40_percent_overlap_output_250mb() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder().await.build().await;
// This test simulates the case where the ingester creates new L0 files
// with the most recent data but there is a larger delay in new data arriving
// and thus there is more overlap (40%) with the existing files
// Loop to compact N times, each time new M number of L0 files are created.
// Each L0 file is 5MB and only overlaps 40% with the previously created L0 file.
let n = 10;
let m = 5;
let mut idx = 0;
for _i in 0..n {
for _j in 0..m {
let min = idx * 10;
let max = min + 14;
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(min)
.with_max_time(max)
.with_file_size_bytes(5 * ONE_MB)
.with_max_l0_created_at(Time::from_timestamp_nanos(idx))
.with_compaction_level(CompactionLevel::Initial),
)
.await;
idx += 1;
}
run_layout_scenario(&setup).await;
}
// Add three L0 file during last compacting
for _ in 0..3 {
let min = idx * 10;
let max = min + 14;
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(min)
.with_max_time(max)
.with_file_size_bytes(5 * ONE_MB)
.with_max_l0_created_at(Time::from_timestamp_nanos(idx))
.with_compaction_level(CompactionLevel::Initial),
)
.await;
idx += 1;
}
let files = setup.list_by_table_not_to_delete().await;
// Only the earliest avaialble L0 overlaps with the latest L2 file
insta::assert_yaml_snapshot!(
format_files("final output", &files),
@r###"
---
- final output
- "L0 "
- "L0.75[500,514] 50ns 5mb |L0.75|"
- "L0.76[510,524] 51ns 5mb |L0.76|"
- "L0.77[520,534] 52ns 5mb |L0.77|"
- "L1 "
- "L1.66[400,443] 44ns 19.91mb |L1.66| "
- "L1.73[444,492] 49ns 24.07mb |L1.73-| "
- "L1.74[493,504] 49ns 6.02mb |L1.74|"
- "L2 "
- "L2.29[0,163] 19ns 79.9mb |----------L2.29----------| "
- "L2.59[164,364] 39ns 100.08mb |-------------L2.59-------------| "
- "L2.60[365,404] 39ns 20.02mb |L2.60| "
"###
);
}

View File

@ -103,7 +103,7 @@ async fn all_overlapping_l0() {
- "L1.?[160021,200000] 10ns 18mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 10 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10"
- " Creating 2 files at level CompactionLevel::L1"
- " Creating 2 files"
- "**** Final Output Files "
- "L1 "
- "L1.11[100,160020] 10ns 72mb|--------------------------------L1.11---------------------------------| "
@ -165,7 +165,7 @@ async fn all_non_overlapping_l0() {
- "L1.?[721,901] 1ns 20.09mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 10 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10"
- " Creating 2 files at level CompactionLevel::L1"
- " Creating 2 files"
- "**** Simulation run 1, type=split(split_times=[720]). 2 Input Files, 100mb total:"
- "L1 "
- "L1.12[721,901] 1ns 20.09mb |-----L1.12-----| "
@ -176,7 +176,7 @@ async fn all_non_overlapping_l0() {
- "L2.?[721,901] 1ns 20.09mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.11, L1.12"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.13[0,720] 1ns 79.91mb |--------------------------------L2.13--------------------------------| "
@ -251,7 +251,7 @@ async fn l1_with_overlapping_l0() {
- "L1.?[100,310] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 6 files: L1.2, L0.3, L0.4, L0.5, L0.6, L0.7"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1 "
- "L1.1[50,99] 1ns 10mb |-----L1.1-----| "
@ -322,7 +322,7 @@ async fn l1_with_non_overlapping_l0() {
- "L1.?[300,550] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 5 files: L0.3, L0.4, L0.5, L0.6, L0.7"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1 "
- "L1.1[50,99] 1ns 10mb |-L1.1-| "
@ -393,7 +393,7 @@ async fn l1_with_non_overlapping_l0_larger() {
- "L1.?[300,450] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.5, L0.6, L0.7"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[370]). 5 Input Files, 108mb total:"
- "L1 "
- "L1.4[200,249] 1ns 3mb |--L1.4---| "
@ -407,7 +407,7 @@ async fn l1_with_non_overlapping_l0_larger() {
- "L2.?[371,450] 1ns 21.6mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 5 files: L1.1, L1.2, L1.3, L1.4, L1.8"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.9[50,370] 1ns 86.4mb |---------------------------------L2.9---------------------------------| "
@ -493,7 +493,7 @@ async fn l1_too_much_with_non_overlapping_l0() {
- "L1.?[600,649] 780s |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.11, L0.12, L0.13"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[113, 176]). 3 Input Files, 240mb total:"
- "L1 "
- "L1.1[50,99] 0ns 90mb |-----------L1.1------------| "
@ -506,47 +506,46 @@ async fn l1_too_much_with_non_overlapping_l0() {
- "L2.?[177,199] 120s 38.66mb |---L2.?----| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.1, L1.2, L1.3"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Simulation run 2, type=split(split_times=[271, 342]). 3 Input Files, 210mb total:"
- " Creating 3 files"
- "**** Simulation run 2, type=split(split_times=[272, 344]). 4 Input Files, 280mb total:"
- "L1, all files 70mb "
- "L1.4[200,249] 180s |-----------L1.4------------| "
- "L1.5[250,299] 240s |-----------L1.5------------| "
- "L1.6[300,349] 300s |-----------L1.6------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 210mb total:"
- "L1.4[200,249] 180s |--------L1.4--------| "
- "L1.5[250,299] 240s |--------L1.5--------| "
- "L1.6[300,349] 300s |--------L1.6--------| "
- "L1.7[350,399] 360s |--------L1.7--------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 280mb total:"
- "L2 "
- "L2.?[200,271] 300s 100.07mb|------------------L2.?------------------| "
- "L2.?[272,342] 300s 98.66mb |------------------L2.?------------------| "
- "L2.?[343,349] 300s 11.28mb |L2.?|"
- "L2.?[200,272] 360s 101.31mb|-------------L2.?-------------| "
- "L2.?[273,344] 360s 99.9mb |-------------L2.?-------------| "
- "L2.?[345,399] 360s 78.79mb |---------L2.?---------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.4, L1.5, L1.6"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Simulation run 3, type=split(split_times=[421, 492]). 3 Input Files, 210mb total:"
- "L1, all files 70mb "
- "L1.7[350,399] 360s |-----------L1.7------------| "
- "L1.8[400,449] 420s |-----------L1.8------------| "
- "L1.9[450,499] 480s |-----------L1.9------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 210mb total:"
- "L2 "
- "L2.?[350,421] 480s 100.07mb|------------------L2.?------------------| "
- "L2.?[422,492] 480s 98.66mb |------------------L2.?------------------| "
- "L2.?[493,499] 480s 11.28mb |L2.?|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.7, L1.8, L1.9"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Final Output Files "
- " Soft Deleting 4 files: L1.4, L1.5, L1.6, L1.7"
- " Creating 3 files"
- "**** Simulation run 3, type=split(split_times=[511, 622]). 4 Input Files, 225mb total:"
- "L1 "
- "L1.10[500,549] 540s 70mb |L1.10| "
- "L1.14[600,649] 780s 15mb |L1.14| "
- "L1.14[600,649] 780s 15mb |-----L1.14-----| "
- "L1.10[500,549] 540s 70mb |-----L1.10-----| "
- "L1.9[450,499] 480s 70mb |-----L1.9------| "
- "L1.8[400,449] 420s 70mb |-----L1.8------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 225mb total:"
- "L2 "
- "L2.?[400,511] 780s 100.3mb|-----------------L2.?-----------------| "
- "L2.?[512,622] 780s 99.4mb |----------------L2.?-----------------| "
- "L2.?[623,649] 780s 25.3mb |-L2.?--| "
- "Committing partition 1:"
- " Soft Deleting 4 files: L1.8, L1.9, L1.10, L1.14"
- " Creating 3 files"
- "**** Final Output Files "
- "L2 "
- "L2.15[50,113] 120s 101.48mb|-L2.15-| "
- "L2.16[114,176] 120s 99.87mb |-L2.16-| "
- "L2.17[177,199] 120s 38.66mb |L2.17| "
- "L2.18[200,271] 300s 100.07mb |-L2.18--| "
- "L2.19[272,342] 300s 98.66mb |-L2.19--| "
- "L2.20[343,349] 300s 11.28mb |L2.20| "
- "L2.21[350,421] 480s 100.07mb |-L2.21--| "
- "L2.22[422,492] 480s 98.66mb |-L2.22--| "
- "L2.23[493,499] 480s 11.28mb |L2.23| "
- "L2.18[200,272] 360s 101.31mb |-L2.18--| "
- "L2.19[273,344] 360s 99.9mb |-L2.19--| "
- "L2.20[345,399] 360s 78.79mb |L2.20-| "
- "L2.21[400,511] 780s 100.3mb |----L2.21-----| "
- "L2.22[512,622] 780s 99.4mb |----L2.22-----| "
- "L2.23[623,649] 780s 25.3mb |L2.23|"
"###
);
}
@ -619,7 +618,7 @@ async fn many_l1_with_non_overlapping_l0() {
- "L1.?[600,650] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.11, L0.12, L0.13"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[530]). 11 Input Files, 88mb total:"
- "L1 "
- "L1.10[500,549] 1ns 7mb |L1.10| "
@ -639,7 +638,7 @@ async fn many_l1_with_non_overlapping_l0() {
- "L2.?[531,650] 1ns 17.6mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 11 files: L1.1, L1.2, L1.3, L1.4, L1.5, L1.6, L1.7, L1.8, L1.9, L1.10, L1.14"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.15[50,530] 1ns 70.4mb |--------------------------------L2.15---------------------------------| "
@ -708,7 +707,7 @@ async fn large_l1_with_non_overlapping_l0() {
- "L1.?[600,650] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.3, L0.4, L0.5"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[375]). 3 Input Files, 185mb total:"
- "L1 "
- "L1.2[100,149] 1ns 80mb |L1.2-| "
@ -720,7 +719,7 @@ async fn large_l1_with_non_overlapping_l0() {
- "L2.?[376,650] 1ns 84.79mb |-----------------L2.?------------------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.1, L1.2, L1.6"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.7[50,375] 1ns 100.21mb|---------------------L2.7---------------------| "

File diff suppressed because it is too large Load Diff

View File

@ -6,10 +6,8 @@ use data_types::CompactionLevel;
use crate::layouts::{layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB};
const MAX_COMPACT_SIZE: usize = 300 * ONE_MB as usize;
const MAX_DESIRED_FILE_SIZE: u64 = 100 * ONE_MB;
// This file should be upgraded after https://github.com/influxdata/idpe/issues/17246
// One l1 file that is larger than max desired file size
#[tokio::test]
async fn one_larger_max_file_size() {
@ -17,7 +15,6 @@ async fn one_larger_max_file_size() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -58,7 +55,6 @@ async fn one_l0_larger_max_file_size() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -95,7 +91,6 @@ async fn one_l0_larger_max_file_size() {
);
}
// This file should be upgraded after https://github.com/influxdata/idpe/issues/17246
// One l1 file that is larger than max compact size
#[tokio::test]
async fn one_larger_max_compact_size() {
@ -103,11 +98,12 @@ async fn one_larger_max_compact_size() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let max_compact_size = setup.config.max_compact_size_bytes();
setup
.partition
.create_parquet_file(
@ -116,7 +112,7 @@ async fn one_larger_max_compact_size() {
.with_max_time(1000)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
// file > max_desired_file_size_bytes
.with_file_size_bytes((MAX_COMPACT_SIZE + 1) as u64),
.with_file_size_bytes((max_compact_size + 1) as u64),
)
.await;
@ -139,7 +135,6 @@ async fn one_larger_max_compact_size() {
);
}
// This file should be upgraded after https://github.com/influxdata/idpe/issues/17246
// One l0 file that is larger than max compact size
#[tokio::test]
async fn one_l0_larger_max_compact_size() {
@ -147,11 +142,12 @@ async fn one_l0_larger_max_compact_size() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let max_compact_size = setup.config.max_compact_size_bytes();
setup
.partition
.create_parquet_file(
@ -160,7 +156,7 @@ async fn one_l0_larger_max_compact_size() {
.with_max_time(1000)
.with_compaction_level(CompactionLevel::Initial)
// file > max_desired_file_size_bytes
.with_file_size_bytes((MAX_COMPACT_SIZE + 1) as u64),
.with_file_size_bytes((max_compact_size + 1) as u64),
)
.await;
@ -185,7 +181,6 @@ async fn one_l0_larger_max_compact_size() {
);
}
// This is working as expected and should stay after https://github.com/influxdata/idpe/issues/17246
// Two files that are under max compact size
#[tokio::test]
async fn two_large_files_total_under_max_compact_size() {
@ -193,7 +188,6 @@ async fn two_large_files_total_under_max_compact_size() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -234,7 +228,7 @@ async fn two_large_files_total_under_max_compact_size() {
- "L2.?[502,1000] 1ns 99.9mb |-------------------L2.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L2.2"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.3[1,501] 1ns 100.1mb |-------------------L2.3--------------------| "
@ -243,7 +237,6 @@ async fn two_large_files_total_under_max_compact_size() {
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Two similar size and time range files with total size larger than max compact size
#[tokio::test]
async fn two_large_files_total_over_max_compact_size() {
@ -251,12 +244,12 @@ async fn two_large_files_total_over_max_compact_size() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let size = MAX_COMPACT_SIZE / 2 + 10;
let max_compact_size = setup.config.max_compact_size_bytes();
let size = max_compact_size / 2 + 10;
for i in 1..=2 {
setup
@ -283,19 +276,57 @@ async fn two_large_files_total_over_max_compact_size() {
- "L2.2[2,1000] 1ns |-----------------------------------------L2.2------------------------------------------| "
- "WARNING: file L1.1[1,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[2,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[667]). 1 Input Files, 150mb total:"
- "L1, all files 150mb "
- "L1.1[1,1000] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L1 "
- "L1.?[1,667] 1ns 100mb |---------------------------L1.?---------------------------| "
- "L1.?[668,1000] 1ns 50mb |-----------L1.?------------| "
- "**** Simulation run 1, type=split(split_times=[668]). 1 Input Files, 150mb total:"
- "L2, all files 150mb "
- "L2.2[2,1000] 1ns |-----------------------------------------L2.2------------------------------------------| "
- "WARNING: file L1.1[1,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[2,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "L2.2[2,1000] 1ns |------------------------------------------L2.2------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L2 "
- "L2.?[2,668] 1ns 100.1mb |---------------------------L2.?---------------------------| "
- "L2.?[669,1000] 1ns 49.9mb |-----------L2.?------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L2.2"
- " Creating 4 files"
- "**** Simulation run 2, type=split(split_times=[668]). 1 Input Files, 50mb total:"
- "L1, all files 50mb "
- "L1.4[668,1000] 1ns |------------------------------------------L1.4------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 50mb total:"
- "L1 "
- "L1.?[668,668] 1ns 0b |L1.?| "
- "L1.?[669,1000] 1ns 50mb |-----------------------------------------L1.?------------------------------------------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.4"
- " Creating 2 files"
- "**** Simulation run 3, type=split(split_times=[335]). 3 Input Files, 200.1mb total:"
- "L1 "
- "L1.3[1,667] 1ns 100mb |-----------------------------------------L1.3------------------------------------------| "
- "L1.7[668,668] 1ns 0b |L1.7|"
- "L2 "
- "L2.5[2,668] 1ns 100.1mb |-----------------------------------------L2.5------------------------------------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 200.1mb total:"
- "L2 "
- "L2.?[1,335] 1ns 100.2mb |-------------------L2.?--------------------| "
- "L2.?[336,668] 1ns 99.9mb |-------------------L2.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.3, L2.5, L1.7"
- " Creating 2 files"
- "**** Final Output Files "
- "L1 "
- "L1.8[669,1000] 1ns 50mb |-----------L1.8------------| "
- "L2 "
- "L2.6[669,1000] 1ns 49.9mb |-----------L2.6------------| "
- "L2.9[1,335] 1ns 100.2mb |------------L2.9------------| "
- "L2.10[336,668] 1ns 99.9mb |-----------L2.10-----------| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Two similar size files with total size larger than max compact size with small overlap range
// The time range of target level file is much smaller and at the end range of the start level file
#[tokio::test]
@ -304,12 +335,12 @@ async fn two_large_files_total_over_max_compact_size_small_overlap_range() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let size = MAX_COMPACT_SIZE / 2 + 10;
let max_compact_size = setup.config.max_compact_size_bytes();
let size = max_compact_size / 2 + 10;
for i in 1..=2 {
setup
@ -336,19 +367,46 @@ async fn two_large_files_total_over_max_compact_size_small_overlap_range() {
- "L2.2[800,1000] 1ns |------L2.2------|"
- "WARNING: file L1.1[0,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[800,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[667]). 1 Input Files, 150mb total:"
- "L1, all files 150mb "
- "L1.1[0,1000] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L1 "
- "L1.?[0,667] 1ns 100.05mb |---------------------------L1.?---------------------------| "
- "L1.?[668,1000] 1ns 49.95mb |-----------L1.?------------| "
- "**** Simulation run 1, type=split(split_times=[934]). 1 Input Files, 150mb total:"
- "L2, all files 150mb "
- "L2.2[800,1000] 1ns |------L2.2------|"
- "WARNING: file L1.1[0,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[800,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "L2.2[800,1000] 1ns |------------------------------------------L2.2------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L2 "
- "L2.?[800,934] 1ns 100.5mb|---------------------------L2.?---------------------------| "
- "L2.?[935,1000] 1ns 49.5mb |-----------L2.?------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L2.2"
- " Creating 4 files"
- "**** Simulation run 2, type=split(split_times=[835]). 3 Input Files, 199.95mb total:"
- "L1 "
- "L1.4[668,1000] 1ns 49.95mb|------------------------------------------L1.4------------------------------------------|"
- "L2 "
- "L2.5[800,934] 1ns 100.5mb |---------------L2.5---------------| "
- "L2.6[935,1000] 1ns 49.5mb |-----L2.6------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 199.95mb total:"
- "L2 "
- "L2.?[668,835] 1ns 100.58mb|-------------------L2.?--------------------| "
- "L2.?[836,1000] 1ns 99.37mb |-------------------L2.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.4, L2.5, L2.6"
- " Upgrading 1 files level to CompactionLevel::L2: L1.3"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.3[0,667] 1ns 100.05mb |---------------------------L2.3---------------------------| "
- "L2.7[668,835] 1ns 100.58mb |----L2.7-----| "
- "L2.8[836,1000] 1ns 99.37mb |----L2.8----| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Two similar size files with total size larger than max compact size with small overlap range
// The overlapped range is at the end range of start_level file and start of target level file
// Two files have similar length of time range
@ -358,12 +416,12 @@ async fn two_large_files_total_over_max_compact_size_small_overlap_range_2() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let size = MAX_COMPACT_SIZE / 2 + 10;
let max_compact_size = setup.config.max_compact_size_bytes();
let size = max_compact_size / 2 + 10;
for i in 1..=2 {
setup
@ -390,19 +448,47 @@ async fn two_large_files_total_over_max_compact_size_small_overlap_range_2() {
- "L2.2[1600,3000] 1ns |-------------------------L2.2--------------------------| "
- "WARNING: file L1.1[800,2000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[1600,3000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[1600]). 1 Input Files, 150mb total:"
- "L1, all files 150mb "
- "L1.1[800,2000] 1ns |---------------------L1.1----------------------| "
- "L1.1[800,2000] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L1 "
- "L1.?[800,1600] 1ns 100mb |---------------------------L1.?---------------------------| "
- "L1.?[1601,2000] 1ns 50mb |-----------L1.?------------| "
- "**** Simulation run 1, type=split(split_times=[2534]). 1 Input Files, 150mb total:"
- "L2, all files 150mb "
- "L2.2[1600,3000] 1ns |-------------------------L2.2--------------------------| "
- "WARNING: file L1.1[800,2000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[1600,3000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "L2.2[1600,3000] 1ns |------------------------------------------L2.2------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L2 "
- "L2.?[1600,2534] 1ns 100.07mb|---------------------------L2.?---------------------------| "
- "L2.?[2535,3000] 1ns 49.93mb |-----------L2.?------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L2.2"
- " Creating 4 files"
- "**** Simulation run 2, type=split(split_times=[1494, 2188]). 3 Input Files, 250.07mb total:"
- "L1 "
- "L1.4[1601,2000] 1ns 50mb |-------L1.4-------| "
- "L1.3[800,1600] 1ns 100mb |-----------------L1.3------------------| "
- "L2 "
- "L2.5[1600,2534] 1ns 100.07mb |---------------------L2.5---------------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 250.07mb total:"
- "L2 "
- "L2.?[800,1494] 1ns 100.09mb|---------------L2.?---------------| "
- "L2.?[1495,2188] 1ns 99.94mb |--------------L2.?---------------| "
- "L2.?[2189,2534] 1ns 50.04mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.3, L1.4, L2.5"
- " Creating 3 files"
- "**** Final Output Files "
- "L2 "
- "L2.6[2535,3000] 1ns 49.93mb |------L2.6-------| "
- "L2.7[800,1494] 1ns 100.09mb|-----------L2.7-----------| "
- "L2.8[1495,2188] 1ns 99.94mb |-----------L2.8-----------| "
- "L2.9[2189,2534] 1ns 50.04mb |----L2.9----| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Two similar size files with total size larger than max compact size with small overlap range
// The overlapped range is at the end range of start_level file and start of target level file
// Time range of the start level file is much smaller than the one of target level file
@ -412,12 +498,12 @@ async fn two_large_files_total_over_max_compact_size_small_overlap_range_3() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let size = MAX_COMPACT_SIZE / 2 + 10;
let max_compact_size = setup.config.max_compact_size_bytes();
let size = max_compact_size / 2 + 10;
for i in 1..=2 {
setup
@ -444,19 +530,47 @@ async fn two_large_files_total_over_max_compact_size_small_overlap_range_3() {
- "L2.2[200,1300] 1ns |-----------------------------------L2.2-----------------------------------| "
- "WARNING: file L1.1[0,300] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[200,1300] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[200]). 1 Input Files, 150mb total:"
- "L1, all files 150mb "
- "L1.1[0,300] 1ns |-------L1.1-------| "
- "L1.1[0,300] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L1 "
- "L1.?[0,200] 1ns 100mb |---------------------------L1.?---------------------------| "
- "L1.?[201,300] 1ns 50mb |-----------L1.?------------| "
- "**** Simulation run 1, type=split(split_times=[934]). 1 Input Files, 150mb total:"
- "L2, all files 150mb "
- "L2.2[200,1300] 1ns |-----------------------------------L2.2-----------------------------------| "
- "WARNING: file L1.1[0,300] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L2.2[200,1300] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "L2.2[200,1300] 1ns |------------------------------------------L2.2------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L2 "
- "L2.?[200,934] 1ns 100.09mb|---------------------------L2.?---------------------------| "
- "L2.?[935,1300] 1ns 49.91mb |-----------L2.?------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L2.2"
- " Creating 4 files"
- "**** Simulation run 2, type=split(split_times=[374, 748]). 3 Input Files, 250.09mb total:"
- "L1 "
- "L1.4[201,300] 1ns 50mb |-L1.4--| "
- "L1.3[0,200] 1ns 100mb |------L1.3-------| "
- "L2 "
- "L2.5[200,934] 1ns 100.09mb |--------------------------------L2.5--------------------------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 250.09mb total:"
- "L2 "
- "L2.?[0,374] 1ns 100.14mb |---------------L2.?---------------| "
- "L2.?[375,748] 1ns 99.88mb |--------------L2.?---------------| "
- "L2.?[749,934] 1ns 50.07mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.3, L1.4, L2.5"
- " Creating 3 files"
- "**** Final Output Files "
- "L2 "
- "L2.6[935,1300] 1ns 49.91mb |---------L2.6----------| "
- "L2.7[0,374] 1ns 100.14mb |---------L2.7----------| "
- "L2.8[375,748] 1ns 99.88mb |---------L2.8----------| "
- "L2.9[749,934] 1ns 50.07mb |---L2.9---| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Two similar size files with total size larger than max compact size and similar time range
// Start level is 0
#[tokio::test]
@ -465,12 +579,12 @@ async fn two_large_files_total_over_max_compact_size_start_l0() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
let size = MAX_COMPACT_SIZE / 2 + 10;
let max_compact_size = setup.config.max_compact_size_bytes();
let size = max_compact_size / 2 + 10;
for i in 0..=1 {
setup
@ -497,19 +611,69 @@ async fn two_large_files_total_over_max_compact_size_start_l0() {
- "L1.2[1,1000] 1ns |-----------------------------------------L1.2------------------------------------------| "
- "WARNING: file L0.1[0,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L1.2[1,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[667]). 1 Input Files, 150mb total:"
- "L0, all files 150mb "
- "L0.1[0,1000] 1ns |------------------------------------------L0.1------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L0 "
- "L0.?[0,667] 1ns 100.05mb |---------------------------L0.?---------------------------| "
- "L0.?[668,1000] 1ns 49.95mb |-----------L0.?------------| "
- "**** Simulation run 1, type=split(split_times=[667]). 1 Input Files, 150mb total:"
- "L1, all files 150mb "
- "L1.2[1,1000] 1ns |-----------------------------------------L1.2------------------------------------------| "
- "WARNING: file L0.1[0,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L1.2[1,1000] 1ns 150mb exceeds soft limit 100mb by more than 50%"
- "L1.2[1,1000] 1ns |------------------------------------------L1.2------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 150mb total:"
- "L1 "
- "L1.?[1,667] 1ns 100mb |---------------------------L1.?---------------------------| "
- "L1.?[668,1000] 1ns 50mb |-----------L1.?------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.1, L1.2"
- " Creating 4 files"
- "**** Simulation run 2, type=split(split_times=[933]). 2 Input Files, 99.95mb total:"
- "L0 "
- "L0.4[668,1000] 1ns 49.95mb|------------------------------------------L0.4------------------------------------------|"
- "L1 "
- "L1.6[668,1000] 1ns 50mb |------------------------------------------L1.6------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 99.95mb total:"
- "L1 "
- "L1.?[668,933] 1ns 79.78mb|--------------------------------L1.?---------------------------------| "
- "L1.?[934,1000] 1ns 20.17mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.4, L1.6"
- " Creating 2 files"
- "**** Simulation run 3, type=split(split_times=[334]). 2 Input Files, 200.05mb total:"
- "L0 "
- "L0.3[0,667] 1ns 100.05mb |------------------------------------------L0.3------------------------------------------|"
- "L1 "
- "L1.5[1,667] 1ns 100mb |-----------------------------------------L1.5------------------------------------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 200.05mb total:"
- "L1 "
- "L1.?[0,334] 1ns 100.17mb |-------------------L1.?--------------------| "
- "L1.?[335,667] 1ns 99.88mb |-------------------L1.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.3, L1.5"
- " Creating 2 files"
- "**** Simulation run 4, type=split(split_times=[668]). 3 Input Files, 199.83mb total:"
- "L1 "
- "L1.7[668,933] 1ns 79.78mb |--------------L1.7---------------| "
- "L1.8[934,1000] 1ns 20.17mb |-L1.8-| "
- "L1.10[335,667] 1ns 99.88mb|------------------L1.10-------------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 199.83mb total:"
- "L2 "
- "L2.?[335,668] 1ns 100.06mb|-------------------L2.?--------------------| "
- "L2.?[669,1000] 1ns 99.76mb |-------------------L2.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.7, L1.8, L1.10"
- " Upgrading 1 files level to CompactionLevel::L2: L1.9"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.9[0,334] 1ns 100.17mb |------------L2.9------------| "
- "L2.11[335,668] 1ns 100.06mb |-----------L2.11-----------| "
- "L2.12[669,1000] 1ns 99.76mb |-----------L2.12-----------| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Real-life case with three good size L1s and one very large L2
#[tokio::test]
async fn target_too_large_1() {
@ -517,7 +681,6 @@ async fn target_too_large_1() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -568,20 +731,57 @@ async fn target_too_large_1() {
- "L2 "
- "L2.1[1,1000] 1ns 253mb |-----------L2.1------------| "
- "WARNING: file L2.1[1,1000] 1ns 253mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Simulation run 0, type=split(split_times=[396, 791]). 1 Input Files, 253mb total:"
- "L2, all files 253mb "
- "L2.1[1,1000] 1ns |------------------------------------------L2.1------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 253mb total:"
- "L2 "
- "L2.?[1,396] 1ns 100.04mb |--------------L2.?---------------| "
- "L2.?[397,791] 1ns 99.78mb |--------------L2.?---------------| "
- "L2.?[792,1000] 1ns 53.18mb |------L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L2.1"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[396, 791]). 1 Input Files, 53mb total:"
- "L1, all files 53mb "
- "L1.2[1,1000] 1ns |------------------------------------------L1.2------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 53mb total:"
- "L1 "
- "L1.?[1,396] 1ns 20.96mb |--------------L1.?---------------| "
- "L1.?[397,791] 1ns 20.9mb |--------------L1.?---------------| "
- "L1.?[792,1000] 1ns 11.14mb |------L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.2"
- " Creating 3 files"
- "**** Simulation run 2, type=split(split_times=[328, 655]). 4 Input Files, 241.68mb total:"
- "L1 "
- "L1.8[1,396] 1ns 20.96mb |-------------------L1.8-------------------| "
- "L1.9[397,791] 1ns 20.9mb |-------------------L1.9-------------------| "
- "L2 "
- "L2.5[1,396] 1ns 100.04mb |-------------------L2.5-------------------| "
- "L2.6[397,791] 1ns 99.78mb |-------------------L2.6-------------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 241.68mb total:"
- "L2 "
- "L2.?[1,328] 1ns 100.04mb |---------------L2.?----------------| "
- "L2.?[329,655] 1ns 99.73mb |---------------L2.?----------------| "
- "L2.?[656,791] 1ns 41.91mb |----L2.?-----| "
- "Committing partition 1:"
- " Soft Deleting 4 files: L2.5, L2.6, L1.8, L1.9"
- " Creating 3 files"
- "**** Final Output Files "
- "L1 "
- "L1.2[1,1000] 1ns 53mb |-----------L1.2------------| "
- "L1.3[1001,2000] 1ns 45mb |-----------L1.3------------| "
- "L1.4[2001,3000] 1ns 5mb |-----------L1.4------------| "
- "L1.10[792,1000] 1ns 11.14mb |L1.10| "
- "L2 "
- "L2.1[1,1000] 1ns 253mb |-----------L2.1------------| "
- "WARNING: file L2.1[1,1000] 1ns 253mb exceeds soft limit 100mb by more than 50%"
- "L2.7[792,1000] 1ns 53.18mb |L2.7| "
- "L2.11[1,328] 1ns 100.04mb|-L2.11-| "
- "L2.12[329,655] 1ns 99.73mb |-L2.12-| "
- "L2.13[656,791] 1ns 41.91mb |L2.13| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// Real-life case with two good size L1s and one very large L2
#[tokio::test]
async fn target_too_large_2() {
@ -589,7 +789,6 @@ async fn target_too_large_2() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -639,19 +838,52 @@ async fn target_too_large_2() {
- "L2 "
- "L2.1[1,3000] 1ns 232mb |------------------------------------------L2.1------------------------------------------|"
- "WARNING: file L2.1[1,3000] 1ns 232mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Simulation run 0, type=split(split_times=[1294, 2587]). 1 Input Files, 232mb total:"
- "L2, all files 232mb "
- "L2.1[1,3000] 1ns |------------------------------------------L2.1------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 232mb total:"
- "L2 "
- "L2.?[1,1294] 1ns 100.03mb|----------------L2.?----------------| "
- "L2.?[1295,2587] 1ns 99.95mb |----------------L2.?----------------| "
- "L2.?[2588,3000] 1ns 32.03mb |---L2.?---| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L2.1"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[1294]). 1 Input Files, 50mb total:"
- "L1, all files 50mb "
- "L1.3[1001,2000] 1ns |------------------------------------------L1.3------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 50mb total:"
- "L1 "
- "L1.?[1001,1294] 1ns 14.66mb|----------L1.?----------| "
- "L1.?[1295,2000] 1ns 35.34mb |----------------------------L1.?-----------------------------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.3"
- " Creating 2 files"
- "**** Simulation run 2, type=split(split_times=[705]). 3 Input Files, 183.69mb total:"
- "L1 "
- "L1.2[1,1000] 1ns 69mb |-------------------------------L1.2--------------------------------| "
- "L1.7[1001,1294] 1ns 14.66mb |-------L1.7-------| "
- "L2 "
- "L2.4[1,1294] 1ns 100.03mb|------------------------------------------L2.4------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 183.69mb total:"
- "L2 "
- "L2.?[1,705] 1ns 100.01mb |---------------------L2.?----------------------| "
- "L2.?[706,1294] 1ns 83.68mb |-----------------L2.?-----------------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.2, L2.4, L1.7"
- " Creating 2 files"
- "**** Final Output Files "
- "L1 "
- "L1.2[1,1000] 1ns 69mb |-----------L1.2------------| "
- "L1.3[1001,2000] 1ns 50mb |-----------L1.3------------| "
- "L1.8[1295,2000] 1ns 35.34mb |-------L1.8--------| "
- "L2 "
- "L2.1[1,3000] 1ns 232mb |------------------------------------------L2.1------------------------------------------|"
- "WARNING: file L2.1[1,3000] 1ns 232mb exceeds soft limit 100mb by more than 50%"
- "L2.5[1295,2587] 1ns 99.95mb |----------------L2.5----------------| "
- "L2.6[2588,3000] 1ns 32.03mb |---L2.6---| "
- "L2.9[1,705] 1ns 100.01mb |-------L2.9--------| "
- "L2.10[706,1294] 1ns 83.68mb |-----L2.10-----| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// One very large start level file with one good size overlapped target level file
// Two have similar time range
#[tokio::test]
@ -660,7 +892,6 @@ async fn start_too_large_similar_time_range() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -696,18 +927,42 @@ async fn start_too_large_similar_time_range() {
- "L2 "
- "L2.2[2,1000] 1ns 52mb |-----------------------------------------L2.2------------------------------------------| "
- "WARNING: file L1.1[1,1000] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[401, 801]). 1 Input Files, 250mb total:"
- "L1, all files 250mb "
- "L1.1[1,1000] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 250mb total:"
- "L1 "
- "L1.1[1,1000] 1ns 250mb |------------------------------------------L1.1------------------------------------------|"
- "L1.?[1,401] 1ns 100.1mb |---------------L1.?---------------| "
- "L1.?[402,801] 1ns 99.85mb |--------------L1.?---------------| "
- "L1.?[802,1000] 1ns 50.05mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.1"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[398, 795]). 3 Input Files, 251.95mb total:"
- "L1 "
- "L1.3[1,401] 1ns 100.1mb |---------------L1.3---------------| "
- "L1.4[402,801] 1ns 99.85mb |--------------L1.4---------------| "
- "L2 "
- "L2.2[2,1000] 1ns 52mb |-----------------------------------------L2.2------------------------------------------| "
- "WARNING: file L1.1[1,1000] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "**** 3 Output Files (parquet_file_id not yet assigned), 251.95mb total:"
- "L2 "
- "L2.?[1,398] 1ns 100.12mb |--------------L2.?---------------| "
- "L2.?[399,795] 1ns 99.87mb |--------------L2.?---------------| "
- "L2.?[796,1000] 1ns 51.95mb |------L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L2.2, L1.3, L1.4"
- " Creating 3 files"
- "**** Final Output Files "
- "L1 "
- "L1.5[802,1000] 1ns 50.05mb |-----L1.5------| "
- "L2 "
- "L2.6[1,398] 1ns 100.12mb |--------------L2.6---------------| "
- "L2.7[399,795] 1ns 99.87mb |--------------L2.7---------------| "
- "L2.8[796,1000] 1ns 51.95mb |------L2.8------| "
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// One very large start level file with one good size overlapped target level file
// Overlapped range is small
// The overlapped range is at the end of both start_level file and target level file
@ -717,7 +972,6 @@ async fn start_too_large_small_time_range() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -753,18 +1007,42 @@ async fn start_too_large_small_time_range() {
- "L2 "
- "L2.2[800,1000] 1ns 52mb |------L2.2------|"
- "WARNING: file L1.1[0,1000] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[400, 800]). 1 Input Files, 250mb total:"
- "L1, all files 250mb "
- "L1.1[0,1000] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 250mb total:"
- "L1 "
- "L1.1[0,1000] 1ns 250mb |------------------------------------------L1.1------------------------------------------|"
- "L1.?[0,400] 1ns 100mb |---------------L1.?---------------| "
- "L1.?[401,800] 1ns 99.75mb |--------------L1.?---------------| "
- "L1.?[801,1000] 1ns 50.25mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.1"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[698, 995]). 3 Input Files, 202mb total:"
- "L1 "
- "L1.5[801,1000] 1ns 50.25mb |-----------L1.5------------| "
- "L1.4[401,800] 1ns 99.75mb|--------------------------L1.4---------------------------| "
- "L2 "
- "L2.2[800,1000] 1ns 52mb |------L2.2------|"
- "WARNING: file L1.1[0,1000] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "L2.2[800,1000] 1ns 52mb |------------L2.2------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 202mb total:"
- "L2 "
- "L2.?[401,698] 1ns 100.16mb|-------------------L2.?-------------------| "
- "L2.?[699,995] 1ns 99.82mb |-------------------L2.?-------------------| "
- "L2.?[996,1000] 1ns 2.02mb |L2.?|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L2.2, L1.4, L1.5"
- " Upgrading 1 files level to CompactionLevel::L2: L1.3"
- " Creating 3 files"
- "**** Final Output Files "
- "L2 "
- "L2.3[0,400] 1ns 100mb |---------------L2.3---------------| "
- "L2.6[401,698] 1ns 100.16mb |----------L2.6----------| "
- "L2.7[699,995] 1ns 99.82mb |----------L2.7----------| "
- "L2.8[996,1000] 1ns 2.02mb |L2.8|"
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// One very large start level file with one good size overlapped target level file
// Overlapped range is small
// The overlapped range is at the end of start_level file and start of target level file
@ -774,7 +1052,6 @@ async fn start_too_large_small_time_range_2() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -810,18 +1087,42 @@ async fn start_too_large_small_time_range_2() {
- "L2 "
- "L2.2[1600,3000] 1ns 52mb |-------------------------L2.2--------------------------| "
- "WARNING: file L1.1[800,2000] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[1280, 1760]). 1 Input Files, 250mb total:"
- "L1, all files 250mb "
- "L1.1[800,2000] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 250mb total:"
- "L1 "
- "L1.1[800,2000] 1ns 250mb |---------------------L1.1----------------------| "
- "L1.?[800,1280] 1ns 100mb |---------------L1.?---------------| "
- "L1.?[1281,1760] 1ns 99.79mb |--------------L1.?---------------| "
- "L1.?[1761,2000] 1ns 50.21mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.1"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[2132, 2983]). 3 Input Files, 202mb total:"
- "L1 "
- "L1.5[1761,2000] 1ns 50.21mb |---L1.5---| "
- "L1.4[1281,1760] 1ns 99.79mb|---------L1.4----------| "
- "L2 "
- "L2.2[1600,3000] 1ns 52mb |-------------------------L2.2--------------------------| "
- "WARNING: file L1.1[800,2000] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "L2.2[1600,3000] 1ns 52mb |---------------------------------L2.2----------------------------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 202mb total:"
- "L2 "
- "L2.?[1281,2132] 1ns 100mb|-------------------L2.?-------------------| "
- "L2.?[2133,2983] 1ns 99.88mb |-------------------L2.?-------------------| "
- "L2.?[2984,3000] 1ns 2.12mb |L2.?|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L2.2, L1.4, L1.5"
- " Upgrading 1 files level to CompactionLevel::L2: L1.3"
- " Creating 3 files"
- "**** Final Output Files "
- "L2 "
- "L2.3[800,1280] 1ns 100mb |------L2.3-------| "
- "L2.6[1281,2132] 1ns 100mb |--------------L2.6--------------| "
- "L2.7[2133,2983] 1ns 99.88mb |--------------L2.7--------------| "
- "L2.8[2984,3000] 1ns 2.12mb |L2.8|"
"###
);
}
// These files should be split and then compacted after https://github.com/influxdata/idpe/issues/17246
// One very large start level file with one good size overlapped target level file
// Overlapped range is small
// The overlapped range is at the end of start_level file and start of target level file
@ -832,7 +1133,6 @@ async fn start_too_large_small_time_range_3() {
let setup = layout_setup_builder()
.await
.with_max_compact_size(MAX_COMPACT_SIZE)
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
@ -868,13 +1168,104 @@ async fn start_too_large_small_time_range_3() {
- "L2 "
- "L2.2[200,1300] 1ns 52mb |-----------------------------------L2.2-----------------------------------| "
- "WARNING: file L1.1[0,300] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[120, 240]). 1 Input Files, 250mb total:"
- "L1, all files 250mb "
- "L1.1[0,300] 1ns |------------------------------------------L1.1------------------------------------------|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 250mb total:"
- "L1 "
- "L1.1[0,300] 1ns 250mb |-------L1.1-------| "
- "L1.?[0,120] 1ns 100mb |---------------L1.?---------------| "
- "L1.?[121,240] 1ns 99.17mb |--------------L1.?---------------| "
- "L1.?[241,300] 1ns 50.83mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.1"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[705, 1289]). 3 Input Files, 202mb total:"
- "L1 "
- "L1.5[241,300] 1ns 50.83mb |L1.5| "
- "L1.4[121,240] 1ns 99.17mb|-L1.4--| "
- "L2 "
- "L2.2[200,1300] 1ns 52mb |-----------------------------------L2.2-----------------------------------| "
- "WARNING: file L1.1[0,300] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "L2.2[200,1300] 1ns 52mb |--------------------------------------L2.2---------------------------------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 202mb total:"
- "L2 "
- "L2.?[121,705] 1ns 100.06mb|-------------------L2.?-------------------| "
- "L2.?[706,1289] 1ns 99.89mb |-------------------L2.?-------------------| "
- "L2.?[1290,1300] 1ns 2.06mb |L2.?|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L2.2, L1.4, L1.5"
- " Upgrading 1 files level to CompactionLevel::L2: L1.3"
- " Creating 3 files"
- "**** Final Output Files "
- "L2 "
- "L2.3[0,120] 1ns 100mb |-L2.3-| "
- "L2.6[121,705] 1ns 100.06mb |-----------------L2.6-----------------| "
- "L2.7[706,1289] 1ns 99.89mb |-----------------L2.7-----------------| "
- "L2.8[1290,1300] 1ns 2.06mb |L2.8|"
"###
);
}
// tiny time range and cannot split --> skip
#[tokio::test]
async fn tiny_time_range() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.build()
.await;
// . one L1 >> max_desired_file_size_bytes to trigger compaction
// . L1 is tiny time range --> won't be split
// . one good size overlapped L2 --> won't be split
// . total size = L1 & L2 > max_compact_size
// size of l1 & l2 respectively
let l1_size = 250 * ONE_MB;
let l2_size = 52 * ONE_MB;
// l1
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(1)
.with_max_time(2)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_file_size_bytes(l1_size),
)
.await;
// l2
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(1)
.with_max_time(1000)
.with_compaction_level(CompactionLevel::Final)
.with_file_size_bytes(l2_size),
)
.await;
// Neither L1 nor L2 will be split and lead to skipping compaction
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L1 "
- "L1.1[1,2] 1ns 250mb |L1.1| "
- "L2 "
- "L2.2[1,1000] 1ns 52mb |------------------------------------------L2.2------------------------------------------|"
- "WARNING: file L1.1[1,2] 1ns 250mb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L1 "
- "L1.1[1,2] 1ns 250mb |L1.1| "
- "L2 "
- "L2.2[1,1000] 1ns 52mb |------------------------------------------L2.2------------------------------------------|"
- "WARNING: file L1.1[1,2] 1ns 250mb exceeds soft limit 100mb by more than 50%"
"###
);
}

View File

@ -12,7 +12,7 @@ use crate::layouts::{layout_setup_builder, parquet_builder, run_layout_scenario,
#[tokio::test]
async fn one_l1_overlaps_with_many_l2s() {
// Simulate a production scenario in which there are two L1 files but one overlaps with three L2 files
// and their total size > limit 256MB
// and their total size > limit 300MB
// |----------L2.1----------||----------L2.2----------||-----L2.3----|
// |----------------------------------------L1.4---------------------------||--L1.5--|
@ -83,30 +83,32 @@ async fn one_l1_overlaps_with_many_l2s() {
- "L1.?[201,250] 240s 20.88mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.4"
- " Creating 4 files at level CompactionLevel::L1"
- "**** Simulation run 1, type=split(split_times=[72]). 2 Input Files, 139.76mb total:"
- " Creating 4 files"
- "**** Simulation run 1, type=split(split_times=[59, 117]). 4 Input Files, 259.44mb total:"
- "L1 "
- "L1.6[1,100] 240s 39.76mb |-----------------------------------------L1.6------------------------------------------| "
- "L1.6[1,100] 240s 39.76mb |--------------------------L1.6---------------------------| "
- "L1.7[101,150] 240s 19.68mb |-----------L1.7------------| "
- "L2 "
- "L2.1[51,100] 0ns 100mb |-------------------L2.1-------------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 139.76mb total:"
- "L2.1[51,100] 0ns 100mb |-----------L2.1------------| "
- "L2.2[101,150] 60s 100mb |-----------L2.2------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 259.44mb total:"
- "L2 "
- "L2.?[1,72] 240s 100.23mb |-----------------------------L2.?-----------------------------| "
- "L2.?[73,100] 240s 39.53mb |---------L2.?---------| "
- "L2.?[1,59] 240s 100.99mb |--------------L2.?---------------| "
- "L2.?[60,117] 240s 99.25mb |--------------L2.?--------------| "
- "L2.?[118,150] 240s 59.2mb |------L2.?-------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L2.1, L1.6"
- " Creating 2 files at level CompactionLevel::L2"
- " Soft Deleting 4 files: L2.1, L2.2, L1.6, L1.7"
- " Creating 3 files"
- "**** Final Output Files "
- "L1 "
- "L1.5[251,500] 300s 30mb |-------------------L1.5-------------------| "
- "L1.7[101,150] 240s 19.68mb |-L1.7-| "
- "L1.8[151,200] 240s 19.68mb |-L1.8-| "
- "L1.9[201,250] 240s 20.88mb |-L1.9-| "
- "L2 "
- "L2.2[101,150] 60s 100mb |-L2.2-| "
- "L2.3[151,200] 120s 70mb |-L2.3-| "
- "L2.10[1,72] 240s 100.23mb|--L2.10---| "
- "L2.11[73,100] 240s 39.53mb |L2.11| "
- "L2.10[1,59] 240s 100.99mb|-L2.10--| "
- "L2.11[60,117] 240s 99.25mb |-L2.11--| "
- "L2.12[118,150] 240s 59.2mb |L2.12| "
"###
);
}
@ -205,36 +207,36 @@ async fn many_l1_overlaps_with_many_l2s() {
- "L1.?[101,105] 360s 4.64mb |---------L1.?----------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.6"
- " Creating 2 files at level CompactionLevel::L1"
- "**** Simulation run 1, type=split(split_times=[91, 131]). 7 Input Files, 252mb total:"
- " Creating 2 files"
- "**** Simulation run 1, type=split(split_times=[87, 123]). 9 Input Files, 278mb total:"
- "L1 "
- "L1.4[61,75] 240s 13mb |---L1.4---| "
- "L1.5[76,90] 300s 13mb |---L1.5---| "
- "L1.12[91,100] 360s 8.36mb |L1.12-| "
- "L1.13[101,105] 360s 4.64mb |L1.13| "
- "L1.7[106,120] 420s 13mb |---L1.7---| "
- "L1.8[121,135] 480s 13mb |---L1.8---| "
- "L1.9[136,150] 540s 13mb |---L1.9---| "
- "L2 "
- "L2.1[51,100] 0ns 100mb |-------------------L2.1-------------------| "
- "L2.2[101,150] 60s 100mb |-------------------L2.2-------------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 252mb total:"
- "**** 3 Output Files (parquet_file_id not yet assigned), 278mb total:"
- "L2 "
- "L2.?[51,91] 420s 101.82mb|---------------L2.?---------------| "
- "L2.?[92,131] 420s 99.27mb |--------------L2.?---------------| "
- "L2.?[132,150] 420s 50.91mb |-----L2.?-----| "
- "L2.?[51,87] 540s 101.09mb|-------------L2.?-------------| "
- "L2.?[88,123] 540s 98.28mb |------------L2.?-------------| "
- "L2.?[124,150] 540s 78.63mb |--------L2.?---------| "
- "Committing partition 1:"
- " Soft Deleting 7 files: L2.1, L2.2, L1.4, L1.5, L1.7, L1.12, L1.13"
- " Creating 3 files at level CompactionLevel::L2"
- " Soft Deleting 9 files: L2.1, L2.2, L1.4, L1.5, L1.7, L1.8, L1.9, L1.12, L1.13"
- " Creating 3 files"
- "**** Final Output Files "
- "L1 "
- "L1.8[121,135] 480s 13mb |L1.8-| "
- "L1.9[136,150] 540s 13mb |L1.9-| "
- "L1.10[151,165] 600s 13mb |L1.10| "
- "L1.11[201,215] 660s 13mb |L1.11| "
- "L2 "
- "L2.3[151,200] 120s 70mb |----------L2.3----------| "
- "L2.14[51,91] 420s 101.82mb|-------L2.14-------| "
- "L2.15[92,131] 420s 99.27mb |-------L2.15-------| "
- "L2.16[132,150] 420s 50.91mb |-L2.16-| "
- "L2.14[51,87] 540s 101.09mb|------L2.14------| "
- "L2.15[88,123] 540s 98.28mb |------L2.15------| "
- "L2.16[124,150] 540s 78.63mb |---L2.16----| "
"###
);
}
@ -243,7 +245,7 @@ async fn many_l1_overlaps_with_many_l2s() {
async fn many_good_size_l0_files() {
test_helpers::maybe_start_logging();
// Scenario when we have a lot of L0 files becasue the compactor cannot keep up with ingesters
// Scenario when we have a lot of L0 files because the compactor cannot keep up with ingesters
// and each files is a good size, 2MB, which lead to total size of 200 files (limit num files)
// greater than limit size (256MB)
@ -561,384 +563,365 @@ async fn many_good_size_l0_files() {
- "L0.286[285,286] 1ns |L0.286|"
- "L0.287[286,287] 1ns |L0.287|"
- "L0.288[287,288] 1ns |L0.288|"
- "**** Simulation run 0, type=split(split_times=[50, 100]). 128 Input Files, 256mb total:"
- "**** Simulation run 0, type=split(split_times=[50, 100]). 150 Input Files, 300mb total:"
- "L0, all files 2mb "
- "L0.1[0,1] 1ns |L0.1| "
- "L0.2[1,2] 1ns |L0.2| "
- "L0.3[2,3] 1ns |L0.3| "
- "L0.4[3,4] 1ns |L0.4| "
- "L0.4[3,4] 1ns |L0.4| "
- "L0.5[4,5] 1ns |L0.5| "
- "L0.6[5,6] 1ns |L0.6| "
- "L0.7[6,7] 1ns |L0.7| "
- "L0.7[6,7] 1ns |L0.7| "
- "L0.8[7,8] 1ns |L0.8| "
- "L0.9[8,9] 1ns |L0.9| "
- "L0.10[9,10] 1ns |L0.10| "
- "L0.11[10,11] 1ns |L0.11| "
- "L0.12[11,12] 1ns |L0.12| "
- "L0.13[12,13] 1ns |L0.13| "
- "L0.14[13,14] 1ns |L0.14| "
- "L0.15[14,15] 1ns |L0.15| "
- "L0.16[15,16] 1ns |L0.16| "
- "L0.17[16,17] 1ns |L0.17| "
- "L0.18[17,18] 1ns |L0.18| "
- "L0.19[18,19] 1ns |L0.19| "
- "L0.20[19,20] 1ns |L0.20| "
- "L0.21[20,21] 1ns |L0.21| "
- "L0.22[21,22] 1ns |L0.22| "
- "L0.23[22,23] 1ns |L0.23| "
- "L0.24[23,24] 1ns |L0.24| "
- "L0.25[24,25] 1ns |L0.25| "
- "L0.26[25,26] 1ns |L0.26| "
- "L0.27[26,27] 1ns |L0.27| "
- "L0.28[27,28] 1ns |L0.28| "
- "L0.29[28,29] 1ns |L0.29| "
- "L0.30[29,30] 1ns |L0.30| "
- "L0.31[30,31] 1ns |L0.31| "
- "L0.32[31,32] 1ns |L0.32| "
- "L0.33[32,33] 1ns |L0.33| "
- "L0.34[33,34] 1ns |L0.34| "
- "L0.35[34,35] 1ns |L0.35| "
- "L0.36[35,36] 1ns |L0.36| "
- "L0.37[36,37] 1ns |L0.37| "
- "L0.38[37,38] 1ns |L0.38| "
- "L0.39[38,39] 1ns |L0.39| "
- "L0.40[39,40] 1ns |L0.40| "
- "L0.41[40,41] 1ns |L0.41| "
- "L0.42[41,42] 1ns |L0.42| "
- "L0.43[42,43] 1ns |L0.43| "
- "L0.44[43,44] 1ns |L0.44| "
- "L0.45[44,45] 1ns |L0.45| "
- "L0.46[45,46] 1ns |L0.46| "
- "L0.47[46,47] 1ns |L0.47| "
- "L0.48[47,48] 1ns |L0.48| "
- "L0.49[48,49] 1ns |L0.49| "
- "L0.50[49,50] 1ns |L0.50| "
- "L0.51[50,51] 1ns |L0.51| "
- "L0.52[51,52] 1ns |L0.52| "
- "L0.53[52,53] 1ns |L0.53| "
- "L0.54[53,54] 1ns |L0.54| "
- "L0.55[54,55] 1ns |L0.55| "
- "L0.56[55,56] 1ns |L0.56| "
- "L0.57[56,57] 1ns |L0.57| "
- "L0.58[57,58] 1ns |L0.58| "
- "L0.59[58,59] 1ns |L0.59| "
- "L0.60[59,60] 1ns |L0.60| "
- "L0.61[60,61] 1ns |L0.61| "
- "L0.62[61,62] 1ns |L0.62| "
- "L0.63[62,63] 1ns |L0.63| "
- "L0.64[63,64] 1ns |L0.64| "
- "L0.65[64,65] 1ns |L0.65| "
- "L0.66[65,66] 1ns |L0.66| "
- "L0.67[66,67] 1ns |L0.67| "
- "L0.68[67,68] 1ns |L0.68| "
- "L0.69[68,69] 1ns |L0.69| "
- "L0.70[69,70] 1ns |L0.70| "
- "L0.71[70,71] 1ns |L0.71| "
- "L0.72[71,72] 1ns |L0.72| "
- "L0.73[72,73] 1ns |L0.73| "
- "L0.74[73,74] 1ns |L0.74| "
- "L0.75[74,75] 1ns |L0.75| "
- "L0.76[75,76] 1ns |L0.76| "
- "L0.77[76,77] 1ns |L0.77| "
- "L0.78[77,78] 1ns |L0.78| "
- "L0.79[78,79] 1ns |L0.79| "
- "L0.80[79,80] 1ns |L0.80| "
- "L0.81[80,81] 1ns |L0.81| "
- "L0.82[81,82] 1ns |L0.82| "
- "L0.83[82,83] 1ns |L0.83| "
- "L0.84[83,84] 1ns |L0.84| "
- "L0.85[84,85] 1ns |L0.85| "
- "L0.86[85,86] 1ns |L0.86| "
- "L0.87[86,87] 1ns |L0.87| "
- "L0.88[87,88] 1ns |L0.88| "
- "L0.89[88,89] 1ns |L0.89| "
- "L0.90[89,90] 1ns |L0.90| "
- "L0.91[90,91] 1ns |L0.91| "
- "L0.92[91,92] 1ns |L0.92| "
- "L0.93[92,93] 1ns |L0.93| "
- "L0.94[93,94] 1ns |L0.94| "
- "L0.95[94,95] 1ns |L0.95| "
- "L0.96[95,96] 1ns |L0.96| "
- "L0.97[96,97] 1ns |L0.97| "
- "L0.98[97,98] 1ns |L0.98| "
- "L0.99[98,99] 1ns |L0.99| "
- "L0.100[99,100] 1ns |L0.100| "
- "L0.101[100,101] 1ns |L0.101| "
- "L0.102[101,102] 1ns |L0.102| "
- "L0.103[102,103] 1ns |L0.103| "
- "L0.104[103,104] 1ns |L0.104| "
- "L0.105[104,105] 1ns |L0.105| "
- "L0.106[105,106] 1ns |L0.106| "
- "L0.107[106,107] 1ns |L0.107| "
- "L0.108[107,108] 1ns |L0.108| "
- "L0.109[108,109] 1ns |L0.109| "
- "L0.110[109,110] 1ns |L0.110| "
- "L0.111[110,111] 1ns |L0.111| "
- "L0.112[111,112] 1ns |L0.112| "
- "L0.113[112,113] 1ns |L0.113| "
- "L0.114[113,114] 1ns |L0.114| "
- "L0.115[114,115] 1ns |L0.115| "
- "L0.116[115,116] 1ns |L0.116| "
- "L0.117[116,117] 1ns |L0.117| "
- "L0.118[117,118] 1ns |L0.118|"
- "L0.119[118,119] 1ns |L0.119|"
- "L0.120[119,120] 1ns |L0.120|"
- "L0.121[120,121] 1ns |L0.121|"
- "L0.122[121,122] 1ns |L0.122|"
- "L0.123[122,123] 1ns |L0.123|"
- "L0.124[123,124] 1ns |L0.124|"
- "L0.125[124,125] 1ns |L0.125|"
- "L0.126[125,126] 1ns |L0.126|"
- "L0.127[126,127] 1ns |L0.127|"
- "L0.128[127,128] 1ns |L0.128|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 256mb total:"
- "L0.9[8,9] 1ns |L0.9| "
- "L0.10[9,10] 1ns |L0.10| "
- "L0.11[10,11] 1ns |L0.11| "
- "L0.12[11,12] 1ns |L0.12| "
- "L0.13[12,13] 1ns |L0.13| "
- "L0.14[13,14] 1ns |L0.14| "
- "L0.15[14,15] 1ns |L0.15| "
- "L0.16[15,16] 1ns |L0.16| "
- "L0.17[16,17] 1ns |L0.17| "
- "L0.18[17,18] 1ns |L0.18| "
- "L0.19[18,19] 1ns |L0.19| "
- "L0.20[19,20] 1ns |L0.20| "
- "L0.21[20,21] 1ns |L0.21| "
- "L0.22[21,22] 1ns |L0.22| "
- "L0.23[22,23] 1ns |L0.23| "
- "L0.24[23,24] 1ns |L0.24| "
- "L0.25[24,25] 1ns |L0.25| "
- "L0.26[25,26] 1ns |L0.26| "
- "L0.27[26,27] 1ns |L0.27| "
- "L0.28[27,28] 1ns |L0.28| "
- "L0.29[28,29] 1ns |L0.29| "
- "L0.30[29,30] 1ns |L0.30| "
- "L0.31[30,31] 1ns |L0.31| "
- "L0.32[31,32] 1ns |L0.32| "
- "L0.33[32,33] 1ns |L0.33| "
- "L0.34[33,34] 1ns |L0.34| "
- "L0.35[34,35] 1ns |L0.35| "
- "L0.36[35,36] 1ns |L0.36| "
- "L0.37[36,37] 1ns |L0.37| "
- "L0.38[37,38] 1ns |L0.38| "
- "L0.39[38,39] 1ns |L0.39| "
- "L0.40[39,40] 1ns |L0.40| "
- "L0.41[40,41] 1ns |L0.41| "
- "L0.42[41,42] 1ns |L0.42| "
- "L0.43[42,43] 1ns |L0.43| "
- "L0.44[43,44] 1ns |L0.44| "
- "L0.45[44,45] 1ns |L0.45| "
- "L0.46[45,46] 1ns |L0.46| "
- "L0.47[46,47] 1ns |L0.47| "
- "L0.48[47,48] 1ns |L0.48| "
- "L0.49[48,49] 1ns |L0.49| "
- "L0.50[49,50] 1ns |L0.50| "
- "L0.51[50,51] 1ns |L0.51| "
- "L0.52[51,52] 1ns |L0.52| "
- "L0.53[52,53] 1ns |L0.53| "
- "L0.54[53,54] 1ns |L0.54| "
- "L0.55[54,55] 1ns |L0.55| "
- "L0.56[55,56] 1ns |L0.56| "
- "L0.57[56,57] 1ns |L0.57| "
- "L0.58[57,58] 1ns |L0.58| "
- "L0.59[58,59] 1ns |L0.59| "
- "L0.60[59,60] 1ns |L0.60| "
- "L0.61[60,61] 1ns |L0.61| "
- "L0.62[61,62] 1ns |L0.62| "
- "L0.63[62,63] 1ns |L0.63| "
- "L0.64[63,64] 1ns |L0.64| "
- "L0.65[64,65] 1ns |L0.65| "
- "L0.66[65,66] 1ns |L0.66| "
- "L0.67[66,67] 1ns |L0.67| "
- "L0.68[67,68] 1ns |L0.68| "
- "L0.69[68,69] 1ns |L0.69| "
- "L0.70[69,70] 1ns |L0.70| "
- "L0.71[70,71] 1ns |L0.71| "
- "L0.72[71,72] 1ns |L0.72| "
- "L0.73[72,73] 1ns |L0.73| "
- "L0.74[73,74] 1ns |L0.74| "
- "L0.75[74,75] 1ns |L0.75| "
- "L0.76[75,76] 1ns |L0.76| "
- "L0.77[76,77] 1ns |L0.77| "
- "L0.78[77,78] 1ns |L0.78| "
- "L0.79[78,79] 1ns |L0.79| "
- "L0.80[79,80] 1ns |L0.80| "
- "L0.81[80,81] 1ns |L0.81| "
- "L0.82[81,82] 1ns |L0.82| "
- "L0.83[82,83] 1ns |L0.83| "
- "L0.84[83,84] 1ns |L0.84| "
- "L0.85[84,85] 1ns |L0.85| "
- "L0.86[85,86] 1ns |L0.86| "
- "L0.87[86,87] 1ns |L0.87| "
- "L0.88[87,88] 1ns |L0.88| "
- "L0.89[88,89] 1ns |L0.89| "
- "L0.90[89,90] 1ns |L0.90| "
- "L0.91[90,91] 1ns |L0.91| "
- "L0.92[91,92] 1ns |L0.92| "
- "L0.93[92,93] 1ns |L0.93| "
- "L0.94[93,94] 1ns |L0.94| "
- "L0.95[94,95] 1ns |L0.95| "
- "L0.96[95,96] 1ns |L0.96| "
- "L0.97[96,97] 1ns |L0.97| "
- "L0.98[97,98] 1ns |L0.98| "
- "L0.99[98,99] 1ns |L0.99| "
- "L0.100[99,100] 1ns |L0.100| "
- "L0.101[100,101] 1ns |L0.101| "
- "L0.102[101,102] 1ns |L0.102| "
- "L0.103[102,103] 1ns |L0.103| "
- "L0.104[103,104] 1ns |L0.104| "
- "L0.105[104,105] 1ns |L0.105| "
- "L0.106[105,106] 1ns |L0.106| "
- "L0.107[106,107] 1ns |L0.107| "
- "L0.108[107,108] 1ns |L0.108| "
- "L0.109[108,109] 1ns |L0.109| "
- "L0.110[109,110] 1ns |L0.110| "
- "L0.111[110,111] 1ns |L0.111| "
- "L0.112[111,112] 1ns |L0.112| "
- "L0.113[112,113] 1ns |L0.113| "
- "L0.114[113,114] 1ns |L0.114| "
- "L0.115[114,115] 1ns |L0.115| "
- "L0.116[115,116] 1ns |L0.116| "
- "L0.117[116,117] 1ns |L0.117| "
- "L0.118[117,118] 1ns |L0.118| "
- "L0.119[118,119] 1ns |L0.119| "
- "L0.120[119,120] 1ns |L0.120| "
- "L0.121[120,121] 1ns |L0.121| "
- "L0.122[121,122] 1ns |L0.122| "
- "L0.123[122,123] 1ns |L0.123| "
- "L0.124[123,124] 1ns |L0.124| "
- "L0.125[124,125] 1ns |L0.125| "
- "L0.126[125,126] 1ns |L0.126| "
- "L0.127[126,127] 1ns |L0.127| "
- "L0.128[127,128] 1ns |L0.128| "
- "L0.129[128,129] 1ns |L0.129| "
- "L0.130[129,130] 1ns |L0.130| "
- "L0.131[130,131] 1ns |L0.131| "
- "L0.132[131,132] 1ns |L0.132| "
- "L0.133[132,133] 1ns |L0.133| "
- "L0.134[133,134] 1ns |L0.134| "
- "L0.135[134,135] 1ns |L0.135| "
- "L0.136[135,136] 1ns |L0.136| "
- "L0.137[136,137] 1ns |L0.137| "
- "L0.138[137,138] 1ns |L0.138|"
- "L0.139[138,139] 1ns |L0.139|"
- "L0.140[139,140] 1ns |L0.140|"
- "L0.141[140,141] 1ns |L0.141|"
- "L0.142[141,142] 1ns |L0.142|"
- "L0.143[142,143] 1ns |L0.143|"
- "L0.144[143,144] 1ns |L0.144|"
- "L0.145[144,145] 1ns |L0.145|"
- "L0.146[145,146] 1ns |L0.146|"
- "L0.147[146,147] 1ns |L0.147|"
- "L0.148[147,148] 1ns |L0.148|"
- "L0.149[148,149] 1ns |L0.149|"
- "L0.150[149,150] 1ns |L0.150|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 300mb total:"
- "L0 "
- "L0.?[0,50] 1ns 100mb |--------------L0.?---------------| "
- "L0.?[51,100] 1ns 98mb |--------------L0.?--------------| "
- "L0.?[101,128] 1ns 58mb |------L0.?------| "
- "L0.?[0,50] 1ns 100mb |------------L0.?------------| "
- "L0.?[51,100] 1ns 98mb |-----------L0.?------------| "
- "L0.?[101,150] 1ns 102mb |-----------L0.?------------| "
- "Committing partition 1:"
- " Soft Deleting 128 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128"
- " Creating 3 files at level CompactionLevel::L0"
- "**** Simulation run 1, type=split(split_times=[178, 228]). 128 Input Files, 256mb total:"
- " Soft Deleting 150 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128, L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150"
- " Creating 3 files"
- "**** Simulation run 1, type=split(split_times=[200, 250]). 138 Input Files, 276mb total:"
- "L0, all files 2mb "
- "L0.129[128,129] 1ns |L0.129| "
- "L0.130[129,130] 1ns |L0.130| "
- "L0.131[130,131] 1ns |L0.131| "
- "L0.132[131,132] 1ns |L0.132| "
- "L0.133[132,133] 1ns |L0.133| "
- "L0.134[133,134] 1ns |L0.134| "
- "L0.135[134,135] 1ns |L0.135| "
- "L0.136[135,136] 1ns |L0.136| "
- "L0.137[136,137] 1ns |L0.137| "
- "L0.138[137,138] 1ns |L0.138| "
- "L0.139[138,139] 1ns |L0.139| "
- "L0.140[139,140] 1ns |L0.140| "
- "L0.141[140,141] 1ns |L0.141| "
- "L0.142[141,142] 1ns |L0.142| "
- "L0.143[142,143] 1ns |L0.143| "
- "L0.144[143,144] 1ns |L0.144| "
- "L0.145[144,145] 1ns |L0.145| "
- "L0.146[145,146] 1ns |L0.146| "
- "L0.147[146,147] 1ns |L0.147| "
- "L0.148[147,148] 1ns |L0.148| "
- "L0.149[148,149] 1ns |L0.149| "
- "L0.150[149,150] 1ns |L0.150| "
- "L0.151[150,151] 1ns |L0.151| "
- "L0.152[151,152] 1ns |L0.152| "
- "L0.153[152,153] 1ns |L0.153| "
- "L0.154[153,154] 1ns |L0.154| "
- "L0.155[154,155] 1ns |L0.155| "
- "L0.156[155,156] 1ns |L0.156| "
- "L0.157[156,157] 1ns |L0.157| "
- "L0.158[157,158] 1ns |L0.158| "
- "L0.159[158,159] 1ns |L0.159| "
- "L0.160[159,160] 1ns |L0.160| "
- "L0.161[160,161] 1ns |L0.161| "
- "L0.162[161,162] 1ns |L0.162| "
- "L0.163[162,163] 1ns |L0.163| "
- "L0.164[163,164] 1ns |L0.164| "
- "L0.165[164,165] 1ns |L0.165| "
- "L0.166[165,166] 1ns |L0.166| "
- "L0.167[166,167] 1ns |L0.167| "
- "L0.168[167,168] 1ns |L0.168| "
- "L0.169[168,169] 1ns |L0.169| "
- "L0.170[169,170] 1ns |L0.170| "
- "L0.171[170,171] 1ns |L0.171| "
- "L0.172[171,172] 1ns |L0.172| "
- "L0.173[172,173] 1ns |L0.173| "
- "L0.174[173,174] 1ns |L0.174| "
- "L0.175[174,175] 1ns |L0.175| "
- "L0.176[175,176] 1ns |L0.176| "
- "L0.177[176,177] 1ns |L0.177| "
- "L0.178[177,178] 1ns |L0.178| "
- "L0.179[178,179] 1ns |L0.179| "
- "L0.180[179,180] 1ns |L0.180| "
- "L0.181[180,181] 1ns |L0.181| "
- "L0.182[181,182] 1ns |L0.182| "
- "L0.183[182,183] 1ns |L0.183| "
- "L0.184[183,184] 1ns |L0.184| "
- "L0.185[184,185] 1ns |L0.185| "
- "L0.186[185,186] 1ns |L0.186| "
- "L0.187[186,187] 1ns |L0.187| "
- "L0.188[187,188] 1ns |L0.188| "
- "L0.189[188,189] 1ns |L0.189| "
- "L0.190[189,190] 1ns |L0.190| "
- "L0.191[190,191] 1ns |L0.191| "
- "L0.192[191,192] 1ns |L0.192| "
- "L0.193[192,193] 1ns |L0.193| "
- "L0.194[193,194] 1ns |L0.194| "
- "L0.195[194,195] 1ns |L0.195| "
- "L0.196[195,196] 1ns |L0.196| "
- "L0.197[196,197] 1ns |L0.197| "
- "L0.198[197,198] 1ns |L0.198| "
- "L0.199[198,199] 1ns |L0.199| "
- "L0.200[199,200] 1ns |L0.200| "
- "L0.201[200,201] 1ns |L0.201| "
- "L0.202[201,202] 1ns |L0.202| "
- "L0.203[202,203] 1ns |L0.203| "
- "L0.204[203,204] 1ns |L0.204| "
- "L0.205[204,205] 1ns |L0.205| "
- "L0.206[205,206] 1ns |L0.206| "
- "L0.207[206,207] 1ns |L0.207| "
- "L0.208[207,208] 1ns |L0.208| "
- "L0.209[208,209] 1ns |L0.209| "
- "L0.210[209,210] 1ns |L0.210| "
- "L0.211[210,211] 1ns |L0.211| "
- "L0.212[211,212] 1ns |L0.212| "
- "L0.213[212,213] 1ns |L0.213| "
- "L0.214[213,214] 1ns |L0.214| "
- "L0.215[214,215] 1ns |L0.215| "
- "L0.216[215,216] 1ns |L0.216| "
- "L0.217[216,217] 1ns |L0.217| "
- "L0.218[217,218] 1ns |L0.218| "
- "L0.219[218,219] 1ns |L0.219| "
- "L0.220[219,220] 1ns |L0.220| "
- "L0.221[220,221] 1ns |L0.221| "
- "L0.222[221,222] 1ns |L0.222| "
- "L0.223[222,223] 1ns |L0.223| "
- "L0.224[223,224] 1ns |L0.224| "
- "L0.225[224,225] 1ns |L0.225| "
- "L0.226[225,226] 1ns |L0.226| "
- "L0.227[226,227] 1ns |L0.227| "
- "L0.228[227,228] 1ns |L0.228| "
- "L0.229[228,229] 1ns |L0.229| "
- "L0.230[229,230] 1ns |L0.230| "
- "L0.231[230,231] 1ns |L0.231| "
- "L0.232[231,232] 1ns |L0.232| "
- "L0.233[232,233] 1ns |L0.233| "
- "L0.234[233,234] 1ns |L0.234| "
- "L0.235[234,235] 1ns |L0.235| "
- "L0.236[235,236] 1ns |L0.236| "
- "L0.237[236,237] 1ns |L0.237| "
- "L0.238[237,238] 1ns |L0.238| "
- "L0.239[238,239] 1ns |L0.239| "
- "L0.240[239,240] 1ns |L0.240| "
- "L0.241[240,241] 1ns |L0.241| "
- "L0.242[241,242] 1ns |L0.242| "
- "L0.243[242,243] 1ns |L0.243| "
- "L0.244[243,244] 1ns |L0.244| "
- "L0.245[244,245] 1ns |L0.245| "
- "L0.246[245,246] 1ns |L0.246|"
- "L0.247[246,247] 1ns |L0.247|"
- "L0.248[247,248] 1ns |L0.248|"
- "L0.249[248,249] 1ns |L0.249|"
- "L0.250[249,250] 1ns |L0.250|"
- "L0.251[250,251] 1ns |L0.251|"
- "L0.252[251,252] 1ns |L0.252|"
- "L0.253[252,253] 1ns |L0.253|"
- "L0.254[253,254] 1ns |L0.254|"
- "L0.255[254,255] 1ns |L0.255|"
- "L0.256[255,256] 1ns |L0.256|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 256mb total:"
- "L0.151[150,151] 1ns |L0.151| "
- "L0.152[151,152] 1ns |L0.152| "
- "L0.153[152,153] 1ns |L0.153| "
- "L0.154[153,154] 1ns |L0.154| "
- "L0.155[154,155] 1ns |L0.155| "
- "L0.156[155,156] 1ns |L0.156| "
- "L0.157[156,157] 1ns |L0.157| "
- "L0.158[157,158] 1ns |L0.158| "
- "L0.159[158,159] 1ns |L0.159| "
- "L0.160[159,160] 1ns |L0.160| "
- "L0.161[160,161] 1ns |L0.161| "
- "L0.162[161,162] 1ns |L0.162| "
- "L0.163[162,163] 1ns |L0.163| "
- "L0.164[163,164] 1ns |L0.164| "
- "L0.165[164,165] 1ns |L0.165| "
- "L0.166[165,166] 1ns |L0.166| "
- "L0.167[166,167] 1ns |L0.167| "
- "L0.168[167,168] 1ns |L0.168| "
- "L0.169[168,169] 1ns |L0.169| "
- "L0.170[169,170] 1ns |L0.170| "
- "L0.171[170,171] 1ns |L0.171| "
- "L0.172[171,172] 1ns |L0.172| "
- "L0.173[172,173] 1ns |L0.173| "
- "L0.174[173,174] 1ns |L0.174| "
- "L0.175[174,175] 1ns |L0.175| "
- "L0.176[175,176] 1ns |L0.176| "
- "L0.177[176,177] 1ns |L0.177| "
- "L0.178[177,178] 1ns |L0.178| "
- "L0.179[178,179] 1ns |L0.179| "
- "L0.180[179,180] 1ns |L0.180| "
- "L0.181[180,181] 1ns |L0.181| "
- "L0.182[181,182] 1ns |L0.182| "
- "L0.183[182,183] 1ns |L0.183| "
- "L0.184[183,184] 1ns |L0.184| "
- "L0.185[184,185] 1ns |L0.185| "
- "L0.186[185,186] 1ns |L0.186| "
- "L0.187[186,187] 1ns |L0.187| "
- "L0.188[187,188] 1ns |L0.188| "
- "L0.189[188,189] 1ns |L0.189| "
- "L0.190[189,190] 1ns |L0.190| "
- "L0.191[190,191] 1ns |L0.191| "
- "L0.192[191,192] 1ns |L0.192| "
- "L0.193[192,193] 1ns |L0.193| "
- "L0.194[193,194] 1ns |L0.194| "
- "L0.195[194,195] 1ns |L0.195| "
- "L0.196[195,196] 1ns |L0.196| "
- "L0.197[196,197] 1ns |L0.197| "
- "L0.198[197,198] 1ns |L0.198| "
- "L0.199[198,199] 1ns |L0.199| "
- "L0.200[199,200] 1ns |L0.200| "
- "L0.201[200,201] 1ns |L0.201| "
- "L0.202[201,202] 1ns |L0.202| "
- "L0.203[202,203] 1ns |L0.203| "
- "L0.204[203,204] 1ns |L0.204| "
- "L0.205[204,205] 1ns |L0.205| "
- "L0.206[205,206] 1ns |L0.206| "
- "L0.207[206,207] 1ns |L0.207| "
- "L0.208[207,208] 1ns |L0.208| "
- "L0.209[208,209] 1ns |L0.209| "
- "L0.210[209,210] 1ns |L0.210| "
- "L0.211[210,211] 1ns |L0.211| "
- "L0.212[211,212] 1ns |L0.212| "
- "L0.213[212,213] 1ns |L0.213| "
- "L0.214[213,214] 1ns |L0.214| "
- "L0.215[214,215] 1ns |L0.215| "
- "L0.216[215,216] 1ns |L0.216| "
- "L0.217[216,217] 1ns |L0.217| "
- "L0.218[217,218] 1ns |L0.218| "
- "L0.219[218,219] 1ns |L0.219| "
- "L0.220[219,220] 1ns |L0.220| "
- "L0.221[220,221] 1ns |L0.221| "
- "L0.222[221,222] 1ns |L0.222| "
- "L0.223[222,223] 1ns |L0.223| "
- "L0.224[223,224] 1ns |L0.224| "
- "L0.225[224,225] 1ns |L0.225| "
- "L0.226[225,226] 1ns |L0.226| "
- "L0.227[226,227] 1ns |L0.227| "
- "L0.228[227,228] 1ns |L0.228| "
- "L0.229[228,229] 1ns |L0.229| "
- "L0.230[229,230] 1ns |L0.230| "
- "L0.231[230,231] 1ns |L0.231| "
- "L0.232[231,232] 1ns |L0.232| "
- "L0.233[232,233] 1ns |L0.233| "
- "L0.234[233,234] 1ns |L0.234| "
- "L0.235[234,235] 1ns |L0.235| "
- "L0.236[235,236] 1ns |L0.236| "
- "L0.237[236,237] 1ns |L0.237| "
- "L0.238[237,238] 1ns |L0.238| "
- "L0.239[238,239] 1ns |L0.239| "
- "L0.240[239,240] 1ns |L0.240| "
- "L0.241[240,241] 1ns |L0.241| "
- "L0.242[241,242] 1ns |L0.242| "
- "L0.243[242,243] 1ns |L0.243| "
- "L0.244[243,244] 1ns |L0.244| "
- "L0.245[244,245] 1ns |L0.245| "
- "L0.246[245,246] 1ns |L0.246| "
- "L0.247[246,247] 1ns |L0.247| "
- "L0.248[247,248] 1ns |L0.248| "
- "L0.249[248,249] 1ns |L0.249| "
- "L0.250[249,250] 1ns |L0.250| "
- "L0.251[250,251] 1ns |L0.251| "
- "L0.252[251,252] 1ns |L0.252| "
- "L0.253[252,253] 1ns |L0.253| "
- "L0.254[253,254] 1ns |L0.254| "
- "L0.255[254,255] 1ns |L0.255| "
- "L0.256[255,256] 1ns |L0.256| "
- "L0.257[256,257] 1ns |L0.257| "
- "L0.258[257,258] 1ns |L0.258| "
- "L0.259[258,259] 1ns |L0.259| "
- "L0.260[259,260] 1ns |L0.260| "
- "L0.261[260,261] 1ns |L0.261| "
- "L0.262[261,262] 1ns |L0.262| "
- "L0.263[262,263] 1ns |L0.263| "
- "L0.264[263,264] 1ns |L0.264| "
- "L0.265[264,265] 1ns |L0.265| "
- "L0.266[265,266] 1ns |L0.266| "
- "L0.267[266,267] 1ns |L0.267| "
- "L0.268[267,268] 1ns |L0.268| "
- "L0.269[268,269] 1ns |L0.269| "
- "L0.270[269,270] 1ns |L0.270| "
- "L0.271[270,271] 1ns |L0.271| "
- "L0.272[271,272] 1ns |L0.272| "
- "L0.273[272,273] 1ns |L0.273| "
- "L0.274[273,274] 1ns |L0.274| "
- "L0.275[274,275] 1ns |L0.275| "
- "L0.276[275,276] 1ns |L0.276| "
- "L0.277[276,277] 1ns |L0.277|"
- "L0.278[277,278] 1ns |L0.278|"
- "L0.279[278,279] 1ns |L0.279|"
- "L0.280[279,280] 1ns |L0.280|"
- "L0.281[280,281] 1ns |L0.281|"
- "L0.282[281,282] 1ns |L0.282|"
- "L0.283[282,283] 1ns |L0.283|"
- "L0.284[283,284] 1ns |L0.284|"
- "L0.285[284,285] 1ns |L0.285|"
- "L0.286[285,286] 1ns |L0.286|"
- "L0.287[286,287] 1ns |L0.287|"
- "L0.288[287,288] 1ns |L0.288|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 276mb total:"
- "L0 "
- "L0.?[128,178] 1ns 100mb |--------------L0.?---------------| "
- "L0.?[179,228] 1ns 98mb |--------------L0.?--------------| "
- "L0.?[229,256] 1ns 58mb |------L0.?------| "
- "L0.?[150,200] 1ns 100mb |-------------L0.?-------------| "
- "L0.?[201,250] 1ns 98mb |------------L0.?-------------| "
- "L0.?[251,288] 1ns 78mb |---------L0.?---------| "
- "Committing partition 1:"
- " Soft Deleting 128 files: L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L0.191, L0.192, L0.193, L0.194, L0.195, L0.196, L0.197, L0.198, L0.199, L0.200, L0.201, L0.202, L0.203, L0.204, L0.205, L0.206, L0.207, L0.208, L0.209, L0.210, L0.211, L0.212, L0.213, L0.214, L0.215, L0.216, L0.217, L0.218, L0.219, L0.220, L0.221, L0.222, L0.223, L0.224, L0.225, L0.226, L0.227, L0.228, L0.229, L0.230, L0.231, L0.232, L0.233, L0.234, L0.235, L0.236, L0.237, L0.238, L0.239, L0.240, L0.241, L0.242, L0.243, L0.244, L0.245, L0.246, L0.247, L0.248, L0.249, L0.250, L0.251, L0.252, L0.253, L0.254, L0.255, L0.256"
- " Creating 3 files at level CompactionLevel::L0"
- "**** Simulation run 2, type=split(split_times=[281]). 32 Input Files, 64mb total:"
- "L0, all files 2mb "
- "L0.257[256,257] 1ns |L0.257| "
- "L0.258[257,258] 1ns |L0.258| "
- "L0.259[258,259] 1ns |L0.259| "
- "L0.260[259,260] 1ns |L0.260| "
- "L0.261[260,261] 1ns |L0.261| "
- "L0.262[261,262] 1ns |L0.262| "
- "L0.263[262,263] 1ns |L0.263| "
- "L0.264[263,264] 1ns |L0.264| "
- "L0.265[264,265] 1ns |L0.265| "
- "L0.266[265,266] 1ns |L0.266| "
- "L0.267[266,267] 1ns |L0.267| "
- "L0.268[267,268] 1ns |L0.268| "
- "L0.269[268,269] 1ns |L0.269| "
- "L0.270[269,270] 1ns |L0.270| "
- "L0.271[270,271] 1ns |L0.271| "
- "L0.272[271,272] 1ns |L0.272| "
- "L0.273[272,273] 1ns |L0.273| "
- "L0.274[273,274] 1ns |L0.274| "
- "L0.275[274,275] 1ns |L0.275| "
- "L0.276[275,276] 1ns |L0.276| "
- "L0.277[276,277] 1ns |L0.277| "
- "L0.278[277,278] 1ns |L0.278| "
- "L0.279[278,279] 1ns |L0.279| "
- "L0.280[279,280] 1ns |L0.280| "
- "L0.281[280,281] 1ns |L0.281| "
- "L0.282[281,282] 1ns |L0.282| "
- "L0.283[282,283] 1ns |L0.283| "
- "L0.284[283,284] 1ns |L0.284| "
- "L0.285[284,285] 1ns |L0.285| "
- "L0.286[285,286] 1ns |L0.286| "
- "L0.287[286,287] 1ns |L0.287|"
- "L0.288[287,288] 1ns |L0.288|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 64mb total:"
- " Soft Deleting 138 files: L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L0.191, L0.192, L0.193, L0.194, L0.195, L0.196, L0.197, L0.198, L0.199, L0.200, L0.201, L0.202, L0.203, L0.204, L0.205, L0.206, L0.207, L0.208, L0.209, L0.210, L0.211, L0.212, L0.213, L0.214, L0.215, L0.216, L0.217, L0.218, L0.219, L0.220, L0.221, L0.222, L0.223, L0.224, L0.225, L0.226, L0.227, L0.228, L0.229, L0.230, L0.231, L0.232, L0.233, L0.234, L0.235, L0.236, L0.237, L0.238, L0.239, L0.240, L0.241, L0.242, L0.243, L0.244, L0.245, L0.246, L0.247, L0.248, L0.249, L0.250, L0.251, L0.252, L0.253, L0.254, L0.255, L0.256, L0.257, L0.258, L0.259, L0.260, L0.261, L0.262, L0.263, L0.264, L0.265, L0.266, L0.267, L0.268, L0.269, L0.270, L0.271, L0.272, L0.273, L0.274, L0.275, L0.276, L0.277, L0.278, L0.279, L0.280, L0.281, L0.282, L0.283, L0.284, L0.285, L0.286, L0.287, L0.288"
- " Creating 3 files"
- "**** Simulation run 2, type=split(split_times=[200, 250]). 3 Input Files, 276mb total:"
- "L0 "
- "L0.?[256,281] 1ns 50mb |--------------------------------L0.?--------------------------------| "
- "L0.?[282,288] 1ns 14mb |-----L0.?-----| "
- "Committing partition 1:"
- " Soft Deleting 32 files: L0.257, L0.258, L0.259, L0.260, L0.261, L0.262, L0.263, L0.264, L0.265, L0.266, L0.267, L0.268, L0.269, L0.270, L0.271, L0.272, L0.273, L0.274, L0.275, L0.276, L0.277, L0.278, L0.279, L0.280, L0.281, L0.282, L0.283, L0.284, L0.285, L0.286, L0.287, L0.288"
- " Creating 2 files at level CompactionLevel::L0"
- "**** Simulation run 3, type=split(split_times=[229, 279]). 4 Input Files, 220mb total:"
- "L0 "
- "L0.296[282,288] 1ns 14mb |L0.296|"
- "L0.295[256,281] 1ns 50mb |------L0.295------| "
- "L0.294[229,256] 1ns 58mb |-------L0.294-------| "
- "L0.293[179,228] 1ns 98mb |----------------L0.293----------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 220mb total:"
- "L0.294[251,288] 1ns 78mb |--------L0.294--------| "
- "L0.293[201,250] 1ns 98mb |-----------L0.293------------| "
- "L0.292[150,200] 1ns 100mb|------------L0.292------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 276mb total:"
- "L1 "
- "L1.?[179,229] 1ns 100.92mb|-----------------L1.?------------------| "
- "L1.?[230,279] 1ns 98.9mb |-----------------L1.?-----------------| "
- "L1.?[280,288] 1ns 20.18mb |L1.?| "
- "L1.?[150,200] 1ns 100mb |-------------L1.?-------------| "
- "L1.?[201,250] 1ns 98mb |------------L1.?-------------| "
- "L1.?[251,288] 1ns 78mb |---------L1.?---------| "
- "Committing partition 1:"
- " Soft Deleting 4 files: L0.293, L0.294, L0.295, L0.296"
- " Soft Deleting 3 files: L0.292, L0.293, L0.294"
- " Upgrading 1 files level to CompactionLevel::L1: L0.289"
- " Creating 3 files at level CompactionLevel::L1"
- "**** Simulation run 4, type=split(split_times=[101, 151]). 3 Input Files, 256mb total:"
- " Creating 3 files"
- "**** Simulation run 3, type=split(split_times=[101, 151]). 3 Input Files, 300mb total:"
- "L0 "
- "L0.290[51,100] 1ns 98mb |-------------L0.290-------------| "
- "L0.291[101,128] 1ns 58mb |-----L0.291------| "
- "L0.292[128,178] 1ns 100mb |-------------L0.292--------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 256mb total:"
- "L0.290[51,100] 1ns 98mb |----------L0.290-----------| "
- "L0.291[101,150] 1ns 102mb |----------L0.291-----------| "
- "L1 "
- "L1.?[51,101] 1ns 100.79mb|--------------L1.?---------------| "
- "L1.?[102,151] 1ns 98.77mb |--------------L1.?--------------| "
- "L1.?[152,178] 1ns 56.44mb |------L1.?------| "
- "L1.295[150,200] 1ns 100mb |-----------L1.295-----------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 300mb total:"
- "L1 "
- "L1.?[51,101] 1ns 100.67mb|------------L1.?------------| "
- "L1.?[102,151] 1ns 98.66mb |-----------L1.?------------| "
- "L1.?[152,200] 1ns 100.67mb |-----------L1.?-----------| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.290, L0.291, L0.292"
- " Creating 3 files at level CompactionLevel::L1"
- "**** Simulation run 5, type=split(split_times=[151]). 2 Input Files, 155.21mb total:"
- " Soft Deleting 3 files: L0.290, L0.291, L1.295"
- " Creating 3 files"
- "**** Simulation run 4, type=split(split_times=[152, 202]). 3 Input Files, 297.33mb total:"
- "L1 "
- "L1.301[102,151] 1ns 98.77mb|-------------------------L1.301-------------------------| "
- "L1.302[152,178] 1ns 56.44mb |-----------L1.302-----------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 155.21mb total:"
- "L1.299[102,151] 1ns 98.66mb|----------L1.299-----------| "
- "L1.300[152,200] 1ns 100.67mb |----------L1.300-----------| "
- "L1.296[201,250] 1ns 98mb |----------L1.296-----------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 297.33mb total:"
- "L2 "
- "L2.?[102,151] 1ns 100.07mb|--------------------------L2.?--------------------------| "
- "L2.?[152,178] 1ns 55.14mb |------------L2.?------------| "
- "L2.?[102,152] 1ns 100.45mb|------------L2.?------------| "
- "L2.?[153,202] 1ns 98.44mb |-----------L2.?------------| "
- "L2.?[203,250] 1ns 98.44mb |-----------L2.?-----------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.301, L1.302"
- " Upgrading 2 files level to CompactionLevel::L2: L1.289, L1.300"
- " Creating 2 files at level CompactionLevel::L2"
- "**** Simulation run 6, type=split(split_times=[276]). 2 Input Files, 119.08mb total:"
- "L1 "
- "L1.299[280,288] 1ns 20.18mb |--L1.299--| "
- "L1.298[230,279] 1ns 98.9mb|----------------------------------L1.298----------------------------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 119.08mb total:"
- "L2 "
- "L2.?[230,276] 1ns 94.44mb|--------------------------------L2.?---------------------------------| "
- "L2.?[277,288] 1ns 24.64mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.298, L1.299"
- " Upgrading 1 files level to CompactionLevel::L2: L1.297"
- " Creating 2 files at level CompactionLevel::L2"
- " Soft Deleting 3 files: L1.296, L1.299, L1.300"
- " Upgrading 2 files level to CompactionLevel::L2: L1.289, L1.298"
- " Creating 3 files"
- "**** Final Output Files "
- "L1 "
- "L1.297[251,288] 1ns 78mb |-L1.297--| "
- "L2 "
- "L2.289[0,50] 1ns 100mb |---L2.289----| "
- "L2.297[179,229] 1ns 100.92mb |---L2.297----| "
- "L2.300[51,101] 1ns 100.79mb |---L2.300----| "
- "L2.303[102,151] 1ns 100.07mb |---L2.303----| "
- "L2.304[152,178] 1ns 55.14mb |L2.304| "
- "L2.305[230,276] 1ns 94.44mb |---L2.305---| "
- "L2.306[277,288] 1ns 24.64mb |L2.306|"
- "L2.298[51,101] 1ns 100.67mb |---L2.298----| "
- "L2.301[102,152] 1ns 100.45mb |---L2.301----| "
- "L2.302[153,202] 1ns 98.44mb |---L2.302----| "
- "L2.303[203,250] 1ns 98.44mb |---L2.303---| "
"###
);
}

View File

@ -87,7 +87,7 @@ async fn many_l0_files_different_created_order() {
- "L0.?[10,42] 2ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.1, L0.2"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 1, type=compact. 2 Input Files, 5.1kb total:"
- "L0, all files 2.55kb "
- "L0.3[20,32] 3ns |-------------L0.3--------------| "
@ -97,7 +97,7 @@ async fn many_l0_files_different_created_order() {
- "L0.?[20,52] 4ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.3, L0.4"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 2, type=compact. 2 Input Files, 10.2kb total:"
- "L0, all files 5.1kb "
- "L0.6[20,52] 4ns |-------------------------------L0.6-------------------------------| "
@ -107,7 +107,7 @@ async fn many_l0_files_different_created_order() {
- "L1.?[10,52] 4ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.5, L0.6"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1, all files 10.2kb "
- "L1.7[10,52] 4ns |------------------------------------------L1.7------------------------------------------|"
@ -197,7 +197,7 @@ async fn many_l1_files_different_created_order() {
- "L1.?[11,30] 3ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L1.3"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=compact. 2 Input Files, 5.1kb total:"
- "L1, all files 2.55kb "
- "L1.2[31,40] 2ns |------------------L1.2------------------| "
@ -207,7 +207,7 @@ async fn many_l1_files_different_created_order() {
- "L1.?[31,50] 4ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.2, L1.4"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 2, type=compact. 2 Input Files, 10.2kb total:"
- "L1, all files 5.1kb "
- "L1.6[31,50] 4ns |------------------L1.6-------------------| "
@ -217,7 +217,7 @@ async fn many_l1_files_different_created_order() {
- "L2.?[11,50] 4ns |------------------------------------------L2.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.5, L1.6"
- " Creating 1 files at level CompactionLevel::L2"
- " Creating 1 files"
- "**** Final Output Files "
- "L2, all files 10.2kb "
- "L2.7[11,50] 4ns |------------------------------------------L2.7------------------------------------------|"
@ -305,7 +305,7 @@ async fn many_l0_files_different_created_order_non_overlap() {
- "L0.?[11,40] 2ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.1, L0.2"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 1, type=compact. 2 Input Files, 5.1kb total:"
- "L0, all files 2.55kb "
- "L0.3[21,30] 3ns |----------L0.3-----------| "
@ -315,7 +315,7 @@ async fn many_l0_files_different_created_order_non_overlap() {
- "L0.?[21,50] 4ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.3, L0.4"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 2, type=compact. 2 Input Files, 10.2kb total:"
- "L0, all files 5.1kb "
- "L0.6[21,50] 4ns |------------------------------L0.6------------------------------| "
@ -325,7 +325,7 @@ async fn many_l0_files_different_created_order_non_overlap() {
- "L1.?[11,50] 4ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.5, L0.6"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1, all files 10.2kb "
- "L1.7[11,50] 4ns |------------------------------------------L1.7------------------------------------------|"
@ -407,7 +407,7 @@ async fn many_l1_files() {
- "L1.?[24,25] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L1.13, L0.21, L0.22, L0.23"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[20]). 20 Input Files, 203mb total:"
- "L1 "
- "L1.14[26,27] 1ns 10mb |L1.14| "
@ -436,7 +436,7 @@ async fn many_l1_files() {
- "L2.?[21,39] 1ns 98.9mb |-----------------L2.?------------------| "
- "Committing partition 1:"
- " Soft Deleting 20 files: L1.1, L1.2, L1.3, L1.4, L1.5, L1.6, L1.7, L1.8, L1.9, L1.10, L1.11, L1.12, L1.14, L1.15, L1.16, L1.17, L1.18, L1.19, L1.20, L1.24"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.25[0,20] 1ns 104.1mb |-------------------L2.25--------------------| "
@ -975,7 +975,7 @@ async fn many_tiny_l0_files() {
- "L0.?[0,200] 1ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 200 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128, L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L0.191, L0.192, L0.193, L0.194, L0.195, L0.196, L0.197, L0.198, L0.199, L0.200"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 1, type=compact. 88 Input Files, 616kb total:"
- "L0, all files 7kb "
- "L0.201[200,201] 1ns |L0.201| "
@ -1071,7 +1071,7 @@ async fn many_tiny_l0_files() {
- "L0.?[200,288] 1ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 88 files: L0.201, L0.202, L0.203, L0.204, L0.205, L0.206, L0.207, L0.208, L0.209, L0.210, L0.211, L0.212, L0.213, L0.214, L0.215, L0.216, L0.217, L0.218, L0.219, L0.220, L0.221, L0.222, L0.223, L0.224, L0.225, L0.226, L0.227, L0.228, L0.229, L0.230, L0.231, L0.232, L0.233, L0.234, L0.235, L0.236, L0.237, L0.238, L0.239, L0.240, L0.241, L0.242, L0.243, L0.244, L0.245, L0.246, L0.247, L0.248, L0.249, L0.250, L0.251, L0.252, L0.253, L0.254, L0.255, L0.256, L0.257, L0.258, L0.259, L0.260, L0.261, L0.262, L0.263, L0.264, L0.265, L0.266, L0.267, L0.268, L0.269, L0.270, L0.271, L0.272, L0.273, L0.274, L0.275, L0.276, L0.277, L0.278, L0.279, L0.280, L0.281, L0.282, L0.283, L0.284, L0.285, L0.286, L0.287, L0.288"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 2, type=compact. 2 Input Files, 1.97mb total:"
- "L0 "
- "L0.290[200,288] 1ns 616kb |---------L0.290----------| "
@ -1081,7 +1081,7 @@ async fn many_tiny_l0_files() {
- "L1.?[0,288] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L0.289, L0.290"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1, all files 1.97mb "
- "L1.291[0,288] 1ns |-----------------------------------------L1.291-----------------------------------------|"
@ -1739,7 +1739,7 @@ async fn over_two_times_max_files_per_plan() {
- "L0.?[0,200] 1ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 200 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128, L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L0.191, L0.192, L0.193, L0.194, L0.195, L0.196, L0.197, L0.198, L0.199, L0.200"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 1, type=compact. 200 Input Files, 1.37mb total:"
- "L0, all files 7kb "
- "L0.201[200,201] 1ns |L0.201| "
@ -1947,7 +1947,7 @@ async fn over_two_times_max_files_per_plan() {
- "L0.?[200,400] 1ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 200 files: L0.201, L0.202, L0.203, L0.204, L0.205, L0.206, L0.207, L0.208, L0.209, L0.210, L0.211, L0.212, L0.213, L0.214, L0.215, L0.216, L0.217, L0.218, L0.219, L0.220, L0.221, L0.222, L0.223, L0.224, L0.225, L0.226, L0.227, L0.228, L0.229, L0.230, L0.231, L0.232, L0.233, L0.234, L0.235, L0.236, L0.237, L0.238, L0.239, L0.240, L0.241, L0.242, L0.243, L0.244, L0.245, L0.246, L0.247, L0.248, L0.249, L0.250, L0.251, L0.252, L0.253, L0.254, L0.255, L0.256, L0.257, L0.258, L0.259, L0.260, L0.261, L0.262, L0.263, L0.264, L0.265, L0.266, L0.267, L0.268, L0.269, L0.270, L0.271, L0.272, L0.273, L0.274, L0.275, L0.276, L0.277, L0.278, L0.279, L0.280, L0.281, L0.282, L0.283, L0.284, L0.285, L0.286, L0.287, L0.288, L0.289, L0.290, L0.291, L0.292, L0.293, L0.294, L0.295, L0.296, L0.297, L0.298, L0.299, L0.300, L0.301, L0.302, L0.303, L0.304, L0.305, L0.306, L0.307, L0.308, L0.309, L0.310, L0.311, L0.312, L0.313, L0.314, L0.315, L0.316, L0.317, L0.318, L0.319, L0.320, L0.321, L0.322, L0.323, L0.324, L0.325, L0.326, L0.327, L0.328, L0.329, L0.330, L0.331, L0.332, L0.333, L0.334, L0.335, L0.336, L0.337, L0.338, L0.339, L0.340, L0.341, L0.342, L0.343, L0.344, L0.345, L0.346, L0.347, L0.348, L0.349, L0.350, L0.351, L0.352, L0.353, L0.354, L0.355, L0.356, L0.357, L0.358, L0.359, L0.360, L0.361, L0.362, L0.363, L0.364, L0.365, L0.366, L0.367, L0.368, L0.369, L0.370, L0.371, L0.372, L0.373, L0.374, L0.375, L0.376, L0.377, L0.378, L0.379, L0.380, L0.381, L0.382, L0.383, L0.384, L0.385, L0.386, L0.387, L0.388, L0.389, L0.390, L0.391, L0.392, L0.393, L0.394, L0.395, L0.396, L0.397, L0.398, L0.399, L0.400"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 2, type=compact. 10 Input Files, 70kb total:"
- "L0, all files 7kb "
- "L0.401[400,401] 1ns |L0.401-| "
@ -1965,7 +1965,7 @@ async fn over_two_times_max_files_per_plan() {
- "L0.?[400,410] 1ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 10 files: L0.401, L0.402, L0.403, L0.404, L0.405, L0.406, L0.407, L0.408, L0.409, L0.410"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 3, type=compact. 3 Input Files, 2.8mb total:"
- "L0 "
- "L0.413[400,410] 1ns 70kb |L0.413|"
@ -1976,7 +1976,7 @@ async fn over_two_times_max_files_per_plan() {
- "L1.?[0,410] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.411, L0.412, L0.413"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1, all files 2.8mb "
- "L1.414[0,410] 1ns |-----------------------------------------L1.414-----------------------------------------|"
@ -2514,7 +2514,7 @@ async fn many_tiny_l1_files() {
- "L1.?[0,399] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 200 files: L1.1, L1.2, L1.3, L1.4, L1.5, L1.6, L1.7, L1.8, L1.9, L1.10, L1.11, L1.12, L1.13, L1.14, L1.15, L1.16, L1.17, L1.18, L1.19, L1.20, L1.21, L1.22, L1.23, L1.24, L1.25, L1.26, L1.27, L1.28, L1.29, L1.30, L1.31, L1.32, L1.33, L1.34, L1.35, L1.36, L1.37, L1.38, L1.39, L1.40, L1.41, L1.42, L1.43, L1.44, L1.45, L1.46, L1.47, L1.48, L1.49, L1.50, L1.51, L1.52, L1.53, L1.54, L1.55, L1.56, L1.57, L1.58, L1.59, L1.60, L1.61, L1.62, L1.63, L1.64, L1.65, L1.66, L1.67, L1.68, L1.69, L1.70, L1.71, L1.72, L1.73, L1.74, L1.75, L1.76, L1.77, L1.78, L1.79, L1.80, L1.81, L1.82, L1.83, L1.84, L1.85, L1.86, L1.87, L1.88, L1.89, L1.90, L1.91, L1.92, L1.93, L1.94, L1.95, L1.96, L1.97, L1.98, L1.99, L1.100, L1.101, L1.102, L1.103, L1.104, L1.105, L1.106, L1.107, L1.108, L1.109, L1.110, L1.111, L1.112, L1.113, L1.114, L1.115, L1.116, L1.117, L1.118, L1.119, L1.120, L1.121, L1.122, L1.123, L1.124, L1.125, L1.126, L1.127, L1.128, L1.129, L1.130, L1.131, L1.132, L1.133, L1.134, L1.135, L1.136, L1.137, L1.138, L1.139, L1.140, L1.141, L1.142, L1.143, L1.144, L1.145, L1.146, L1.147, L1.148, L1.149, L1.150, L1.151, L1.152, L1.153, L1.154, L1.155, L1.156, L1.157, L1.158, L1.159, L1.160, L1.161, L1.162, L1.163, L1.164, L1.165, L1.166, L1.167, L1.168, L1.169, L1.170, L1.171, L1.172, L1.173, L1.174, L1.175, L1.176, L1.177, L1.178, L1.179, L1.180, L1.181, L1.182, L1.183, L1.184, L1.185, L1.186, L1.187, L1.188, L1.189, L1.190, L1.191, L1.192, L1.193, L1.194, L1.195, L1.196, L1.197, L1.198, L1.199, L1.200"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=compact. 88 Input Files, 616kb total:"
- "L1, all files 7kb "
- "L1.201[400,401] 1ns |L1.201| "
@ -2610,7 +2610,7 @@ async fn many_tiny_l1_files() {
- "L1.?[400,575] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 88 files: L1.201, L1.202, L1.203, L1.204, L1.205, L1.206, L1.207, L1.208, L1.209, L1.210, L1.211, L1.212, L1.213, L1.214, L1.215, L1.216, L1.217, L1.218, L1.219, L1.220, L1.221, L1.222, L1.223, L1.224, L1.225, L1.226, L1.227, L1.228, L1.229, L1.230, L1.231, L1.232, L1.233, L1.234, L1.235, L1.236, L1.237, L1.238, L1.239, L1.240, L1.241, L1.242, L1.243, L1.244, L1.245, L1.246, L1.247, L1.248, L1.249, L1.250, L1.251, L1.252, L1.253, L1.254, L1.255, L1.256, L1.257, L1.258, L1.259, L1.260, L1.261, L1.262, L1.263, L1.264, L1.265, L1.266, L1.267, L1.268, L1.269, L1.270, L1.271, L1.272, L1.273, L1.274, L1.275, L1.276, L1.277, L1.278, L1.279, L1.280, L1.281, L1.282, L1.283, L1.284, L1.285, L1.286, L1.287, L1.288"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Final Output Files "
- "L1 "
- "L1.289[0,399] 1ns 1.37mb |---------------------------L1.289---------------------------| "
@ -3082,7 +3082,7 @@ async fn many_l0_and_overlapped_l1_files() {
- "L0.?[0,190] 1ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 190 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128, L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190"
- " Creating 1 files at level CompactionLevel::L0"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[159]). 21 Input Files, 21.3mb total:"
- "L0 "
- "L0.211[0,190] 1ns 1.3mb |--------------------------------------L0.211---------------------------------------| "
@ -3113,7 +3113,7 @@ async fn many_l0_and_overlapped_l1_files() {
- "L1.?[160,199] 1ns 4.28mb |-----L1.?------| "
- "Committing partition 1:"
- " Soft Deleting 21 files: L1.191, L1.192, L1.193, L1.194, L1.195, L1.196, L1.197, L1.198, L1.199, L1.200, L1.201, L1.202, L1.203, L1.204, L1.205, L1.206, L1.207, L1.208, L1.209, L1.210, L0.211"
- " Creating 2 files at level CompactionLevel::L1"
- " Creating 2 files"
- "**** Final Output Files "
- "L1 "
- "L1.212[0,159] 1ns 17.02mb|-------------------------------L1.212--------------------------------| "
@ -3606,7 +3606,7 @@ async fn not_many_l0_and_overlapped_l1_files() {
- "L1.?[0,190] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 195 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128, L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L1.191, L1.192, L1.193, L1.194, L1.195"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "**** Simulation run 1, type=split(split_times=[2407]). 16 Input Files, 21.3mb total:"
- "L1 "
- "L1.196[200,209] 1ns 1mb |L1.196| "
@ -3631,7 +3631,7 @@ async fn not_many_l0_and_overlapped_l1_files() {
- "L2.?[2408,3009] 1ns 4.26mb |-----L2.?------| "
- "Committing partition 1:"
- " Soft Deleting 16 files: L1.196, L1.197, L1.198, L1.199, L1.200, L1.201, L1.202, L1.203, L1.204, L1.205, L1.206, L1.207, L1.208, L1.209, L1.210, L1.211"
- " Creating 2 files at level CompactionLevel::L2"
- " Creating 2 files"
- "**** Final Output Files "
- "L2 "
- "L2.212[0,2407] 1ns 17.04mb|-------------------------------L2.212--------------------------------| "

View File

@ -48,6 +48,7 @@
//! ```text
//! - L0.?[300,350] 5kb |-L0.3-|
//! ```
mod common_use_cases;
mod core;
mod knobs;
mod large_files;
@ -84,7 +85,6 @@ pub(crate) async fn layout_setup_builder() -> TestSetupBuilder<false> {
.with_percentage_max_file_size(20)
.with_split_percentage(80)
.with_max_num_files_per_plan(200)
.with_max_compact_size(256 * ONE_MB as usize)
.with_min_num_l1_files_to_compact(10)
.with_max_desired_file_size_bytes(100 * ONE_MB)
.simulate_without_object_store()

View File

@ -80,7 +80,7 @@ async fn two_giant_files() {
- "L0.2[100,100] 1ns |------------------------------------------L0.2------------------------------------------|"
- "WARNING: file L0.1[100,100] 1ns 4.88gb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L0.2[100,100] 1ns 4.88gb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. This may happen if a large amount of data has the same timestamp"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0, all files 4.88gb "
- "L0.1[100,100] 1ns |------------------------------------------L0.1------------------------------------------|"
@ -122,7 +122,7 @@ async fn two_giant_files_time_range_1() {
- "L0.2[100,101] 1ns |------------------------------------------L0.2------------------------------------------|"
- "WARNING: file L0.1[100,101] 1ns 4.88gb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L0.2[100,101] 1ns 4.88gb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. This may happen if a large amount of data has the same timestamp"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0, all files 4.88gb "
- "L0.1[100,101] 1ns |------------------------------------------L0.1------------------------------------------|"
@ -181,7 +181,7 @@ async fn many_medium_files() {
- "L0.18[100,100] 1ns |-----------------------------------------L0.18------------------------------------------|"
- "L0.19[100,100] 1ns |-----------------------------------------L0.19------------------------------------------|"
- "L0.20[100,100] 1ns |-----------------------------------------L0.20------------------------------------------|"
- "**** Simulation run 0, type=compact. 8 Input Files, 240mb total:"
- "**** Simulation run 0, type=compact. 10 Input Files, 300mb total:"
- "L0, all files 30mb "
- "L0.20[100,100] 1ns |-----------------------------------------L0.20------------------------------------------|"
- "L0.19[100,100] 1ns |-----------------------------------------L0.19------------------------------------------|"
@ -191,13 +191,15 @@ async fn many_medium_files() {
- "L0.15[100,100] 1ns |-----------------------------------------L0.15------------------------------------------|"
- "L0.14[100,100] 1ns |-----------------------------------------L0.14------------------------------------------|"
- "L0.13[100,100] 1ns |-----------------------------------------L0.13------------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 240mb total:"
- "L1, all files 240mb "
- "L0.12[100,100] 1ns |-----------------------------------------L0.12------------------------------------------|"
- "L0.11[100,100] 1ns |-----------------------------------------L0.11------------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 300mb total:"
- "L1, all files 300mb "
- "L1.?[100,100] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 8 files: L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files at level CompactionLevel::L1"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. This may happen if a large amount of data has the same timestamp"
- " Soft Deleting 10 files: L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0 "
- "L0.1[100,100] 1ns 30mb |------------------------------------------L0.1------------------------------------------|"
@ -210,11 +212,9 @@ async fn many_medium_files() {
- "L0.8[100,100] 1ns 30mb |------------------------------------------L0.8------------------------------------------|"
- "L0.9[100,100] 1ns 30mb |------------------------------------------L0.9------------------------------------------|"
- "L0.10[100,100] 1ns 30mb |-----------------------------------------L0.10------------------------------------------|"
- "L0.11[100,100] 1ns 30mb |-----------------------------------------L0.11------------------------------------------|"
- "L0.12[100,100] 1ns 30mb |-----------------------------------------L0.12------------------------------------------|"
- "L1 "
- "L1.21[100,100] 1ns 240mb |-----------------------------------------L1.21------------------------------------------|"
- "WARNING: file L1.21[100,100] 1ns 240mb exceeds soft limit 100mb by more than 50%"
- "L1.21[100,100] 1ns 300mb |-----------------------------------------L1.21------------------------------------------|"
- "WARNING: file L1.21[100,100] 1ns 300mb exceeds soft limit 100mb by more than 50%"
"###
);
}
@ -267,7 +267,7 @@ async fn many_medium_files_time_range_1() {
- "L0.18[100,101] 1ns |-----------------------------------------L0.18------------------------------------------|"
- "L0.19[100,101] 1ns |-----------------------------------------L0.19------------------------------------------|"
- "L0.20[100,101] 1ns |-----------------------------------------L0.20------------------------------------------|"
- "**** Simulation run 0, type=compact. 8 Input Files, 240mb total:"
- "**** Simulation run 0, type=compact. 10 Input Files, 300mb total:"
- "L0, all files 30mb "
- "L0.20[100,101] 1ns |-----------------------------------------L0.20------------------------------------------|"
- "L0.19[100,101] 1ns |-----------------------------------------L0.19------------------------------------------|"
@ -277,13 +277,15 @@ async fn many_medium_files_time_range_1() {
- "L0.15[100,101] 1ns |-----------------------------------------L0.15------------------------------------------|"
- "L0.14[100,101] 1ns |-----------------------------------------L0.14------------------------------------------|"
- "L0.13[100,101] 1ns |-----------------------------------------L0.13------------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 240mb total:"
- "L1, all files 240mb "
- "L0.12[100,101] 1ns |-----------------------------------------L0.12------------------------------------------|"
- "L0.11[100,101] 1ns |-----------------------------------------L0.11------------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 300mb total:"
- "L1, all files 300mb "
- "L1.?[100,101] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 8 files: L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files at level CompactionLevel::L1"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. This may happen if a large amount of data has the same timestamp"
- " Soft Deleting 10 files: L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 314572800. This may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0 "
- "L0.1[100,101] 1ns 30mb |------------------------------------------L0.1------------------------------------------|"
@ -296,11 +298,9 @@ async fn many_medium_files_time_range_1() {
- "L0.8[100,101] 1ns 30mb |------------------------------------------L0.8------------------------------------------|"
- "L0.9[100,101] 1ns 30mb |------------------------------------------L0.9------------------------------------------|"
- "L0.10[100,101] 1ns 30mb |-----------------------------------------L0.10------------------------------------------|"
- "L0.11[100,101] 1ns 30mb |-----------------------------------------L0.11------------------------------------------|"
- "L0.12[100,101] 1ns 30mb |-----------------------------------------L0.12------------------------------------------|"
- "L1 "
- "L1.21[100,101] 1ns 240mb |-----------------------------------------L1.21------------------------------------------|"
- "WARNING: file L1.21[100,101] 1ns 240mb exceeds soft limit 100mb by more than 50%"
- "L1.21[100,101] 1ns 300mb |-----------------------------------------L1.21------------------------------------------|"
- "WARNING: file L1.21[100,101] 1ns 300mb exceeds soft limit 100mb by more than 50%"
"###
);
}
@ -326,7 +326,7 @@ async fn many_small_files() {
.await;
}
// L0s are compacted into a single L1 file. It can't be split becasue of single timestamp
// L0s are compacted into a single L1 file. It can't be split because of single timestamp
// Then the L1 is large enough to get upgraded to L2
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@ -381,7 +381,7 @@ async fn many_small_files() {
- "L1.?[100,100] 1ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 20 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files at level CompactionLevel::L1"
- " Creating 1 files"
- "Committing partition 1:"
- " Upgrading 1 files level to CompactionLevel::L2: L1.21"
- "**** Final Output Files "

View File

@ -63,11 +63,7 @@ impl Commit for CommitRecorder {
}
if !create.is_empty() {
run_log.push(format!(
" Creating {} files at level {}",
create.len(),
target_level
));
run_log.push(format!(" Creating {} files", create.len()));
}
}
let output_files = self

View File

@ -135,7 +135,6 @@ impl TestSetupBuilder<false> {
partitions_source: PartitionsSourceConfig::CatalogRecentWrites,
shadow_mode: false,
ignore_partition_skip_marker: false,
max_compact_size: usize::MAX,
shard_config: None,
min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,
process_once: true,
@ -485,20 +484,6 @@ impl TestSetupBuilder<false> {
}
}
impl TestSetupBuilder<true> {
/// Set max_compact_size
pub fn with_max_compact_size_relative_to_total_size(self, delta: isize) -> Self {
let total_size = self.files.iter().map(|f| f.file_size_bytes).sum::<i64>();
Self {
config: Config {
max_compact_size: (total_size as isize + delta) as usize,
..self.config
},
..self
}
}
}
impl<const WITH_FILES: bool> TestSetupBuilder<WITH_FILES> {
/// Use shadow mode
pub fn with_shadow_mode(mut self) -> Self {
@ -545,12 +530,6 @@ impl<const WITH_FILES: bool> TestSetupBuilder<WITH_FILES> {
self
}
/// Set max_compact_size
pub fn with_max_compact_size(mut self, max_compact_size: usize) -> Self {
self.config.max_compact_size = max_compact_size;
self
}
/// Create a [`TestSetup`]
pub async fn build(self) -> TestSetup {
let candidate_partition = Arc::new(PartitionInfo {
@ -781,7 +760,8 @@ pub struct TestTimes {
}
impl TestTimes {
fn new(time_provider: &dyn TimeProvider) -> Self {
/// Create a new instance
pub fn new(time_provider: &dyn TimeProvider) -> Self {
let time_1_minute_future = time_provider.minutes_into_future(1).timestamp_nanos();
let time_2_minutes_future = time_provider.minutes_into_future(2).timestamp_nanos();
let time_3_minutes_future = time_provider.minutes_into_future(3).timestamp_nanos();
@ -966,6 +946,32 @@ pub fn create_l2_files() -> Vec<ParquetFile> {
vec![l2_1, l2_2]
}
/// This setup will return files with ranges as follows:
/// |--L1.1--|
/// |--L0.1--|
pub fn create_overlapped_two_overlapped_files(size: i64) -> Vec<ParquetFile> {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 500)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(450, 620)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
// Put the files in random order
vec![l1_1, l0_1]
}
/// This setup will return files with ranges as follows:
/// |--L1.1--| |--L1.2--| |--L1.3--|
/// |--L0.1--| |--L0.2--| |--L0.3--|

View File

@ -284,15 +284,13 @@ fn even_time_split(
for split in split_times {
assert!(
split > &last_split,
"split times must be in ascending order"
);
assert!(
Timestamp::new(*split) > overall_min_time,
"split time must be greater than time range min"
"split times {last_split} {split} must be in ascending order",
);
assert!(
Timestamp::new(*split) < overall_max_time,
"split time must be less than time range max"
"split time {} must be less than time range max {}",
split,
overall_max_time.get()
);
last_split = *split;
}

View File

@ -87,7 +87,7 @@ assert_cmd = "2.0.8"
async-trait = "0.1"
predicate = { path = "../predicate" }
predicates = "2.1.0"
serde = "1.0.155"
serde = "1.0.156"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }

View File

@ -479,7 +479,6 @@ impl Config {
partition_filter: None,
shadow_mode: false,
ignore_partition_skip_marker: false,
max_compact_size: 268_435_456, // 256 MB
shard_count: None,
shard_id: None,
min_num_l1_files_to_compact: 1,

View File

@ -32,11 +32,6 @@ use trace::TraceCollector;
const TOPIC: &str = "iox-shared";
const TRANSITION_SHARD_INDEX: i32 = TRANSITION_SHARD_NUMBER;
// Minimum multiple between max_desired_file_size_bytes and max_compact_size
// Since max_desired_file_size_bytes is softly enforced, actual file sizes can exceed it, and a
// single compaction job must be able to compact >1 max sized file, so the multiple should be at least 3.
const MIN_COMPACT_SIZE_MULTIPLE: i64 = 3;
pub struct Compactor2ServerType {
compactor: Compactor2,
metric_registry: Arc<Registry>,
@ -177,15 +172,6 @@ pub async fn create_compactor2_server_type(
),
};
if compactor_config.max_desired_file_size_bytes as i64 * MIN_COMPACT_SIZE_MULTIPLE
> compactor_config.max_compact_size.try_into().unwrap()
{
panic!("max_compact_size ({}) must be at least {} times larger than max_desired_file_size_bytes ({})",
compactor_config.max_compact_size,
MIN_COMPACT_SIZE_MULTIPLE,
compactor_config.max_desired_file_size_bytes);
}
let compactor = Compactor2::start(Config {
shard_id,
metric_registry: Arc::clone(&metric_registry),
@ -209,7 +195,6 @@ pub async fn create_compactor2_server_type(
partitions_source,
shadow_mode: compactor_config.shadow_mode,
ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
max_compact_size: compactor_config.max_compact_size,
shard_config,
min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,
process_once: compactor_config.process_once,

View File

@ -29,7 +29,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8c34ca4fa34787b137b48ce4f6ffd41b64a1a633" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "612eb1d0ce338af7980fa906df8796eb47c4be44" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "612eb1d0ce338af7980fa906df8796eb47c4be44", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "612eb1d0ce338af7980fa906df8796eb47c4be44", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" }
fixedbitset = { version = "0.4" }