Merge branch 'main' into idpe-17789/provide-job-on-commit

commit cc70a2c38b
@@ -699,7 +699,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
 dependencies = [
 "memchr",
-"regex-automata 0.3.3",
+"regex-automata 0.3.4",
 "serde",
 ]

@@ -763,6 +763,7 @@ dependencies = [
 "pdatastructs",
 "proptest",
 "rand",
+"test_helpers",
 "tokio",
 "tokio-util",
 "trace",
@@ -2662,6 +2663,7 @@ dependencies = [
 "flatbuffers",
 "futures",
 "generated_types",
+"gossip",
 "hashbrown 0.14.0",
 "influxdb_iox_client",
 "ingester_query_grpc",
@@ -3107,6 +3109,7 @@ dependencies = [
 "authz",
 "clap_blocks",
 "data_types",
+"gossip",
 "hashbrown 0.14.0",
 "hyper",
 "iox_catalog",
@@ -4575,7 +4578,7 @@ checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
 dependencies = [
 "aho-corasick",
 "memchr",
-"regex-automata 0.3.3",
+"regex-automata 0.3.4",
 "regex-syntax 0.7.4",
 ]

@@ -4590,9 +4593,9 @@ dependencies = [

 [[package]]
 name = "regex-automata"
-version = "0.3.3"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
+checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294"
 dependencies = [
 "aho-corasick",
 "memchr",
@@ -4693,6 +4696,7 @@ dependencies = [
 "flate2",
 "futures",
 "generated_types",
+"gossip",
 "hashbrown 0.14.0",
 "hyper",
 "influxdb-line-protocol",
@@ -4906,18 +4910,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"

 [[package]]
 name = "serde"
-version = "1.0.177"
+version = "1.0.179"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63ba2516aa6bf82e0b19ca8b50019d52df58455d3cf9bdaf6315225fdd0c560a"
+checksum = "0a5bf42b8d227d4abf38a1ddb08602e229108a517cd4e5bb28f9c7eaafdce5c0"
 dependencies = [
 "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.177"
+version = "1.0.179"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "401797fe7833d72109fedec6bfcbe67c0eed9b99772f26eb8afd261f0abc6fd3"
+checksum = "741e124f5485c7e60c03b043f79f320bff3527f4bbf12cf3831750dc46a0ec2c"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -6265,6 +6269,7 @@ dependencies = [
 "pin-project",
 "sysinfo",
 "tempfile",
+"test_helpers",
 "tokio",
 "tokio-util",
 "trace",
@@ -6879,7 +6884,7 @@ dependencies = [
 "rand",
 "rand_core",
 "regex",
-"regex-automata 0.3.3",
+"regex-automata 0.3.4",
 "regex-syntax 0.7.4",
 "reqwest",
 "ring",
@@ -23,6 +23,7 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
 [dev-dependencies]
 criterion = { version = "0.5", default-features = false, features = ["rayon"]}
 proptest = { version = "1", default_features = false, features = ["std"] }
+test_helpers = { path = "../test_helpers" }

 [lib]
 # Allow --save-baseline to work
@@ -5,6 +5,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
 use iox_time::{MockProvider, Time};
 use parking_lot::Mutex;
 use rand::rngs::mock::StepRng;
+use test_helpers::maybe_start_logging;
 use tokio::{runtime::Handle, sync::Notify};

 use crate::{
@@ -116,6 +117,7 @@ async fn test_refresh_does_not_update_lru_time() {
 time_provider,
 loader,
 notify_idle,
+pool,
 ..
 } = TestStateLruAndRefresh::new();

@@ -135,12 +137,14 @@ async fn test_refresh_does_not_update_lru_time() {

 let barrier = loader.block_next(1, String::from("foo"));
 backend.set(1, String::from("a"));
+pool.wait_converged().await;

 // trigger refresh
 time_provider.inc(Duration::from_secs(1));

 time_provider.inc(Duration::from_secs(1));
 backend.set(2, String::from("b"));
+pool.wait_converged().await;

 time_provider.inc(Duration::from_secs(1));

@@ -150,6 +154,7 @@ async fn test_refresh_does_not_update_lru_time() {

 // add a third item to the cache, forcing LRU to evict one of the items
 backend.set(3, String::from("c"));
+pool.wait_converged().await;

 // Should evict `1` even though it was refreshed after `2` was added
 assert_eq!(backend.get(&1), None);
@@ -192,6 +197,8 @@ async fn test_if_refresh_to_slow_then_expire() {

 #[tokio::test]
 async fn test_refresh_can_trigger_lru_eviction() {
+maybe_start_logging();
+
 let TestStateLRUAndRefresh {
 mut backend,
 refresh_duration_provider,
@@ -224,13 +231,16 @@ async fn test_refresh_can_trigger_lru_eviction() {
 backend.set(1, String::from("a"));
 backend.set(2, String::from("c"));
 backend.set(3, String::from("d"));
-assert_eq!(backend.get(&1), Some(String::from("a")));
+pool.wait_converged().await;
 assert_eq!(backend.get(&2), Some(String::from("c")));
 assert_eq!(backend.get(&3), Some(String::from("d")));
+time_provider.inc(Duration::from_millis(1));
+assert_eq!(backend.get(&1), Some(String::from("a")));

 // refresh
-time_provider.inc(Duration::from_secs(1));
+time_provider.inc(Duration::from_secs(10));
 notify_idle.notified_with_timeout().await;
+pool.wait_converged().await;

 // needed to evict 2->"c"
 assert_eq!(backend.get(&1), Some(String::from("b")));
@@ -285,6 +295,7 @@ async fn test_remove_if_check_does_not_extend_lifetime() {
 size_estimator,
 time_provider,
 remove_if_handle,
+pool,
 ..
 } = TestStateLruAndRemoveIf::new().await;

@@ -293,15 +304,18 @@ async fn test_remove_if_check_does_not_extend_lifetime() {
 size_estimator.mock_size(3, String::from("c"), TestSize(4));

 backend.set(1, String::from("a"));
+pool.wait_converged().await;
 time_provider.inc(Duration::from_secs(1));

 backend.set(2, String::from("b"));
+pool.wait_converged().await;
 time_provider.inc(Duration::from_secs(1));

 // Checking remove_if should not count as a "use" of 1
 // for the "least recently used" calculation
 remove_if_handle.remove_if(&1, |_| false);
 backend.set(3, String::from("c"));
+pool.wait_converged().await;

 // adding "c" totals 12 size, but backend has room for only 10
 // so "least recently used" (in this case 1, not 2) should be removed
@@ -397,6 +411,7 @@ impl TestStateLRUAndRefresh {
 "my_pool",
 TestSize(10),
 Arc::clone(&metric_registry),
+&Handle::current(),
 ));
 backend.add_policy(LruPolicy::new(
 Arc::clone(&pool),
@@ -442,6 +457,7 @@ impl TestStateTtlAndLRU {
 "my_pool",
 TestSize(10),
 Arc::clone(&metric_registry),
+&Handle::current(),
 ));
 backend.add_policy(LruPolicy::new(
 Arc::clone(&pool),
@@ -465,6 +481,7 @@ struct TestStateLruAndRemoveIf {
 time_provider: Arc<MockProvider>,
 size_estimator: Arc<TestSizeEstimator>,
 remove_if_handle: RemoveIfHandle<u8, String>,
+pool: Arc<ResourcePool<TestSize>>,
 }

 impl TestStateLruAndRemoveIf {
@@ -479,6 +496,7 @@ impl TestStateLruAndRemoveIf {
 "my_pool",
 TestSize(10),
 Arc::clone(&metric_registry),
+&Handle::current(),
 ));
 backend.add_policy(LruPolicy::new(
 Arc::clone(&pool),
@@ -495,6 +513,7 @@ impl TestStateLruAndRemoveIf {
 time_provider,
 size_estimator,
 remove_if_handle,
+pool,
 }
 }
 }
@@ -507,6 +526,7 @@ struct TestStateLruAndRefresh {
 time_provider: Arc<MockProvider>,
 loader: Arc<TestLoader<u8, (), String>>,
 notify_idle: Arc<Notify>,
+pool: Arc<ResourcePool<TestSize>>,
 }

 impl TestStateLruAndRefresh {
@@ -537,6 +557,7 @@ impl TestStateLruAndRefresh {
 "my_pool",
 TestSize(10),
 Arc::clone(&metric_registry),
+&Handle::current(),
 ));
 backend.add_policy(LruPolicy::new(
 Arc::clone(&pool),
@@ -551,6 +572,7 @@ impl TestStateLruAndRefresh {
 time_provider,
 loader,
 notify_idle,
+pool,
 }
 }
 }
File diff suppressed because it is too large
@@ -393,7 +393,11 @@ where
 /// structures while calling this function if you plan to also [subscribe](Subscriber) to
 /// changes because this would easily lead to deadlocks.
 pub fn execute_requests(&mut self, change_requests: Vec<ChangeRequest<'_, K, V>>) {
-let inner = self.inner.upgrade().expect("backend gone");
+let Some(inner) = self.inner.upgrade() else {
+// backend gone, can happen during shutdowns, try not to panic
+return;
+};

 lock_inner!(mut guard = inner);
 perform_changes(&mut guard, change_requests);
 }
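The change above swaps a panicking `.expect("backend gone")` for a graceful early return via let-else. A minimal, self-contained sketch of the same pattern on a `Weak` reference (hypothetical names, not the crate's code):

use std::sync::{Arc, Weak};

// Hypothetical helper: act on the backend only while its owner is still alive.
fn poke(inner: &Weak<String>) {
    let Some(inner) = inner.upgrade() else {
        // Backend gone; this can happen during shutdown, so just do nothing.
        return;
    };
    println!("still alive: {inner}");
}

fn main() {
    let strong = Arc::new(String::from("backend"));
    let weak = Arc::downgrade(&strong);
    poke(&weak); // prints
    drop(strong);
    poke(&weak); // returns silently instead of panicking
}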
@@ -9,7 +9,15 @@ use std::{
 ///
 /// Can be used to represent in-RAM memory as well as on-disc memory.
 pub trait Resource:
-Add<Output = Self> + Copy + Debug + Into<u64> + PartialOrd + Send + Sub<Output = Self> + 'static
+Add<Output = Self>
++ Copy
++ Debug
++ Into<u64>
++ PartialOrd
++ Send
++ Sync
++ Sub<Output = Self>
++ 'static
 {
 /// Create resource consumption of zero.
 fn zero() -> Self;
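The only functional change in this hunk is the added `Sync` bound; the rest is the same bounds laid out one per line, presumably so resource values can be shared with the background pool work the tests above now wait on. A hypothetical sketch (not part of the PR) of a small type satisfying all of the listed bounds:

use std::ops::{Add, Sub};

/// RAM usage in bytes. Copy + Debug + PartialOrd + Send + Sync + 'static hold by construction.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
struct RamBytes(u64);

impl Add for RamBytes {
    type Output = Self;
    fn add(self, rhs: Self) -> Self {
        Self(self.0 + rhs.0)
    }
}

impl Sub for RamBytes {
    type Output = Self;
    fn sub(self, rhs: Self) -> Self {
        // Saturate rather than underflow when freeing more than was tracked.
        Self(self.0.saturating_sub(rhs.0))
    }
}

impl From<RamBytes> for u64 {
    fn from(v: RamBytes) -> u64 {
        v.0
    }
}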
@@ -0,0 +1,49 @@
+//! CLI config for cluster gossip communication.
+
+use crate::socket_addr::SocketAddr;
+
+/// Configuration parameters for the cluster gossip communication mechanism.
+#[derive(Debug, Clone, clap::Parser)]
+#[allow(missing_copy_implementations)]
+pub struct GossipConfig {
+/// A comma-delimited set of seed gossip peer addresses.
+///
+/// Example: "10.0.0.1:4242,10.0.0.2:4242"
+///
+/// These seeds will be used to discover all other peers that talk to the
+/// same seeds. Typically all nodes in the cluster should use the same set
+/// of seeds.
+#[clap(
+long = "gossip-seed-list",
+env = "INFLUXDB_IOX_GOSSIP_SEED_LIST",
+required = false,
+num_args=1..,
+value_delimiter = ',',
+requires = "gossip_bind_address", // Field name, not flag
+)]
+pub seed_list: Vec<String>,
+
+/// The UDP socket address IOx will use for gossip communication between
+/// peers.
+///
+/// Example: "0.0.0.0:4242"
+///
+/// If not provided, the gossip sub-system is disabled.
+#[clap(
+long = "gossip-bind-address",
+env = "INFLUXDB_IOX_GOSSIP_BIND_ADDR",
+requires = "seed_list", // Field name, not flag
+action
+)]
+pub gossip_bind_address: Option<SocketAddr>,
+}
+
+impl GossipConfig {
+/// Initialise the gossip config to be disabled.
+pub fn disabled() -> Self {
+Self {
+seed_list: vec![],
+gossip_bind_address: None,
+}
+}
+}
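The two `requires` attributes tie the flags together: a seed list without a bind address (or vice versa) is rejected at parse time, while omitting both simply leaves gossip disabled. A hypothetical, self-contained sketch of that behaviour, using simplified names and plain `String` instead of the crate's `SocketAddr` type (not part of the PR):

use clap::Parser;

#[derive(Debug, Parser)]
struct Demo {
    /// Stands in for `seed_list`; only valid together with `--bind`.
    #[clap(long = "seeds", value_delimiter = ',', requires = "bind")]
    seeds: Vec<String>,

    /// Stands in for `gossip_bind_address`; only valid together with `--seeds`.
    #[clap(long = "bind", requires = "seeds")]
    bind: Option<String>,
}

fn main() {
    // Both flags given: accepted.
    assert!(Demo::try_parse_from(["demo", "--seeds", "10.0.0.1:4242", "--bind", "0.0.0.0:4242"]).is_ok());
    // Seeds without a bind address: rejected because of `requires`.
    assert!(Demo::try_parse_from(["demo", "--seeds", "10.0.0.1:4242"]).is_err());
    // Neither flag: parses fine, gossip simply stays disabled.
    assert!(Demo::try_parse_from(["demo"]).is_ok());
}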
@@ -2,10 +2,16 @@

 use std::path::PathBuf;

+use crate::gossip::GossipConfig;
+
 /// CLI config for the ingester using the RPC write path
 #[derive(Debug, Clone, clap::Parser)]
 #[allow(missing_copy_implementations)]
 pub struct IngesterConfig {
+/// Gossip config.
+#[clap(flatten)]
+pub gossip_config: GossipConfig,
+
 /// Where this ingester instance should store its write-ahead log files. Each ingester instance
 /// must have its own directory.
 #[clap(long = "wal-directory", env = "INFLUXDB_IOX_WAL_DIRECTORY", action)]
@@ -22,6 +22,7 @@ pub mod catalog_dsn;
 pub mod compactor;
 pub mod compactor_scheduler;
 pub mod garbage_collector;
+pub mod gossip;
 pub mod ingester;
 pub mod ingester_address;
 pub mod object_store;
@@ -1,6 +1,7 @@
 //! CLI config for the router using the RPC write path

 use crate::{
+gossip::GossipConfig,
 ingester_address::IngesterAddress,
 single_tenant::{
 CONFIG_AUTHZ_ENV_NAME, CONFIG_AUTHZ_FLAG, CONFIG_CST_ENV_NAME, CONFIG_CST_FLAG,
@@ -15,6 +16,10 @@ use std::{
 #[derive(Debug, Clone, clap::Parser)]
 #[allow(missing_copy_implementations)]
 pub struct RouterConfig {
+/// Gossip config.
+#[clap(flatten)]
+pub gossip_config: GossipConfig,
+
 /// Addr for connection to authz
 #[clap(
 long = CONFIG_AUTHZ_FLAG,
@@ -171,7 +171,7 @@ fn to_queryable_parquet_chunk(
 parquet_file_id = file.file.id.get(),
 parquet_file_namespace_id = file.file.namespace_id.get(),
 parquet_file_table_id = file.file.table_id.get(),
-parquet_file_partition_id = file.file.partition_id.get(),
+parquet_file_partition_id = %file.file.partition_id,
 parquet_file_object_store_id = uuid.to_string().as_str(),
 "built parquet chunk from metadata"
 );
@@ -70,8 +70,7 @@ impl ParquetFileSink for MockParquetFileSink {
 let out = ((row_count > 0) || !self.filter_empty_files).then(|| ParquetFileParams {
 namespace_id: partition.namespace_id,
 table_id: partition.table.id,
-partition_id: partition.partition_id,
-partition_hash_id: partition.partition_hash_id.clone(),
+partition_id: partition.transition_partition_id(),
 object_store_id: Uuid::from_u128(guard.len() as u128),
 min_time: Timestamp::new(0),
 max_time: Timestamp::new(0),
@@ -95,7 +94,7 @@ impl ParquetFileSink for MockParquetFileSink {
 #[cfg(test)]
 mod tests {
 use arrow_util::assert_batches_eq;
-use data_types::{NamespaceId, PartitionId, TableId};
+use data_types::{NamespaceId, TableId};
 use datafusion::{
 arrow::{array::new_null_array, datatypes::DataType},
 physical_plan::stream::RecordBatchStreamAdapter,
@@ -159,7 +158,7 @@ mod tests {
 Arc::clone(&schema),
 futures::stream::once(async move { Ok(record_batch_captured) }),
 ));
-let partition_hash_id = partition.partition_hash_id.clone();
+let partition_id = partition.transition_partition_id();
 assert_eq!(
 sink.store(stream, Arc::clone(&partition), level, max_l0_created_at)
 .await
@@ -167,8 +166,7 @@ mod tests {
 Some(ParquetFileParams {
 namespace_id: NamespaceId::new(2),
 table_id: TableId::new(3),
-partition_id: PartitionId::new(1),
-partition_hash_id,
+partition_id,
 object_store_id: Uuid::from_u128(2),
 min_time: Timestamp::new(0),
 max_time: Timestamp::new(0),
@@ -223,7 +221,7 @@ mod tests {
 Arc::clone(&schema),
 futures::stream::empty(),
 ));
-let partition_hash_id = partition.partition_hash_id.clone();
+let partition_id = partition.transition_partition_id();
 assert_eq!(
 sink.store(stream, Arc::clone(&partition), level, max_l0_created_at)
 .await
@@ -231,8 +229,7 @@ mod tests {
 Some(ParquetFileParams {
 namespace_id: NamespaceId::new(2),
 table_id: TableId::new(3),
-partition_id: PartitionId::new(1),
-partition_hash_id,
+partition_id,
 object_store_id: Uuid::from_u128(0),
 min_time: Timestamp::new(0),
 max_time: Timestamp::new(0),
@@ -1,19 +1,35 @@
 use std::{collections::HashMap, fmt::Display};

-use async_trait::async_trait;
-use data_types::{ParquetFile, PartitionId};

 use super::PartitionFilesSource;
+use async_trait::async_trait;
+use data_types::{ParquetFile, PartitionId, TransitionPartitionId};

 #[derive(Debug)]
 pub struct MockPartitionFilesSource {
-files: HashMap<PartitionId, Vec<ParquetFile>>,
+// This complexity is because we're in the process of moving to partition hash IDs rather than
+// partition catalog IDs, and Parquet files might only have the partition hash ID on their
+// record, but the compactor deals with partition catalog IDs because we haven't transitioned
+// it yet. This should become simpler when the transition is complete.
+partition_lookup: HashMap<PartitionId, TransitionPartitionId>,
+file_lookup: HashMap<TransitionPartitionId, Vec<ParquetFile>>,
 }

 impl MockPartitionFilesSource {
-#[allow(dead_code)] // not used anywhere
-pub fn new(files: HashMap<PartitionId, Vec<ParquetFile>>) -> Self {
-Self { files }
+#[cfg(test)]
+pub fn new(
+partition_lookup: HashMap<PartitionId, TransitionPartitionId>,
+parquet_files: Vec<ParquetFile>,
+) -> Self {
+let mut file_lookup: HashMap<TransitionPartitionId, Vec<ParquetFile>> = HashMap::new();
+for file in parquet_files {
+let files = file_lookup.entry(file.partition_id.clone()).or_default();
+files.push(file);
+}
+
+Self {
+partition_lookup,
+file_lookup,
+}
 }
 }
@@ -25,46 +41,60 @@ impl Display for MockPartitionFilesSource {

 #[async_trait]
 impl PartitionFilesSource for MockPartitionFilesSource {
-async fn fetch(&self, partition: PartitionId) -> Vec<ParquetFile> {
-self.files.get(&partition).cloned().unwrap_or_default()
+async fn fetch(&self, partition_id: PartitionId) -> Vec<ParquetFile> {
+self.partition_lookup
+.get(&partition_id)
+.and_then(|partition_hash_id| self.file_lookup.get(partition_hash_id).cloned())
+.unwrap_or_default()
 }
 }

 #[cfg(test)]
 mod tests {
-use iox_tests::ParquetFileBuilder;

 use super::*;
+use iox_tests::{partition_identifier, ParquetFileBuilder};

 #[test]
 fn test_display() {
 assert_eq!(
-MockPartitionFilesSource::new(HashMap::default()).to_string(),
+MockPartitionFilesSource::new(Default::default(), Default::default()).to_string(),
 "mock",
 )
 }

 #[tokio::test]
 async fn test_fetch() {
-let f_1_1 = ParquetFileBuilder::new(1).with_partition(1).build();
-let f_1_2 = ParquetFileBuilder::new(2).with_partition(1).build();
-let f_2_1 = ParquetFileBuilder::new(3).with_partition(2).build();
+let partition_id_1 = PartitionId::new(1);
+let partition_id_2 = PartitionId::new(2);
+let partition_identifier_1 = partition_identifier(1);
+let partition_identifier_2 = partition_identifier(2);
+let f_1_1 = ParquetFileBuilder::new(1)
+.with_partition(partition_identifier_1.clone())
+.build();
+let f_1_2 = ParquetFileBuilder::new(2)
+.with_partition(partition_identifier_1.clone())
+.build();
+let f_2_1 = ParquetFileBuilder::new(3)
+.with_partition(partition_identifier_2.clone())
+.build();

-let files = HashMap::from([
-(PartitionId::new(1), vec![f_1_1.clone(), f_1_2.clone()]),
-(PartitionId::new(2), vec![f_2_1.clone()]),
+let partition_lookup = HashMap::from([
+(partition_id_1, partition_identifier_1.clone()),
+(partition_id_2, partition_identifier_2.clone()),
 ]);
-let source = MockPartitionFilesSource::new(files);
+let files = vec![f_1_1.clone(), f_1_2.clone(), f_2_1.clone()];
+let source = MockPartitionFilesSource::new(partition_lookup, files);

 // different partitions
 assert_eq!(
-source.fetch(PartitionId::new(1)).await,
+source.fetch(partition_id_1).await,
 vec![f_1_1.clone(), f_1_2.clone()],
 );
-assert_eq!(source.fetch(PartitionId::new(2)).await, vec![f_2_1],);
+assert_eq!(source.fetch(partition_id_2).await, vec![f_2_1],);

 // fetching does not drain
-assert_eq!(source.fetch(PartitionId::new(1)).await, vec![f_1_1, f_1_2],);
+assert_eq!(source.fetch(partition_id_1).await, vec![f_1_1, f_1_2],);

 // unknown partition => empty result
 assert_eq!(source.fetch(PartitionId::new(3)).await, vec![],);
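The rewritten `fetch` is now a two-step lookup: catalog `PartitionId` to `TransitionPartitionId` to files, where a miss at either step yields an empty result. A hypothetical, dependency-free sketch of that shape, with plain integers and strings standing in for the real ID types (not the compactor's code):

use std::collections::HashMap;

fn fetch(
    partition_lookup: &HashMap<u64, String>,
    file_lookup: &HashMap<String, Vec<&'static str>>,
    partition_id: u64,
) -> Vec<&'static str> {
    // Resolve the catalog ID to a transition ID, then the transition ID to files.
    partition_lookup
        .get(&partition_id)
        .and_then(|hash_id| file_lookup.get(hash_id).cloned())
        .unwrap_or_default()
}

fn main() {
    let partition_lookup = HashMap::from([(1, "hash-a".to_string())]);
    let file_lookup = HashMap::from([("hash-a".to_string(), vec!["file-1", "file-2"])]);
    assert_eq!(fetch(&partition_lookup, &file_lookup, 1), vec!["file-1", "file-2"]);
    // Unknown partition: empty result, no panic.
    assert!(fetch(&partition_lookup, &file_lookup, 2).is_empty());
}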
@@ -172,7 +172,11 @@ impl RoundInfoSource for LevelBasedRoundInfo {
 _partition_info: &PartitionInfo,
 files: &[ParquetFile],
 ) -> Result<RoundInfo, DynError> {
-let start_level = get_start_level(files);
+let start_level = get_start_level(
+files,
+self.max_num_files_per_plan,
+self.max_total_file_size_per_plan,
+);

 if self.too_many_small_files_to_compact(files, start_level) {
 return Ok(RoundInfo::ManySmallFiles {
@@ -187,23 +191,53 @@ impl RoundInfoSource for LevelBasedRoundInfo {
 }
 }

-fn get_start_level(files: &[ParquetFile]) -> CompactionLevel {
+// get_start_level decides what level to start compaction from. Often this is the lowest level
+// we have ParquetFiles in, but occasionally we decide to compact L1->L2 when L0s still exist.
+//
+// If we ignore the invariants (where intra-level overlaps are allowed), this would be a math problem
+// to optimize write amplification.
+//
+// However, allowing intra-level overlaps in L0 but not L1/L2 adds extra challenge to compacting L0s to L1.
+// This is especially true when there are large quantitites of overlapping L0s and L1s, potentially resulting
+// in many split/compact cycles to resolve the overlaps.
+//
+// Since L1 & L2 only have inter-level overlaps, they can be compacted with just a few splits to align the L1s
+// with the L2s. The relative ease of moving data from L1 to L2 provides additional motivation to compact the
+// L1s to L2s when a backlog of L0s exist. The easily solvable L1->L2 compaction can give us a clean slate in
+// L1, greatly simplifying the remaining L0->L1 compactions.
+fn get_start_level(files: &[ParquetFile], max_files: usize, max_bytes: usize) -> CompactionLevel {
 // panic if the files are empty
 assert!(!files.is_empty());

-// Start with initial level
-// If there are files in this level, itis the start level
-// Otherwise repeat until reaching the final level.
-let mut level = CompactionLevel::Initial;
-while level != CompactionLevel::Final {
-if files.iter().any(|f| f.compaction_level == level) {
-return level;
+let mut l0_cnt: usize = 0;
+let mut l0_bytes: usize = 0;
+let mut l1_bytes: usize = 0;
+
+for f in files {
+match f.compaction_level {
+CompactionLevel::Initial => {
+l0_cnt += 1;
+l0_bytes += f.file_size_bytes as usize;
+}
+CompactionLevel::FileNonOverlapped => {
+l1_bytes += f.file_size_bytes as usize;
+}
+_ => {}
+}
 }

-level = level.next();
+if l1_bytes > 3 * max_bytes && (l0_cnt > max_files || l0_bytes > max_bytes) {
+// L1 is big enough to pose an overlap challenge compacting from L0, and there is quite a bit more coming from L0.
+// The criteria for this early L1->L2 compaction significanly impacts write amplification. The above values optimize
+// existing test cases, but may be changed as additional test cases are added.
+CompactionLevel::FileNonOverlapped
+} else if l0_bytes > 0 {
+CompactionLevel::Initial
+} else if l1_bytes > 0 {
+CompactionLevel::FileNonOverlapped
+} else {
+CompactionLevel::Final
 }
-
-level
 }

 fn get_num_overlapped_files(
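The new `get_start_level` replaces the "lowest populated level wins" scan with a size-based decision. A hypothetical, dependency-free sketch of the same decision, with plain counters standing in for `ParquetFile`s and the thresholds mirroring the `3 * max_bytes` / `max_files` / `max_bytes` checks above (a sketch, not a definitive implementation):

#[derive(Debug, PartialEq)]
enum Level { L0, L1, L2 }

fn start_level(l0_cnt: usize, l0_bytes: usize, l1_bytes: usize, max_files: usize, max_bytes: usize) -> Level {
    if l1_bytes > 3 * max_bytes && (l0_cnt > max_files || l0_bytes > max_bytes) {
        // Large L1 plus a large L0 backlog: clean up L1->L2 first.
        Level::L1
    } else if l0_bytes > 0 {
        Level::L0
    } else if l1_bytes > 0 {
        Level::L1
    } else {
        Level::L2
    }
}

fn main() {
    // L1 already holds 400 MB against a 100 MB per-plan budget and L0 has a big backlog,
    // so compaction starts at L1 even though L0 files exist.
    assert_eq!(start_level(50, 200_000_000, 400_000_000, 20, 100_000_000), Level::L1);
    // Small L1: start at L0 as before.
    assert_eq!(start_level(5, 50_000_000, 100_000_000, 20, 100_000_000), Level::L0);
    // Nothing in L0 or L1: start at the final level.
    assert_eq!(start_level(0, 0, 0, 20, 100_000_000), Level::L2);
}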
@@ -301,7 +301,26 @@ pub fn merge_small_l0_chains(
 for chain in &chains {
 let this_chain_bytes = chain.iter().map(|f| f.file_size_bytes as usize).sum();

-if prior_chain_bytes > 0 && prior_chain_bytes + this_chain_bytes <= max_compact_size {
+// matching max_lo_created_at times indicates that the files were deliberately split. We shouldn't merge
+// chains with matching max_lo_created_at times, because that would encourage undoing the previous split,
+// which minimally increases write amplification, and may cause unproductive split/compact loops.
+let mut matches = 0;
+if prior_chain_bytes > 0 {
+for f in chain.iter() {
+for f2 in &merged_chains[prior_chain_idx as usize] {
+if f.max_l0_created_at == f2.max_l0_created_at {
+matches += 1;
+break;
+}
+}
+}
+}
+
+// Merge it if: there a prior chain to merge with, and merging wouldn't make it too big, or undo a previous split
+if prior_chain_bytes > 0
+&& prior_chain_bytes + this_chain_bytes <= max_compact_size
+&& matches == 0
+{
 // this chain can be added to the prior chain.
 merged_chains[prior_chain_idx as usize].append(&mut chain.clone());
 prior_chain_bytes += this_chain_bytes;
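The new guard reduces to a membership test over the chains' `max_l0_created_at` values: if any timestamp is shared, the chains came from a deliberate split and stay apart. A hypothetical sketch with plain integers standing in for the timestamps (not the compactor's code):

fn chains_share_created_at(prior: &[i64], next: &[i64]) -> bool {
    // Any shared max_l0_created_at means merging would undo an earlier split.
    next.iter().any(|t| prior.contains(t))
}

fn main() {
    // Timestamps overlap: don't merge, even if the combined size would fit.
    assert!(chains_share_created_at(&[100, 200], &[200, 300]));
    // Disjoint timestamps: merging is allowed, subject to the size limit.
    assert!(!chains_share_created_at(&[100, 200], &[300, 400]));
}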
@@ -68,8 +68,8 @@ async fn test_num_files_over_limit() {
 assert_levels(
 &files,
 vec![
-(8, CompactionLevel::FileNonOverlapped),
 (9, CompactionLevel::FileNonOverlapped),
+(10, CompactionLevel::FileNonOverlapped),
 ],
 );
 }
@@ -746,97 +746,85 @@ async fn random_backfill_over_l2s() {
 - "Committing partition 1:"
 - " Soft Deleting 4 files: L0.76, L0.77, L0.79, L0.80"
 - " Creating 8 files"
-- "**** Simulation run 15, type=compact(ManySmallFiles). 10 Input Files, 200mb total:"
-- "L0 "
-- "L0.75[42,356] 1.04us 33mb|-----------L0.75-----------| "
-- "L0.86[357,357] 1.04us 0b |L0.86| "
-- "L0.87[358,670] 1.04us 33mb |-----------L0.87-----------| "
-- "L0.84[671,672] 1.04us 109kb |L0.84| "
-- "L0.85[673,986] 1.04us 33mb |-----------L0.85-----------| "
-- "L0.78[42,356] 1.05us 33mb|-----------L0.78-----------| "
-- "L0.90[357,357] 1.05us 0b |L0.90| "
-- "L0.91[358,670] 1.05us 33mb |-----------L0.91-----------| "
-- "L0.88[671,672] 1.05us 109kb |L0.88| "
-- "L0.89[673,986] 1.05us 33mb |-----------L0.89-----------| "
-- "**** 1 Output Files (parquet_file_id not yet assigned), 200mb total:"
-- "L0, all files 200mb "
-- "L0.?[42,986] 1.05us |------------------------------------------L0.?------------------------------------------|"
-- "Committing partition 1:"
-- " Soft Deleting 10 files: L0.75, L0.78, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91"
-- " Creating 1 files"
-- "**** Simulation run 16, type=split(HighL0OverlapSingleFile)(split_times=[670]). 1 Input Files, 100mb total:"
-- "L1, all files 100mb "
-- "L1.82[358,672] 1.03us |-----------------------------------------L1.82------------------------------------------|"
-- "**** 2 Output Files (parquet_file_id not yet assigned), 100mb total:"
-- "L1 "
-- "L1.?[358,670] 1.03us 99mb|-----------------------------------------L1.?------------------------------------------| "
-- "L1.?[671,672] 1.03us 651kb |L1.?|"
-- "**** Simulation run 17, type=split(HighL0OverlapSingleFile)(split_times=[356]). 1 Input Files, 100mb total:"
-- "L1, all files 100mb "
-- "L1.81[42,357] 1.03us |-----------------------------------------L1.81------------------------------------------|"
-- "**** 2 Output Files (parquet_file_id not yet assigned), 100mb total:"
-- "L1 "
-- "L1.?[42,356] 1.03us 100mb|-----------------------------------------L1.?------------------------------------------| "
-- "L1.?[357,357] 1.03us 325kb |L1.?|"
-- "**** Simulation run 18, type=split(HighL0OverlapSingleFile)(split_times=[356, 670]). 1 Input Files, 200mb total:"
-- "L0, all files 200mb "
-- "L0.92[42,986] 1.05us |-----------------------------------------L0.92------------------------------------------|"
-- "**** 3 Output Files (parquet_file_id not yet assigned), 200mb total:"
-- "L0 "
-- "L0.?[42,356] 1.05us 67mb |-----------L0.?------------| "
-- "L0.?[357,670] 1.05us 66mb |-----------L0.?------------| "
-- "L0.?[671,986] 1.05us 67mb |------------L0.?------------| "
-- "Committing partition 1:"
-- " Soft Deleting 3 files: L1.81, L1.82, L0.92"
-- " Creating 7 files"
-- "**** Simulation run 19, type=split(ReduceOverlap)(split_times=[672]). 1 Input Files, 67mb total:"
+- "**** Simulation run 15, type=compact(ManySmallFiles). 2 Input Files, 67mb total:"
+- "L0, all files 33mb "
+- "L0.75[42,356] 1.04us |-----------------------------------------L0.75------------------------------------------|"
+- "L0.78[42,356] 1.05us |-----------------------------------------L0.78------------------------------------------|"
+- "**** 1 Output Files (parquet_file_id not yet assigned), 67mb total:"
 - "L0, all files 67mb "
-- "L0.99[671,986] 1.05us |-----------------------------------------L0.99------------------------------------------|"
-- "**** 2 Output Files (parquet_file_id not yet assigned), 67mb total:"
-- "L0 "
-- "L0.?[671,672] 1.05us 218kb|L0.?| "
-- "L0.?[673,986] 1.05us 67mb|-----------------------------------------L0.?------------------------------------------| "
-- "**** Simulation run 20, type=split(ReduceOverlap)(split_times=[357]). 1 Input Files, 66mb total:"
-- "L0, all files 66mb "
-- "L0.98[357,670] 1.05us |-----------------------------------------L0.98------------------------------------------|"
-- "**** 2 Output Files (parquet_file_id not yet assigned), 66mb total:"
-- "L0 "
-- "L0.?[357,357] 1.05us 0b |L0.?| "
-- "L0.?[358,670] 1.05us 66mb|-----------------------------------------L0.?------------------------------------------| "
+- "L0.?[42,356] 1.05us |------------------------------------------L0.?------------------------------------------|"
 - "Committing partition 1:"
-- " Soft Deleting 2 files: L0.98, L0.99"
-- " Creating 4 files"
-- "**** Simulation run 21, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[232]). 4 Input Files, 167mb total:"
+- " Soft Deleting 2 files: L0.75, L0.78"
+- " Creating 1 files"
+- "**** Simulation run 16, type=compact(ManySmallFiles). 2 Input Files, 66mb total:"
+- "L0, all files 33mb "
+- "L0.87[358,670] 1.04us |-----------------------------------------L0.87------------------------------------------|"
+- "L0.91[358,670] 1.05us |-----------------------------------------L0.91------------------------------------------|"
+- "**** 1 Output Files (parquet_file_id not yet assigned), 66mb total:"
+- "L0, all files 66mb "
+- "L0.?[358,670] 1.05us |------------------------------------------L0.?------------------------------------------|"
+- "Committing partition 1:"
+- " Soft Deleting 2 files: L0.87, L0.91"
+- " Creating 1 files"
+- "**** Simulation run 17, type=compact(ManySmallFiles). 2 Input Files, 218kb total:"
+- "L0, all files 109kb "
+- "L0.84[671,672] 1.04us |-----------------------------------------L0.84------------------------------------------|"
+- "L0.88[671,672] 1.05us |-----------------------------------------L0.88------------------------------------------|"
+- "**** 1 Output Files (parquet_file_id not yet assigned), 218kb total:"
+- "L0, all files 218kb "
+- "L0.?[671,672] 1.05us |------------------------------------------L0.?------------------------------------------|"
+- "Committing partition 1:"
+- " Soft Deleting 2 files: L0.84, L0.88"
+- " Creating 1 files"
+- "**** Simulation run 18, type=compact(ManySmallFiles). 2 Input Files, 67mb total:"
+- "L0, all files 33mb "
+- "L0.85[673,986] 1.04us |-----------------------------------------L0.85------------------------------------------|"
+- "L0.89[673,986] 1.05us |-----------------------------------------L0.89------------------------------------------|"
+- "**** 1 Output Files (parquet_file_id not yet assigned), 67mb total:"
+- "L0, all files 67mb "
+- "L0.?[673,986] 1.05us |------------------------------------------L0.?------------------------------------------|"
+- "Committing partition 1:"
+- " Soft Deleting 2 files: L0.85, L0.89"
+- " Creating 1 files"
+- "**** Simulation run 19, type=compact(ManySmallFiles). 2 Input Files, 0b total:"
+- "L0, all files 0b "
+- "L0.86[357,357] 1.04us |-----------------------------------------L0.86------------------------------------------|"
+- "L0.90[357,357] 1.05us |-----------------------------------------L0.90------------------------------------------|"
+- "**** 1 Output Files (parquet_file_id not yet assigned), 0b total:"
+- "L0, all files 0b "
+- "L0.?[357,357] 1.05us |------------------------------------------L0.?------------------------------------------|"
+- "Committing partition 1:"
+- " Soft Deleting 2 files: L0.86, L0.90"
+- " Creating 1 files"
+- "**** Simulation run 20, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[232]). 3 Input Files, 167mb total:"
 - "L0 "
-- "L0.97[42,356] 1.05us 67mb|-----------------------------------------L0.97-----------------------------------------| "
-- "L0.102[357,357] 1.05us 0b |L0.102|"
+- "L0.92[42,356] 1.05us 67mb|-----------------------------------------L0.92-----------------------------------------| "
+- "L0.96[357,357] 1.05us 0b |L0.96|"
 - "L1 "
-- "L1.95[42,356] 1.03us 100mb|-----------------------------------------L1.95-----------------------------------------| "
-- "L1.96[357,357] 1.03us 325kb |L1.96|"
+- "L1.81[42,357] 1.03us 100mb|-----------------------------------------L1.81------------------------------------------|"
 - "**** 2 Output Files (parquet_file_id not yet assigned), 167mb total:"
 - "L1 "
 - "L1.?[42,232] 1.05us 101mb|------------------------L1.?------------------------| "
 - "L1.?[233,357] 1.05us 66mb |--------------L1.?---------------| "
 - "Committing partition 1:"
-- " Soft Deleting 4 files: L1.95, L1.96, L0.97, L0.102"
+- " Soft Deleting 3 files: L1.81, L0.92, L0.96"
 - " Creating 2 files"
-- "**** Simulation run 22, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[547]). 4 Input Files, 166mb total:"
+- "**** Simulation run 21, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[547]). 3 Input Files, 166mb total:"
 - "L0 "
-- "L0.103[358,670] 1.05us 66mb|----------------------------------------L0.103-----------------------------------------| "
-- "L0.100[671,672] 1.05us 218kb |L0.100|"
+- "L0.93[358,670] 1.05us 66mb|-----------------------------------------L0.93-----------------------------------------| "
+- "L0.94[671,672] 1.05us 218kb |L0.94|"
 - "L1 "
-- "L1.93[358,670] 1.03us 99mb|-----------------------------------------L1.93-----------------------------------------| "
-- "L1.94[671,672] 1.03us 651kb |L1.94|"
+- "L1.82[358,672] 1.03us 100mb|-----------------------------------------L1.82------------------------------------------|"
 - "**** 2 Output Files (parquet_file_id not yet assigned), 166mb total:"
 - "L1 "
 - "L1.?[358,547] 1.05us 100mb|------------------------L1.?------------------------| "
 - "L1.?[548,672] 1.05us 66mb |--------------L1.?---------------| "
 - "Committing partition 1:"
-- " Soft Deleting 4 files: L1.93, L1.94, L0.100, L0.103"
+- " Soft Deleting 3 files: L1.82, L0.93, L0.94"
 - " Creating 2 files"
-- "**** Simulation run 23, type=split(CompactAndSplitOutput(TotalSizeLessThanMaxCompactSize))(split_times=[861]). 2 Input Files, 167mb total:"
+- "**** Simulation run 22, type=split(CompactAndSplitOutput(TotalSizeLessThanMaxCompactSize))(split_times=[861]). 2 Input Files, 167mb total:"
 - "L0 "
-- "L0.101[673,986] 1.05us 67mb|-----------------------------------------L0.101-----------------------------------------|"
+- "L0.95[673,986] 1.05us 67mb|-----------------------------------------L0.95------------------------------------------|"
 - "L1 "
 - "L1.83[673,986] 1.03us 100mb|-----------------------------------------L1.83------------------------------------------|"
 - "**** 2 Output Files (parquet_file_id not yet assigned), 167mb total:"
@@ -844,60 +832,60 @@ async fn random_backfill_over_l2s() {
 - "L1.?[673,861] 1.05us 100mb|------------------------L1.?------------------------| "
 - "L1.?[862,986] 1.05us 67mb |--------------L1.?---------------| "
 - "Committing partition 1:"
-- " Soft Deleting 2 files: L1.83, L0.101"
+- " Soft Deleting 2 files: L1.83, L0.95"
 - " Creating 2 files"
-- "**** Simulation run 24, type=split(ReduceOverlap)(split_times=[399, 499]). 1 Input Files, 100mb total:"
+- "**** Simulation run 23, type=split(ReduceOverlap)(split_times=[399, 499]). 1 Input Files, 100mb total:"
 - "L1, all files 100mb "
-- "L1.106[358,547] 1.05us |-----------------------------------------L1.106-----------------------------------------|"
+- "L1.99[358,547] 1.05us |-----------------------------------------L1.99------------------------------------------|"
 - "**** 3 Output Files (parquet_file_id not yet assigned), 100mb total:"
 - "L1 "
 - "L1.?[358,399] 1.05us 22mb|------L1.?-------| "
 - "L1.?[400,499] 1.05us 52mb |--------------------L1.?---------------------| "
 - "L1.?[500,547] 1.05us 26mb |--------L1.?--------| "
-- "**** Simulation run 25, type=split(ReduceOverlap)(split_times=[299]). 1 Input Files, 66mb total:"
+- "**** Simulation run 24, type=split(ReduceOverlap)(split_times=[299]). 1 Input Files, 66mb total:"
 - "L1, all files 66mb "
-- "L1.105[233,357] 1.05us |-----------------------------------------L1.105-----------------------------------------|"
+- "L1.98[233,357] 1.05us |-----------------------------------------L1.98------------------------------------------|"
 - "**** 2 Output Files (parquet_file_id not yet assigned), 66mb total:"
 - "L1 "
 - "L1.?[233,299] 1.05us 35mb|--------------------L1.?---------------------| "
 - "L1.?[300,357] 1.05us 31mb |-----------------L1.?------------------| "
-- "**** Simulation run 26, type=split(ReduceOverlap)(split_times=[99, 199]). 1 Input Files, 101mb total:"
+- "**** Simulation run 25, type=split(ReduceOverlap)(split_times=[99, 199]). 1 Input Files, 101mb total:"
 - "L1, all files 101mb "
-- "L1.104[42,232] 1.05us |-----------------------------------------L1.104-----------------------------------------|"
+- "L1.97[42,232] 1.05us |-----------------------------------------L1.97------------------------------------------|"
 - "**** 3 Output Files (parquet_file_id not yet assigned), 101mb total:"
 - "L1 "
 - "L1.?[42,99] 1.05us 30mb |----------L1.?-----------| "
 - "L1.?[100,199] 1.05us 52mb |--------------------L1.?--------------------| "
 - "L1.?[200,232] 1.05us 18mb |----L1.?-----| "
-- "**** Simulation run 27, type=split(ReduceOverlap)(split_times=[599]). 1 Input Files, 66mb total:"
+- "**** Simulation run 26, type=split(ReduceOverlap)(split_times=[599]). 1 Input Files, 66mb total:"
 - "L1, all files 66mb "
-- "L1.107[548,672] 1.05us |-----------------------------------------L1.107-----------------------------------------|"
+- "L1.100[548,672] 1.05us |-----------------------------------------L1.100-----------------------------------------|"
 - "**** 2 Output Files (parquet_file_id not yet assigned), 66mb total:"
 - "L1 "
 - "L1.?[548,599] 1.05us 27mb|---------------L1.?----------------| "
 - "L1.?[600,672] 1.05us 39mb |-----------------------L1.?-----------------------| "
-- "**** Simulation run 28, type=split(ReduceOverlap)(split_times=[899]). 1 Input Files, 67mb total:"
+- "**** Simulation run 27, type=split(ReduceOverlap)(split_times=[899]). 1 Input Files, 67mb total:"
 - "L1, all files 67mb "
-- "L1.109[862,986] 1.05us |-----------------------------------------L1.109-----------------------------------------|"
+- "L1.102[862,986] 1.05us |-----------------------------------------L1.102-----------------------------------------|"
 - "**** 2 Output Files (parquet_file_id not yet assigned), 67mb total:"
 - "L1 "
 - "L1.?[862,899] 1.05us 20mb|----------L1.?----------| "
 - "L1.?[900,986] 1.05us 47mb |----------------------------L1.?----------------------------| "
-- "**** Simulation run 29, type=split(ReduceOverlap)(split_times=[699, 799]). 1 Input Files, 100mb total:"
+- "**** Simulation run 28, type=split(ReduceOverlap)(split_times=[699, 799]). 1 Input Files, 100mb total:"
 - "L1, all files 100mb "
-- "L1.108[673,861] 1.05us |-----------------------------------------L1.108-----------------------------------------|"
+- "L1.101[673,861] 1.05us |-----------------------------------------L1.101-----------------------------------------|"
 - "**** 3 Output Files (parquet_file_id not yet assigned), 100mb total:"
 - "L1 "
 - "L1.?[673,699] 1.05us 14mb|---L1.?---| "
 - "L1.?[700,799] 1.05us 53mb |--------------------L1.?---------------------| "
 - "L1.?[800,861] 1.05us 34mb |-----------L1.?------------| "
 - "Committing partition 1:"
-- " Soft Deleting 6 files: L1.104, L1.105, L1.106, L1.107, L1.108, L1.109"
+- " Soft Deleting 6 files: L1.97, L1.98, L1.99, L1.100, L1.101, L1.102"
 - " Creating 15 files"
-- "**** Simulation run 30, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[71, 142]). 4 Input Files, 283mb total:"
+- "**** Simulation run 29, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[71, 142]). 4 Input Files, 283mb total:"
 - "L1 "
-- "L1.115[42,99] 1.05us 30mb |--------L1.115---------| "
-- "L1.116[100,199] 1.05us 52mb |------------------L1.116------------------| "
+- "L1.108[42,99] 1.05us 30mb |--------L1.108---------| "
+- "L1.109[100,199] 1.05us 52mb |------------------L1.109------------------| "
 - "L2 "
 - "L2.1[0,99] 99ns 100mb |-------------------L2.1-------------------| "
 - "L2.2[100,199] 199ns 100mb |-------------------L2.2-------------------| "
@@ -907,13 +895,13 @@ async fn random_backfill_over_l2s() {
 - "L2.?[72,142] 1.05us 99mb |------------L2.?-------------| "
 - "L2.?[143,199] 1.05us 82mb |---------L2.?----------| "
 - "Committing partition 1:"
-- " Soft Deleting 4 files: L2.1, L2.2, L1.115, L1.116"
+- " Soft Deleting 4 files: L2.1, L2.2, L1.108, L1.109"
 - " Creating 3 files"
-- "**** Simulation run 31, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[271, 342]). 5 Input Files, 284mb total:"
+- "**** Simulation run 30, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[271, 342]). 5 Input Files, 284mb total:"
 - "L1 "
-- "L1.117[200,232] 1.05us 18mb|---L1.117---| "
-- "L1.113[233,299] 1.05us 35mb |----------L1.113-----------| "
-- "L1.114[300,357] 1.05us 31mb |--------L1.114---------| "
+- "L1.110[200,232] 1.05us 18mb|---L1.110---| "
+- "L1.106[233,299] 1.05us 35mb |----------L1.106-----------| "
+- "L1.107[300,357] 1.05us 31mb |--------L1.107---------| "
 - "L2 "
 - "L2.3[200,299] 299ns 100mb|-------------------L2.3-------------------| "
 - "L2.4[300,399] 399ns 100mb |-------------------L2.4-------------------| "
@@ -923,14 +911,14 @@ async fn random_backfill_over_l2s() {
 - "L2.?[272,342] 1.05us 100mb |------------L2.?-------------| "
 - "L2.?[343,399] 1.05us 83mb |---------L2.?----------| "
 - "Committing partition 1:"
-- " Soft Deleting 5 files: L2.3, L2.4, L1.113, L1.114, L1.117"
+- " Soft Deleting 5 files: L2.3, L2.4, L1.106, L1.107, L1.110"
 - " Creating 3 files"
-- "**** Simulation run 32, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[404, 465]). 4 Input Files, 257mb total:"
+- "**** Simulation run 31, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[404, 465]). 4 Input Files, 257mb total:"
 - "L1 "
-- "L1.110[358,399] 1.05us 22mb |-------L1.110--------| "
-- "L1.111[400,499] 1.05us 52mb |------------------------L1.111-------------------------| "
+- "L1.103[358,399] 1.05us 22mb |-------L1.103--------| "
+- "L1.104[400,499] 1.05us 52mb |------------------------L1.104-------------------------| "
 - "L2 "
-- "L2.130[343,399] 1.05us 83mb|------------L2.130------------| "
+- "L2.123[343,399] 1.05us 83mb|------------L2.123------------| "
 - "L2.5[400,499] 499ns 100mb |-------------------------L2.5--------------------------| "
|
- "L2.5[400,499] 499ns 100mb |-------------------------L2.5--------------------------| "
|
||||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 257mb total:"
|
- "**** 3 Output Files (parquet_file_id not yet assigned), 257mb total:"
|
||||||
- "L2 "
|
- "L2 "
|
||||||
|
@ -938,13 +926,13 @@ async fn random_backfill_over_l2s() {
|
||||||
- "L2.?[405,465] 1.05us 99mb |--------------L2.?--------------| "
|
- "L2.?[405,465] 1.05us 99mb |--------------L2.?--------------| "
|
||||||
- "L2.?[466,499] 1.05us 58mb |------L2.?-------| "
|
- "L2.?[466,499] 1.05us 58mb |------L2.?-------| "
|
||||||
- "Committing partition 1:"
|
- "Committing partition 1:"
|
||||||
- " Soft Deleting 4 files: L2.5, L1.110, L1.111, L2.130"
|
- " Soft Deleting 4 files: L2.5, L1.103, L1.104, L2.123"
|
||||||
- " Creating 3 files"
|
- " Creating 3 files"
|
||||||
- "**** Simulation run 33, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[569, 638]). 5 Input Files, 292mb total:"
|
- "**** Simulation run 32, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[569, 638]). 5 Input Files, 292mb total:"
|
||||||
- "L1 "
|
- "L1 "
|
||||||
- "L1.112[500,547] 1.05us 26mb|------L1.112-------| "
|
- "L1.105[500,547] 1.05us 26mb|------L1.105-------| "
|
||||||
- "L1.118[548,599] 1.05us 27mb |-------L1.118--------| "
|
- "L1.111[548,599] 1.05us 27mb |-------L1.111--------| "
|
||||||
- "L1.119[600,672] 1.05us 39mb |------------L1.119------------| "
|
- "L1.112[600,672] 1.05us 39mb |------------L1.112------------| "
|
||||||
- "L2 "
|
- "L2 "
|
||||||
- "L2.6[500,599] 599ns 100mb|-------------------L2.6-------------------| "
|
- "L2.6[500,599] 599ns 100mb|-------------------L2.6-------------------| "
|
||||||
- "L2.7[600,699] 699ns 100mb |-------------------L2.7-------------------| "
|
- "L2.7[600,699] 699ns 100mb |-------------------L2.7-------------------| "
|
||||||
|
@ -954,14 +942,14 @@ async fn random_backfill_over_l2s() {
|
||||||
- "L2.?[570,638] 1.05us 100mb |------------L2.?------------| "
|
- "L2.?[570,638] 1.05us 100mb |------------L2.?------------| "
|
||||||
- "L2.?[639,699] 1.05us 91mb |----------L2.?-----------| "
|
- "L2.?[639,699] 1.05us 91mb |----------L2.?-----------| "
|
||||||
- "Committing partition 1:"
|
- "Committing partition 1:"
|
||||||
- " Soft Deleting 5 files: L2.6, L2.7, L1.112, L1.118, L1.119"
|
- " Soft Deleting 5 files: L2.6, L2.7, L1.105, L1.111, L1.112"
|
||||||
- " Creating 3 files"
|
- " Creating 3 files"
|
||||||
- "**** Simulation run 34, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[702, 765]). 4 Input Files, 258mb total:"
|
- "**** Simulation run 33, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[702, 765]). 4 Input Files, 258mb total:"
|
||||||
- "L1 "
|
- "L1 "
|
||||||
- "L1.122[673,699] 1.05us 14mb |---L1.122---| "
|
- "L1.115[673,699] 1.05us 14mb |---L1.115---| "
|
||||||
- "L1.123[700,799] 1.05us 53mb |-----------------------L1.123------------------------| "
|
- "L1.116[700,799] 1.05us 53mb |-----------------------L1.116------------------------| "
|
||||||
- "L2 "
|
- "L2 "
|
||||||
- "L2.136[639,699] 1.05us 91mb|------------L2.136-------------| "
|
- "L2.129[639,699] 1.05us 91mb|------------L2.129-------------| "
|
||||||
- "L2.8[700,799] 799ns 100mb |------------------------L2.8-------------------------| "
|
- "L2.8[700,799] 799ns 100mb |------------------------L2.8-------------------------| "
|
||||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 258mb total:"
|
- "**** 3 Output Files (parquet_file_id not yet assigned), 258mb total:"
|
||||||
- "L2 "
|
- "L2 "
|
||||||
|
@ -969,12 +957,12 @@ async fn random_backfill_over_l2s() {
|
||||||
- "L2.?[703,765] 1.05us 100mb |--------------L2.?--------------| "
|
- "L2.?[703,765] 1.05us 100mb |--------------L2.?--------------| "
|
||||||
- "L2.?[766,799] 1.05us 56mb |------L2.?------| "
|
- "L2.?[766,799] 1.05us 56mb |------L2.?------| "
|
||||||
- "Committing partition 1:"
|
- "Committing partition 1:"
|
||||||
- " Soft Deleting 4 files: L2.8, L1.122, L1.123, L2.136"
|
- " Soft Deleting 4 files: L2.8, L1.115, L1.116, L2.129"
|
||||||
- " Creating 3 files"
|
- " Creating 3 files"
|
||||||
- "**** Simulation run 35, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[865]). 3 Input Files, 154mb total:"
|
- "**** Simulation run 34, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[865]). 3 Input Files, 154mb total:"
|
||||||
- "L1 "
|
- "L1 "
|
||||||
- "L1.124[800,861] 1.05us 34mb|-----------------------L1.124------------------------| "
|
- "L1.117[800,861] 1.05us 34mb|-----------------------L1.117------------------------| "
|
||||||
- "L1.120[862,899] 1.05us 20mb |------------L1.120-------------| "
|
- "L1.113[862,899] 1.05us 20mb |------------L1.113-------------| "
|
||||||
- "L2 "
|
- "L2 "
|
||||||
- "L2.9[800,899] 899ns 100mb|-----------------------------------------L2.9------------------------------------------| "
|
- "L2.9[800,899] 899ns 100mb|-----------------------------------------L2.9------------------------------------------| "
|
||||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 154mb total:"
|
- "**** 2 Output Files (parquet_file_id not yet assigned), 154mb total:"
|
||||||
|
@ -982,28 +970,28 @@ async fn random_backfill_over_l2s() {
|
||||||
- "L2.?[800,865] 1.05us 101mb|--------------------------L2.?---------------------------| "
|
- "L2.?[800,865] 1.05us 101mb|--------------------------L2.?---------------------------| "
|
||||||
- "L2.?[866,899] 1.05us 53mb |-----------L2.?------------| "
|
- "L2.?[866,899] 1.05us 53mb |-----------L2.?------------| "
|
||||||
- "Committing partition 1:"
|
- "Committing partition 1:"
|
||||||
- " Soft Deleting 3 files: L2.9, L1.120, L1.124"
|
- " Soft Deleting 3 files: L2.9, L1.113, L1.117"
|
||||||
- " Creating 2 files"
|
- " Creating 2 files"
|
||||||
- "**** Final Output Files (4.58gb written)"
|
- "**** Final Output Files (4.06gb written)"
|
||||||
- "L1 "
|
- "L1 "
|
||||||
- "L1.121[900,986] 1.05us 47mb |L1.121| "
|
- "L1.114[900,986] 1.05us 47mb |L1.114| "
|
||||||
- "L2 "
|
- "L2 "
|
||||||
- "L2.10[900,999] 999ns 100mb |L2.10-| "
|
- "L2.10[900,999] 999ns 100mb |L2.10-| "
|
||||||
- "L2.125[0,71] 1.05us 101mb|L2.125| "
|
- "L2.118[0,71] 1.05us 101mb|L2.118| "
|
||||||
- "L2.126[72,142] 1.05us 99mb |L2.126| "
|
- "L2.119[72,142] 1.05us 99mb |L2.119| "
|
||||||
- "L2.127[143,199] 1.05us 82mb |L2.127| "
|
- "L2.120[143,199] 1.05us 82mb |L2.120| "
|
||||||
- "L2.128[200,271] 1.05us 101mb |L2.128| "
|
- "L2.121[200,271] 1.05us 101mb |L2.121| "
|
||||||
- "L2.129[272,342] 1.05us 100mb |L2.129| "
|
- "L2.122[272,342] 1.05us 100mb |L2.122| "
|
||||||
- "L2.131[343,404] 1.05us 100mb |L2.131| "
|
- "L2.124[343,404] 1.05us 100mb |L2.124| "
|
||||||
- "L2.132[405,465] 1.05us 99mb |L2.132| "
|
- "L2.125[405,465] 1.05us 99mb |L2.125| "
|
||||||
- "L2.133[466,499] 1.05us 58mb |L2.133| "
|
- "L2.126[466,499] 1.05us 58mb |L2.126| "
|
||||||
- "L2.134[500,569] 1.05us 101mb |L2.134| "
|
- "L2.127[500,569] 1.05us 101mb |L2.127| "
|
||||||
- "L2.135[570,638] 1.05us 100mb |L2.135| "
|
- "L2.128[570,638] 1.05us 100mb |L2.128| "
|
||||||
- "L2.137[639,702] 1.05us 101mb |L2.137| "
|
- "L2.130[639,702] 1.05us 101mb |L2.130| "
|
||||||
- "L2.138[703,765] 1.05us 100mb |L2.138| "
|
- "L2.131[703,765] 1.05us 100mb |L2.131| "
|
||||||
- "L2.139[766,799] 1.05us 56mb |L2.139| "
|
- "L2.132[766,799] 1.05us 56mb |L2.132| "
|
||||||
- "L2.140[800,865] 1.05us 101mb |L2.140| "
|
- "L2.133[800,865] 1.05us 101mb |L2.133| "
|
||||||
- "L2.141[866,899] 1.05us 53mb |L2.141| "
|
- "L2.134[866,899] 1.05us 53mb |L2.134| "
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -3020,63 +3008,66 @@ async fn actual_case_from_catalog_1() {
|
||||||
- "WARNING: file L0.161[327,333] 336ns 183mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L0.161[327,333] 336ns 183mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L0.162[330,338] 340ns 231mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L0.162[330,338] 340ns 231mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L0.163[331,338] 341ns 232mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L0.163[331,338] 341ns 232mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "**** Final Output Files (17.64gb written)"
|
- "**** Final Output Files (15.47gb written)"
|
||||||
- "L2 "
|
- "L2 "
|
||||||
- "L2.578[134,149] 342ns 202mb |L2.578| "
|
- "L2.594[150,165] 342ns 218mb |L2.594| "
|
||||||
- "L2.579[150,165] 342ns 218mb |L2.579| "
|
- "L2.595[166,171] 342ns 118mb |L2.595| "
|
||||||
- "L2.580[166,176] 342ns 186mb |L2.580| "
|
- "L2.598[183,197] 342ns 267mb |L2.598| "
|
||||||
- "L2.581[177,182] 342ns 150mb |L2.581| "
|
- "L2.599[198,207] 342ns 157mb |L2.599| "
|
||||||
- "L2.582[183,197] 342ns 267mb |L2.582| "
|
- "L2.600[208,220] 342ns 147mb |L2.600| "
|
||||||
- "L2.583[198,207] 342ns 157mb |L2.583| "
|
- "L2.601[221,232] 342ns 270mb |L2.601| "
|
||||||
- "L2.584[208,220] 342ns 147mb |L2.584| "
|
- "L2.602[233,244] 342ns 147mb |L2.602| "
|
||||||
- "L2.585[221,232] 342ns 270mb |L2.585| "
|
- "L2.603[245,253] 342ns 139mb |L2.603| "
|
||||||
- "L2.588[233,253] 342ns 286mb |L2.588| "
|
- "L2.604[271,276] 342ns 117mb |L2.604| "
|
||||||
- "L2.589[254,270] 342ns 289mb |L2.589| "
|
- "L2.605[277,281] 342ns 109mb |L2.605| "
|
||||||
- "L2.590[271,281] 342ns 225mb |L2.590| "
|
- "L2.612[254,261] 342ns 105mb |L2.612| "
|
||||||
- "L2.591[282,296] 342ns 234mb |L2.591| "
|
- "L2.613[262,270] 342ns 184mb |L2.613| "
|
||||||
- "L2.592[297,302] 342ns 232mb |L2.592| "
|
- "L2.616[309,311] 342ns 101mb |L2.616|"
|
||||||
- "L2.593[303,308] 342ns 244mb |L2.593| "
|
- "L2.617[312,314] 342ns 181mb |L2.617|"
|
||||||
- "L2.594[309,314] 342ns 282mb |L2.594|"
|
- "L2.618[315,317] 342ns 214mb |L2.618|"
|
||||||
- "L2.595[315,317] 342ns 214mb |L2.595|"
|
- "L2.619[318,320] 342ns 222mb |L2.619|"
|
||||||
- "L2.596[318,320] 342ns 222mb |L2.596|"
|
- "L2.620[321,323] 342ns 146mb |L2.620|"
|
||||||
- "L2.597[321,323] 342ns 146mb |L2.597|"
|
- "L2.621[324,326] 342ns 254mb |L2.621|"
|
||||||
- "L2.598[324,326] 342ns 254mb |L2.598|"
|
- "L2.622[327,329] 342ns 197mb |L2.622|"
|
||||||
- "L2.599[327,329] 342ns 197mb |L2.599|"
|
- "L2.623[330,332] 342ns 228mb |L2.623|"
|
||||||
- "L2.600[330,332] 342ns 228mb |L2.600|"
|
- "L2.624[333,335] 342ns 199mb |L2.624|"
|
||||||
- "L2.601[333,335] 342ns 199mb |L2.601|"
|
- "L2.625[336,337] 342ns 156mb |L2.625|"
|
||||||
- "L2.602[336,338] 342ns 280mb |L2.602|"
|
- "L2.626[338,338] 342ns 124mb |L2.626|"
|
||||||
- "L2.850[1,26] 342ns 101mb |L2.850| "
|
- "L2.628[1,36] 342ns 103mb |L2.628-| "
|
||||||
- "L2.853[69,85] 342ns 104mb |L2.853| "
|
- "L2.629[37,71] 342ns 103mb |L2.629-| "
|
||||||
- "L2.854[86,98] 342ns 107mb |L2.854| "
|
- "L2.630[72,83] 342ns 103mb |L2.630| "
|
||||||
- "L2.861[27,48] 342ns 103mb |L2.861| "
|
- "L2.638[172,177] 342ns 109mb |L2.638| "
|
||||||
- "L2.862[49,68] 342ns 98mb |L2.862| "
|
- "L2.639[178,182] 342ns 109mb |L2.639| "
|
||||||
- "L2.863[99,108] 342ns 102mb |L2.863| "
|
- "L2.640[282,288] 342ns 100mb |L2.640| "
|
||||||
- "L2.864[109,117] 342ns 91mb |L2.864| "
|
- "L2.643[300,303] 342ns 110mb |L2.643| "
|
||||||
- "L2.865[118,124] 342ns 91mb |L2.865| "
|
- "L2.646[84,94] 342ns 107mb |L2.646| "
|
||||||
- "L2.866[125,130] 342ns 107mb |L2.866| "
|
- "L2.647[95,104] 342ns 97mb |L2.647| "
|
||||||
- "L2.867[131,133] 342ns 64mb |L2.867| "
|
- "L2.648[105,111] 342ns 86mb |L2.648| "
|
||||||
- "L2.868[339,339] 342ns 25mb |L2.868|"
|
- "L2.649[112,119] 342ns 114mb |L2.649| "
|
||||||
- "WARNING: file L2.578[134,149] 342ns 202mb exceeds soft limit 100mb by more than 50%"
|
- "L2.650[120,126] 342ns 98mb |L2.650| "
|
||||||
- "WARNING: file L2.579[150,165] 342ns 218mb exceeds soft limit 100mb by more than 50%"
|
- "L2.651[127,130] 342ns 82mb |L2.651| "
|
||||||
- "WARNING: file L2.580[166,176] 342ns 186mb exceeds soft limit 100mb by more than 50%"
|
- "L2.652[131,138] 342ns 108mb |L2.652| "
|
||||||
- "WARNING: file L2.581[177,182] 342ns 150mb exceeds soft limit 100mb by more than 50%"
|
- "L2.653[139,145] 342ns 93mb |L2.653| "
|
||||||
- "WARNING: file L2.582[183,197] 342ns 267mb exceeds soft limit 100mb by more than 50%"
|
- "L2.654[146,149] 342ns 77mb |L2.654| "
|
||||||
- "WARNING: file L2.583[198,207] 342ns 157mb exceeds soft limit 100mb by more than 50%"
|
- "L2.655[289,293] 342ns 110mb |L2.655| "
|
||||||
- "WARNING: file L2.585[221,232] 342ns 270mb exceeds soft limit 100mb by more than 50%"
|
- "L2.656[294,297] 342ns 82mb |L2.656| "
|
||||||
- "WARNING: file L2.588[233,253] 342ns 286mb exceeds soft limit 100mb by more than 50%"
|
- "L2.657[298,299] 342ns 82mb |L2.657| "
|
||||||
- "WARNING: file L2.589[254,270] 342ns 289mb exceeds soft limit 100mb by more than 50%"
|
- "L2.658[304,306] 342ns 113mb |L2.658| "
|
||||||
- "WARNING: file L2.590[271,281] 342ns 225mb exceeds soft limit 100mb by more than 50%"
|
- "L2.659[307,308] 342ns 113mb |L2.659| "
|
||||||
- "WARNING: file L2.591[282,296] 342ns 234mb exceeds soft limit 100mb by more than 50%"
|
- "L2.660[339,339] 342ns 25mb |L2.660|"
|
||||||
- "WARNING: file L2.592[297,302] 342ns 232mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.594[150,165] 342ns 218mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.593[303,308] 342ns 244mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.598[183,197] 342ns 267mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.594[309,314] 342ns 282mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.599[198,207] 342ns 157mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.595[315,317] 342ns 214mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.601[221,232] 342ns 270mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.596[318,320] 342ns 222mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.613[262,270] 342ns 184mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.598[324,326] 342ns 254mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.617[312,314] 342ns 181mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.599[327,329] 342ns 197mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.618[315,317] 342ns 214mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.600[330,332] 342ns 228mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.619[318,320] 342ns 222mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.601[333,335] 342ns 199mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.621[324,326] 342ns 254mb exceeds soft limit 100mb by more than 50%"
|
||||||
- "WARNING: file L2.602[336,338] 342ns 280mb exceeds soft limit 100mb by more than 50%"
|
- "WARNING: file L2.622[327,329] 342ns 197mb exceeds soft limit 100mb by more than 50%"
|
||||||
|
- "WARNING: file L2.623[330,332] 342ns 228mb exceeds soft limit 100mb by more than 50%"
|
||||||
|
- "WARNING: file L2.624[333,335] 342ns 199mb exceeds soft limit 100mb by more than 50%"
|
||||||
|
- "WARNING: file L2.625[336,337] 342ns 156mb exceeds soft limit 100mb by more than 50%"
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4670,17 +4670,17 @@ async fn l0s_almost_needing_vertical_split() {
|
||||||
- "L0.998[24,100] 1.02us |-----------------------------------------L0.998-----------------------------------------|"
|
- "L0.998[24,100] 1.02us |-----------------------------------------L0.998-----------------------------------------|"
|
||||||
- "L0.999[24,100] 1.02us |-----------------------------------------L0.999-----------------------------------------|"
|
- "L0.999[24,100] 1.02us |-----------------------------------------L0.999-----------------------------------------|"
|
||||||
- "L0.1000[24,100] 1.02us |----------------------------------------L0.1000-----------------------------------------|"
|
- "L0.1000[24,100] 1.02us |----------------------------------------L0.1000-----------------------------------------|"
|
||||||
- "**** Final Output Files (6.5gb written)"
|
- "**** Final Output Files (5.23gb written)"
|
||||||
- "L2 "
|
- "L2 "
|
||||||
- "L2.3141[24,37] 1.02us 108mb|---L2.3141---| "
|
- "L2.3086[24,35] 1.02us 102mb|--L2.3086--| "
|
||||||
- "L2.3150[38,49] 1.02us 102mb |--L2.3150--| "
|
- "L2.3095[36,47] 1.02us 105mb |--L2.3095--| "
|
||||||
- "L2.3151[50,60] 1.02us 93mb |-L2.3151-| "
|
- "L2.3096[48,58] 1.02us 95mb |-L2.3096-| "
|
||||||
- "L2.3152[61,63] 1.02us 37mb |L2.3152| "
|
- "L2.3097[59,65] 1.02us 76mb |L2.3097| "
|
||||||
- "L2.3153[64,73] 1.02us 101mb |L2.3153-| "
|
- "L2.3098[66,76] 1.02us 106mb |-L2.3098-| "
|
||||||
- "L2.3154[74,82] 1.02us 90mb |L2.3154| "
|
- "L2.3099[77,86] 1.02us 96mb |L2.3099-| "
|
||||||
- "L2.3155[83,90] 1.02us 101mb |L2.3155| "
|
- "L2.3100[87,90] 1.02us 53mb |L2.3100| "
|
||||||
- "L2.3156[91,98] 1.02us 93mb |L2.3156| "
|
- "L2.3101[91,98] 1.02us 90mb |L2.3101| "
|
||||||
- "L2.3157[99,100] 1.02us 26mb |L2.3157|"
|
- "L2.3102[99,100] 1.02us 26mb |L2.3102|"
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
|
@ -78,14 +78,12 @@ where
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use assert_matches::assert_matches;
|
|
||||||
use test_helpers::tracing::TracingCapture;
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
||||||
use iox_tests::ParquetFileBuilder;
|
use assert_matches::assert_matches;
|
||||||
|
use iox_tests::{partition_identifier, ParquetFileBuilder};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use test_helpers::tracing::TracingCapture;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_display() {
|
fn test_display() {
|
||||||
|
@ -111,14 +109,21 @@ mod tests {
|
||||||
.with_row_count(105)
|
.with_row_count(105)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
let created_1 = ParquetFileBuilder::new(1000).with_partition(1).build();
|
let partition_id_1 = PartitionId::new(1);
|
||||||
let created_2 = ParquetFileBuilder::new(1001).with_partition(1).build();
|
let transition_partition_id_1 = partition_identifier(1);
|
||||||
|
|
||||||
|
let created_1 = ParquetFileBuilder::new(1000)
|
||||||
|
.with_partition(transition_partition_id_1.clone())
|
||||||
|
.build();
|
||||||
|
let created_2 = ParquetFileBuilder::new(1001)
|
||||||
|
.with_partition(transition_partition_id_1)
|
||||||
|
.build();
|
||||||
|
|
||||||
let capture = TracingCapture::new();
|
let capture = TracingCapture::new();
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(1),
|
partition_id_1,
|
||||||
&[existing_1.clone()],
|
&[existing_1.clone()],
|
||||||
&[],
|
&[],
|
||||||
&[created_1.clone().into(), created_2.clone().into()],
|
&[created_1.clone().into(), created_2.clone().into()],
|
||||||
|
@ -130,9 +135,11 @@ mod tests {
|
||||||
Ok(res) if res == vec![ParquetFileId::new(1000), ParquetFileId::new(1001)]
|
Ok(res) if res == vec![ParquetFileId::new(1000), ParquetFileId::new(1001)]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let partition_id_2 = PartitionId::new(2);
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(2),
|
partition_id_2,
|
||||||
&[existing_2.clone(), existing_3.clone()],
|
&[existing_2.clone(), existing_3.clone()],
|
||||||
&[existing_1.clone()],
|
&[existing_1.clone()],
|
||||||
&[],
|
&[],
|
||||||
|
@ -151,14 +158,14 @@ level = INFO; message = committed parquet file change; target_level = Final; par
|
||||||
inner.history(),
|
inner.history(),
|
||||||
vec![
|
vec![
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(1),
|
partition_id: partition_id_1,
|
||||||
delete: vec![existing_1.clone()],
|
delete: vec![existing_1.clone()],
|
||||||
upgrade: vec![],
|
upgrade: vec![],
|
||||||
created: vec![created_1, created_2],
|
created: vec![created_1, created_2],
|
||||||
target_level: CompactionLevel::Final,
|
target_level: CompactionLevel::Final,
|
||||||
},
|
},
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(2),
|
partition_id: partition_id_2,
|
||||||
delete: vec![existing_2, existing_3],
|
delete: vec![existing_2, existing_3],
|
||||||
upgrade: vec![existing_1],
|
upgrade: vec![existing_1],
|
||||||
created: vec![],
|
created: vec![],
|
||||||
|
|
|
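In the reworked test above, the commit() call is still keyed by the numeric catalog PartitionId while the created-file fixtures carry the new TransitionPartitionId from the iox_tests partition_identifier helper. A minimal sketch of that split, assuming the builder methods exactly as used in the hunk:

    use data_types::PartitionId;
    use iox_tests::{partition_identifier, ParquetFileBuilder};

    // The commit API is still addressed by the numeric catalog id...
    let partition_id_1 = PartitionId::new(1);
    // ...while the parquet-file fixtures are keyed by the unified identifier.
    let transition_partition_id_1 = partition_identifier(1);

    let created_1 = ParquetFileBuilder::new(1000)
        .with_partition(transition_partition_id_1.clone())
        .build();
    let created_2 = ParquetFileBuilder::new(1001)
        .with_partition(transition_partition_id_1)
        .build();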
@ -303,15 +303,12 @@ where
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use assert_matches::assert_matches;
|
|
||||||
use metric::{assert_histogram, Attributes};
|
|
||||||
|
|
||||||
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
|
||||||
use iox_tests::ParquetFileBuilder;
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
||||||
|
use assert_matches::assert_matches;
|
||||||
|
use iox_tests::{partition_identifier, ParquetFileBuilder};
|
||||||
|
use metric::{assert_histogram, Attributes};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_display() {
|
fn test_display() {
|
||||||
|
@ -326,6 +323,9 @@ mod tests {
|
||||||
let inner = Arc::new(MockCommit::new());
|
let inner = Arc::new(MockCommit::new());
|
||||||
let commit = MetricsCommitWrapper::new(Arc::clone(&inner), ®istry);
|
let commit = MetricsCommitWrapper::new(Arc::clone(&inner), ®istry);
|
||||||
|
|
||||||
|
let partition_id_1 = PartitionId::new(1);
|
||||||
|
let transition_partition_id_1 = partition_identifier(1);
|
||||||
|
|
||||||
let existing_1 = ParquetFileBuilder::new(1)
|
let existing_1 = ParquetFileBuilder::new(1)
|
||||||
.with_file_size_bytes(10_001)
|
.with_file_size_bytes(10_001)
|
||||||
.with_row_count(1_001)
|
.with_row_count(1_001)
|
||||||
|
@ -350,7 +350,7 @@ mod tests {
|
||||||
let created = ParquetFileBuilder::new(1000)
|
let created = ParquetFileBuilder::new(1000)
|
||||||
.with_file_size_bytes(10_016)
|
.with_file_size_bytes(10_016)
|
||||||
.with_row_count(1_016)
|
.with_row_count(1_016)
|
||||||
.with_partition(1)
|
.with_partition(transition_partition_id_1)
|
||||||
.with_compaction_level(CompactionLevel::Initial)
|
.with_compaction_level(CompactionLevel::Initial)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -392,7 +392,7 @@ mod tests {
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(1),
|
partition_id_1,
|
||||||
&[existing_1.clone()],
|
&[existing_1.clone()],
|
||||||
&[existing_2a.clone()],
|
&[existing_2a.clone()],
|
||||||
&[created.clone().into()],
|
&[created.clone().into()],
|
||||||
|
@ -401,9 +401,11 @@ mod tests {
|
||||||
.await;
|
.await;
|
||||||
assert_matches!(ids, Ok(res) if res == vec![ParquetFileId::new(1000)]);
|
assert_matches!(ids, Ok(res) if res == vec![ParquetFileId::new(1000)]);
|
||||||
|
|
||||||
|
let partition_id_2 = PartitionId::new(2);
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(2),
|
partition_id_2,
|
||||||
&[existing_2b.clone(), existing_3.clone()],
|
&[existing_2b.clone(), existing_3.clone()],
|
||||||
&[existing_4.clone()],
|
&[existing_4.clone()],
|
||||||
&[],
|
&[],
|
||||||
|
@ -449,14 +451,14 @@ mod tests {
|
||||||
inner.history(),
|
inner.history(),
|
||||||
vec![
|
vec![
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(1),
|
partition_id: partition_id_1,
|
||||||
delete: vec![existing_1],
|
delete: vec![existing_1],
|
||||||
upgrade: vec![existing_2a.clone()],
|
upgrade: vec![existing_2a.clone()],
|
||||||
created: vec![created],
|
created: vec![created],
|
||||||
target_level: CompactionLevel::FileNonOverlapped,
|
target_level: CompactionLevel::FileNonOverlapped,
|
||||||
},
|
},
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(2),
|
partition_id: partition_id_2,
|
||||||
delete: vec![existing_2b, existing_3],
|
delete: vec![existing_2b, existing_3],
|
||||||
upgrade: vec![existing_4],
|
upgrade: vec![existing_4],
|
||||||
created: vec![],
|
created: vec![],
|
||||||
|
|
|
@ -78,10 +78,9 @@ impl Commit for MockCommit {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use assert_matches::assert_matches;
|
|
||||||
use iox_tests::ParquetFileBuilder;
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use assert_matches::assert_matches;
|
||||||
|
use iox_tests::{partition_identifier, ParquetFileBuilder};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_display() {
|
fn test_display() {
|
||||||
|
@ -92,6 +91,11 @@ mod tests {
|
||||||
async fn test_commit() {
|
async fn test_commit() {
|
||||||
let commit = MockCommit::new();
|
let commit = MockCommit::new();
|
||||||
|
|
||||||
|
let partition_id_1 = PartitionId::new(1);
|
||||||
|
let transition_partition_id_1 = partition_identifier(1);
|
||||||
|
let partition_id_2 = PartitionId::new(2);
|
||||||
|
let transition_partition_id_2 = partition_identifier(2);
|
||||||
|
|
||||||
let existing_1 = ParquetFileBuilder::new(1).build();
|
let existing_1 = ParquetFileBuilder::new(1).build();
|
||||||
let existing_2 = ParquetFileBuilder::new(2).build();
|
let existing_2 = ParquetFileBuilder::new(2).build();
|
||||||
let existing_3 = ParquetFileBuilder::new(3).build();
|
let existing_3 = ParquetFileBuilder::new(3).build();
|
||||||
|
@ -101,14 +105,22 @@ mod tests {
|
||||||
let existing_7 = ParquetFileBuilder::new(7).build();
|
let existing_7 = ParquetFileBuilder::new(7).build();
|
||||||
let existing_8 = ParquetFileBuilder::new(8).build();
|
let existing_8 = ParquetFileBuilder::new(8).build();
|
||||||
|
|
||||||
let created_1_1 = ParquetFileBuilder::new(1000).with_partition(1).build();
|
let created_1_1 = ParquetFileBuilder::new(1000)
|
||||||
let created_1_2 = ParquetFileBuilder::new(1001).with_partition(1).build();
|
.with_partition(transition_partition_id_1.clone())
|
||||||
let created_1_3 = ParquetFileBuilder::new(1003).with_partition(1).build();
|
.build();
|
||||||
let created_2_1 = ParquetFileBuilder::new(1002).with_partition(2).build();
|
let created_1_2 = ParquetFileBuilder::new(1001)
|
||||||
|
.with_partition(transition_partition_id_1.clone())
|
||||||
|
.build();
|
||||||
|
let created_1_3 = ParquetFileBuilder::new(1003)
|
||||||
|
.with_partition(transition_partition_id_1)
|
||||||
|
.build();
|
||||||
|
let created_2_1 = ParquetFileBuilder::new(1002)
|
||||||
|
.with_partition(transition_partition_id_2)
|
||||||
|
.build();
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(1),
|
partition_id_1,
|
||||||
&[existing_1.clone(), existing_2.clone()],
|
&[existing_1.clone(), existing_2.clone()],
|
||||||
&[existing_3.clone(), existing_4.clone()],
|
&[existing_3.clone(), existing_4.clone()],
|
||||||
&[created_1_1.clone().into(), created_1_2.clone().into()],
|
&[created_1_1.clone().into(), created_1_2.clone().into()],
|
||||||
|
@ -122,7 +134,7 @@ mod tests {
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(2),
|
partition_id_2,
|
||||||
&[existing_3.clone()],
|
&[existing_3.clone()],
|
||||||
&[],
|
&[],
|
||||||
&[created_2_1.clone().into()],
|
&[created_2_1.clone().into()],
|
||||||
|
@ -136,7 +148,7 @@ mod tests {
|
||||||
|
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(1),
|
partition_id_1,
|
||||||
&[existing_5.clone(), existing_6.clone(), existing_7.clone()],
|
&[existing_5.clone(), existing_6.clone(), existing_7.clone()],
|
||||||
&[],
|
&[],
|
||||||
&[created_1_3.clone().into()],
|
&[created_1_3.clone().into()],
|
||||||
|
@ -151,7 +163,7 @@ mod tests {
|
||||||
// simulate fill implosion of the file (this may happen w/ delete predicates)
|
// simulate fill implosion of the file (this may happen w/ delete predicates)
|
||||||
let ids = commit
|
let ids = commit
|
||||||
.commit(
|
.commit(
|
||||||
PartitionId::new(1),
|
partition_id_1,
|
||||||
&[existing_8.clone()],
|
&[existing_8.clone()],
|
||||||
&[],
|
&[],
|
||||||
&[],
|
&[],
|
||||||
|
@ -167,28 +179,28 @@ mod tests {
|
||||||
commit.history(),
|
commit.history(),
|
||||||
vec![
|
vec![
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(1),
|
partition_id: partition_id_1,
|
||||||
delete: vec![existing_1, existing_2],
|
delete: vec![existing_1, existing_2],
|
||||||
upgrade: vec![existing_3.clone(), existing_4.clone()],
|
upgrade: vec![existing_3.clone(), existing_4.clone()],
|
||||||
created: vec![created_1_1, created_1_2],
|
created: vec![created_1_1, created_1_2],
|
||||||
target_level: CompactionLevel::FileNonOverlapped,
|
target_level: CompactionLevel::FileNonOverlapped,
|
||||||
},
|
},
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(2),
|
partition_id: partition_id_2,
|
||||||
delete: vec![existing_3],
|
delete: vec![existing_3],
|
||||||
upgrade: vec![],
|
upgrade: vec![],
|
||||||
created: vec![created_2_1],
|
created: vec![created_2_1],
|
||||||
target_level: CompactionLevel::Final,
|
target_level: CompactionLevel::Final,
|
||||||
},
|
},
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(1),
|
partition_id: partition_id_1,
|
||||||
delete: vec![existing_5, existing_6, existing_7,],
|
delete: vec![existing_5, existing_6, existing_7,],
|
||||||
upgrade: vec![],
|
upgrade: vec![],
|
||||||
created: vec![created_1_3],
|
created: vec![created_1_3],
|
||||||
target_level: CompactionLevel::FileNonOverlapped,
|
target_level: CompactionLevel::FileNonOverlapped,
|
||||||
},
|
},
|
||||||
CommitHistoryEntry {
|
CommitHistoryEntry {
|
||||||
partition_id: PartitionId::new(1),
|
partition_id: partition_id_1,
|
||||||
delete: vec![existing_8],
|
delete: vec![existing_8],
|
||||||
upgrade: vec![],
|
upgrade: vec![],
|
||||||
created: vec![],
|
created: vec![],
|
||||||
|
|
|
@ -4,7 +4,7 @@ use assert_matches::assert_matches;
|
||||||
use compactor_scheduler::{
|
use compactor_scheduler::{
|
||||||
create_scheduler, CompactionJob, LocalSchedulerConfig, Scheduler, SchedulerConfig,
|
create_scheduler, CompactionJob, LocalSchedulerConfig, Scheduler, SchedulerConfig,
|
||||||
};
|
};
|
||||||
use data_types::{ColumnType, ParquetFile, ParquetFileParams, PartitionId};
|
use data_types::{ColumnType, ParquetFile, ParquetFileParams, PartitionId, TransitionPartitionId};
|
||||||
use iox_tests::{ParquetFileBuilder, TestCatalog, TestParquetFileBuilder, TestPartition};
|
use iox_tests::{ParquetFileBuilder, TestCatalog, TestParquetFileBuilder, TestPartition};
|
||||||
|
|
||||||
mod end_job;
|
mod end_job;
|
||||||
|
@ -65,7 +65,7 @@ impl TestLocalScheduler {
|
||||||
|
|
||||||
pub async fn create_params_for_new_parquet_file(&self) -> ParquetFileParams {
|
pub async fn create_params_for_new_parquet_file(&self) -> ParquetFileParams {
|
||||||
ParquetFileBuilder::new(42)
|
ParquetFileBuilder::new(42)
|
||||||
.with_partition(self.get_partition_id().get())
|
.with_partition(self.get_transition_partition_id())
|
||||||
.build()
|
.build()
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
@ -81,4 +81,8 @@ impl TestLocalScheduler {
|
||||||
pub fn get_partition_id(&self) -> PartitionId {
|
pub fn get_partition_id(&self) -> PartitionId {
|
||||||
self.test_partition.partition.id
|
self.test_partition.partition.id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_transition_partition_id(&self) -> TransitionPartitionId {
|
||||||
|
self.test_partition.partition.transition_partition_id()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
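The new accessor above hands out the partition's TransitionPartitionId next to the existing numeric getter. A hypothetical call site (test_scheduler is an assumed binding), mirroring create_params_for_new_parquet_file():

    use data_types::ParquetFileParams;
    use iox_tests::ParquetFileBuilder;

    // The builder is now fed the transition id rather than the raw i64 id.
    let params: ParquetFileParams = ParquetFileBuilder::new(43)
        .with_partition(test_scheduler.get_transition_partition_id())
        .build()
        .into();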
@ -202,8 +202,7 @@ impl SimulatedFile {
|
||||||
ParquetFileParams {
|
ParquetFileParams {
|
||||||
namespace_id: partition_info.namespace_id,
|
namespace_id: partition_info.namespace_id,
|
||||||
table_id: partition_info.table.id,
|
table_id: partition_info.table.id,
|
||||||
partition_id: partition_info.partition_id,
|
partition_id: partition_info.transition_partition_id(),
|
||||||
partition_hash_id: partition_info.partition_hash_id.clone(),
|
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
min_time,
|
min_time,
|
||||||
max_time,
|
max_time,
|
||||||
|
|
|
@ -527,10 +527,9 @@ pub struct ParquetFile {
|
||||||
pub namespace_id: NamespaceId,
|
pub namespace_id: NamespaceId,
|
||||||
/// the table
|
/// the table
|
||||||
pub table_id: TableId,
|
pub table_id: TableId,
|
||||||
/// the partition
|
/// the partition identifier
|
||||||
pub partition_id: PartitionId,
|
#[sqlx(flatten)]
|
||||||
/// the partition hash ID, if generated
|
pub partition_id: TransitionPartitionId,
|
||||||
pub partition_hash_id: Option<PartitionHashId>,
|
|
||||||
/// the uuid used in the object store path for this file
|
/// the uuid used in the object store path for this file
|
||||||
pub object_store_id: Uuid,
|
pub object_store_id: Uuid,
|
||||||
/// the min timestamp of data in this file
|
/// the min timestamp of data in this file
|
||||||
|
@ -588,7 +587,6 @@ impl ParquetFile {
|
||||||
namespace_id: params.namespace_id,
|
namespace_id: params.namespace_id,
|
||||||
table_id: params.table_id,
|
table_id: params.table_id,
|
||||||
partition_id: params.partition_id,
|
partition_id: params.partition_id,
|
||||||
partition_hash_id: params.partition_hash_id,
|
|
||||||
object_store_id: params.object_store_id,
|
object_store_id: params.object_store_id,
|
||||||
min_time: params.min_time,
|
min_time: params.min_time,
|
||||||
max_time: params.max_time,
|
max_time: params.max_time,
|
||||||
|
@ -602,21 +600,9 @@ impl ParquetFile {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If this parquet file params will be storing a `PartitionHashId` in the catalog, use that.
|
|
||||||
/// Otherwise, use the database-assigned `PartitionId`.
|
|
||||||
pub fn transition_partition_id(&self) -> TransitionPartitionId {
|
|
||||||
TransitionPartitionId::from((self.partition_id, self.partition_hash_id.as_ref()))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Estimate the memory consumption of this object and its contents
|
/// Estimate the memory consumption of this object and its contents
|
||||||
pub fn size(&self) -> usize {
|
pub fn size(&self) -> usize {
|
||||||
std::mem::size_of_val(self)
|
std::mem::size_of_val(self) + self.partition_id.size() + self.column_set.size()
|
||||||
+ self
|
|
||||||
.partition_hash_id
|
|
||||||
.as_ref()
|
|
||||||
.map(|id| id.size() - std::mem::size_of_val(id))
|
|
||||||
.unwrap_or_default()
|
|
||||||
+ self.column_set.size()
|
|
||||||
- std::mem::size_of_val(&self.column_set)
|
- std::mem::size_of_val(&self.column_set)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -638,10 +624,8 @@ pub struct ParquetFileParams {
|
||||||
pub namespace_id: NamespaceId,
|
pub namespace_id: NamespaceId,
|
||||||
/// the table
|
/// the table
|
||||||
pub table_id: TableId,
|
pub table_id: TableId,
|
||||||
/// the partition
|
/// the partition identifier
|
||||||
pub partition_id: PartitionId,
|
pub partition_id: TransitionPartitionId,
|
||||||
/// the partition hash ID, if generated
|
|
||||||
pub partition_hash_id: Option<PartitionHashId>,
|
|
||||||
/// the uuid used in the object store path for this file
|
/// the uuid used in the object store path for this file
|
||||||
pub object_store_id: Uuid,
|
pub object_store_id: Uuid,
|
||||||
/// the min timestamp of data in this file
|
/// the min timestamp of data in this file
|
||||||
|
@ -662,21 +646,12 @@ pub struct ParquetFileParams {
|
||||||
pub max_l0_created_at: Timestamp,
|
pub max_l0_created_at: Timestamp,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ParquetFileParams {
|
|
||||||
/// If this parquet file params will be storing a `PartitionHashId` in the catalog, use that.
|
|
||||||
/// Otherwise, use the database-assigned `PartitionId`.
|
|
||||||
pub fn transition_partition_id(&self) -> TransitionPartitionId {
|
|
||||||
TransitionPartitionId::from((self.partition_id, self.partition_hash_id.as_ref()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<ParquetFile> for ParquetFileParams {
|
impl From<ParquetFile> for ParquetFileParams {
|
||||||
fn from(value: ParquetFile) -> Self {
|
fn from(value: ParquetFile) -> Self {
|
||||||
Self {
|
Self {
|
||||||
namespace_id: value.namespace_id,
|
namespace_id: value.namespace_id,
|
||||||
table_id: value.table_id,
|
table_id: value.table_id,
|
||||||
partition_id: value.partition_id,
|
partition_id: value.partition_id,
|
||||||
partition_hash_id: value.partition_hash_id,
|
|
||||||
object_store_id: value.object_store_id,
|
object_store_id: value.object_store_id,
|
||||||
min_time: value.min_time,
|
min_time: value.min_time,
|
||||||
max_time: value.max_time,
|
max_time: value.max_time,
|
||||||
|
|
|
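With partition_hash_id folded into a single partition_id: TransitionPartitionId field, the transition_partition_id() helpers removed above are no longer needed. A minimal sketch of the call-site migration this implies (partition_of is an illustrative name, not part of the change):

    use data_types::{ParquetFile, TransitionPartitionId};

    fn partition_of(file: &ParquetFile) -> TransitionPartitionId {
        // before: file.transition_partition_id() rebuilt the id from
        //         (file.partition_id, file.partition_hash_id.as_ref())
        // after:  the unified identifier is stored directly on the file
        file.partition_id.clone()
    }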
@ -31,6 +31,34 @@ impl TransitionPartitionId {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a, R> sqlx::FromRow<'a, R> for TransitionPartitionId
|
||||||
|
where
|
||||||
|
R: sqlx::Row,
|
||||||
|
&'static str: sqlx::ColumnIndex<R>,
|
||||||
|
PartitionId: sqlx::decode::Decode<'a, R::Database>,
|
||||||
|
PartitionId: sqlx::types::Type<R::Database>,
|
||||||
|
Option<PartitionHashId>: sqlx::decode::Decode<'a, R::Database>,
|
||||||
|
Option<PartitionHashId>: sqlx::types::Type<R::Database>,
|
||||||
|
{
|
||||||
|
fn from_row(row: &'a R) -> sqlx::Result<Self> {
|
||||||
|
let partition_id: Option<PartitionId> = row.try_get("partition_id")?;
|
||||||
|
let partition_hash_id: Option<PartitionHashId> = row.try_get("partition_hash_id")?;
|
||||||
|
|
||||||
|
let transition_partition_id = match (partition_id, partition_hash_id) {
|
||||||
|
(_, Some(hash_id)) => TransitionPartitionId::Deterministic(hash_id),
|
||||||
|
(Some(id), _) => TransitionPartitionId::Deprecated(id),
|
||||||
|
(None, None) => {
|
||||||
|
return Err(sqlx::Error::ColumnDecode {
|
||||||
|
index: "partition_id".into(),
|
||||||
|
source: "Both partition_id and partition_hash_id were NULL".into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(transition_partition_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<(PartitionId, Option<&PartitionHashId>)> for TransitionPartitionId {
|
impl From<(PartitionId, Option<&PartitionHashId>)> for TransitionPartitionId {
|
||||||
fn from((partition_id, partition_hash_id): (PartitionId, Option<&PartitionHashId>)) -> Self {
|
fn from((partition_id, partition_hash_id): (PartitionId, Option<&PartitionHashId>)) -> Self {
|
||||||
partition_hash_id
|
partition_hash_id
|
||||||
|
|
|
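The FromRow impl added above collapses the two nullable catalog columns into one identifier. Its precedence, restated as a plain function for readability (the error type is simplified here):

    use data_types::{PartitionHashId, PartitionId, TransitionPartitionId};

    fn decode_partition_columns(
        partition_id: Option<PartitionId>,
        partition_hash_id: Option<PartitionHashId>,
    ) -> Result<TransitionPartitionId, String> {
        match (partition_id, partition_hash_id) {
            // A deterministic hash id always wins when present.
            (_, Some(hash_id)) => Ok(TransitionPartitionId::Deterministic(hash_id)),
            // Otherwise fall back to the database-assigned id.
            (Some(id), _) => Ok(TransitionPartitionId::Deprecated(id)),
            // Both columns NULL is a decode error, as in the sqlx impl above.
            (None, None) => Err("both partition_id and partition_hash_id were NULL".into()),
        }
    }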
@ -267,8 +267,7 @@ mod tests {
|
||||||
let parquet_file_params = ParquetFileParams {
|
let parquet_file_params = ParquetFileParams {
|
||||||
namespace_id: namespace.id,
|
namespace_id: namespace.id,
|
||||||
table_id: partition.table_id,
|
table_id: partition.table_id,
|
||||||
partition_id: partition.id,
|
partition_id: partition.transition_partition_id(),
|
||||||
partition_hash_id: partition.hash_id().cloned(),
|
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
min_time: Timestamp::new(1),
|
min_time: Timestamp::new(1),
|
||||||
max_time: Timestamp::new(10),
|
max_time: Timestamp::new(10),
|
||||||
|
@ -298,7 +297,7 @@ mod tests {
|
||||||
let location = ParquetFilePath::new(
|
let location = ParquetFilePath::new(
|
||||||
file_in_catalog.namespace_id,
|
file_in_catalog.namespace_id,
|
||||||
file_in_catalog.table_id,
|
file_in_catalog.table_id,
|
||||||
&file_in_catalog.transition_partition_id(),
|
&file_in_catalog.partition_id.clone(),
|
||||||
file_in_catalog.object_store_id,
|
file_in_catalog.object_store_id,
|
||||||
)
|
)
|
||||||
.object_store_path();
|
.object_store_path();
|
||||||
|
@ -376,7 +375,7 @@ mod tests {
|
||||||
let location = ParquetFilePath::new(
|
let location = ParquetFilePath::new(
|
||||||
file_in_catalog.namespace_id,
|
file_in_catalog.namespace_id,
|
||||||
file_in_catalog.table_id,
|
file_in_catalog.table_id,
|
||||||
&file_in_catalog.transition_partition_id(),
|
&file_in_catalog.partition_id.clone(),
|
||||||
file_in_catalog.object_store_id,
|
file_in_catalog.object_store_id,
|
||||||
)
|
)
|
||||||
.object_store_path();
|
.object_store_path();
|
||||||
|
@ -469,7 +468,7 @@ mod tests {
|
||||||
let loc = ParquetFilePath::new(
|
let loc = ParquetFilePath::new(
|
||||||
file_in_catalog.namespace_id,
|
file_in_catalog.namespace_id,
|
||||||
file_in_catalog.table_id,
|
file_in_catalog.table_id,
|
||||||
&file_in_catalog.transition_partition_id(),
|
&file_in_catalog.partition_id.clone(),
|
||||||
file_in_catalog.object_store_id,
|
file_in_catalog.object_store_id,
|
||||||
)
|
)
|
||||||
.object_store_path();
|
.object_store_path();
|
||||||
|
|
|
@ -52,6 +52,7 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
|
||||||
let proto_files = vec![
|
let proto_files = vec![
|
||||||
authz_path.join("authz.proto"),
|
authz_path.join("authz.proto"),
|
||||||
catalog_path.join("parquet_file.proto"),
|
catalog_path.join("parquet_file.proto"),
|
||||||
|
catalog_path.join("partition_identifier.proto"),
|
||||||
catalog_path.join("service.proto"),
|
catalog_path.join("service.proto"),
|
||||||
compactor_path.join("service.proto"),
|
compactor_path.join("service.proto"),
|
||||||
delete_path.join("service.proto"),
|
delete_path.join("service.proto"),
|
||||||
|
|
|
@ -2,6 +2,8 @@ syntax = "proto3";
|
||||||
package influxdata.iox.catalog.v1;
|
package influxdata.iox.catalog.v1;
|
||||||
option go_package = "github.com/influxdata/iox/catalog/v1";
|
option go_package = "github.com/influxdata/iox/catalog/v1";
|
||||||
|
|
||||||
|
import "influxdata/iox/catalog/v1/partition_identifier.proto";
|
||||||
|
|
||||||
message ParquetFile {
|
message ParquetFile {
|
||||||
reserved 7;
|
reserved 7;
|
||||||
reserved "min_sequence_number";
|
reserved "min_sequence_number";
|
||||||
|
@ -11,6 +13,8 @@ message ParquetFile {
|
||||||
reserved "shard_id";
|
reserved "shard_id";
|
||||||
reserved 8;
|
reserved 8;
|
||||||
reserved "max_sequence_number";
|
reserved "max_sequence_number";
|
||||||
|
reserved 5;
|
||||||
|
reserved "partition_id";
|
||||||
|
|
||||||
// the id of the file in the catalog
|
// the id of the file in the catalog
|
||||||
int64 id = 1;
|
int64 id = 1;
|
||||||
|
@ -18,8 +22,9 @@ message ParquetFile {
|
||||||
int64 namespace_id = 3;
|
int64 namespace_id = 3;
|
||||||
// the table id
|
// the table id
|
||||||
int64 table_id = 4;
|
int64 table_id = 4;
|
||||||
// the partition id
|
|
||||||
int64 partition_id = 5;
|
PartitionIdentifier partition_identifier = 19;
|
||||||
|
|
||||||
// the object store uuid
|
// the object store uuid
|
||||||
string object_store_id = 6;
|
string object_store_id = 6;
|
||||||
// the min timestamp of data in this file
|
// the min timestamp of data in this file
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
syntax = "proto3";
|
||||||
|
package influxdata.iox.catalog.v1;
|
||||||
|
option go_package = "github.com/influxdata/iox/catalog/v1";
|
||||||
|
|
||||||
|
message PartitionIdentifier {
|
||||||
|
// Either the catalog-assigned partition ID or the deterministic identifier created from the
|
||||||
|
// table ID and partition key.
|
||||||
|
oneof id {
|
||||||
|
int64 catalog_id = 1;
|
||||||
|
bytes hash_id = 2;
|
||||||
|
}
|
||||||
|
}
|
|
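Other hunks in this diff consume the new message through prost-generated types, so the struct is assumed to carry the oneof as an Option. A rough construction sketch under that assumption (helper names are illustrative):

    use generated_types::influxdata::iox::catalog::v1::{partition_identifier, PartitionIdentifier};

    // Wrap a catalog-assigned id.
    fn catalog_identifier(id: i64) -> PartitionIdentifier {
        PartitionIdentifier {
            id: Some(partition_identifier::Id::CatalogId(id)),
        }
    }

    // Wrap the deterministic hash id bytes.
    fn hash_identifier(hash_id: Vec<u8>) -> PartitionIdentifier {
        PartitionIdentifier {
            id: Some(partition_identifier::Id::HashId(hash_id)),
        }
    }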
@ -3,6 +3,7 @@ package influxdata.iox.catalog.v1;
|
||||||
option go_package = "github.com/influxdata/iox/catalog/v1";
|
option go_package = "github.com/influxdata/iox/catalog/v1";
|
||||||
|
|
||||||
import "influxdata/iox/catalog/v1/parquet_file.proto";
|
import "influxdata/iox/catalog/v1/parquet_file.proto";
|
||||||
|
import "influxdata/iox/catalog/v1/partition_identifier.proto";
|
||||||
|
|
||||||
service CatalogService {
|
service CatalogService {
|
||||||
// Get the parquet_file catalog records in the given partition
|
// Get the parquet_file catalog records in the given partition
|
||||||
|
@ -19,8 +20,11 @@ service CatalogService {
|
||||||
}
|
}
|
||||||
|
|
||||||
message GetParquetFilesByPartitionIdRequest {
|
message GetParquetFilesByPartitionIdRequest {
|
||||||
// the partition id
|
// Was the catalog-assigned partition ID.
|
||||||
int64 partition_id = 1;
|
reserved 1;
|
||||||
|
reserved "partition_id";
|
||||||
|
|
||||||
|
PartitionIdentifier partition_identifier = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GetParquetFilesByPartitionIdResponse {
|
message GetParquetFilesByPartitionIdResponse {
|
||||||
|
@ -35,15 +39,17 @@ message Partition {
|
||||||
reserved "sequencer_id";
|
reserved "sequencer_id";
|
||||||
reserved 7;
|
reserved 7;
|
||||||
reserved "shard_id";
|
reserved "shard_id";
|
||||||
|
reserved 1;
|
||||||
|
reserved "id";
|
||||||
|
|
||||||
// the partition id
|
|
||||||
int64 id = 1;
|
|
||||||
// the table id the partition is in
|
// the table id the partition is in
|
||||||
int64 table_id = 3;
|
int64 table_id = 3;
|
||||||
// the partition key
|
// the partition key
|
||||||
string key = 4;
|
string key = 4;
|
||||||
// the sort key for data in parquet files in the partition
|
// the sort key for data in parquet files in the partition
|
||||||
repeated string array_sort_key = 6;
|
repeated string array_sort_key = 6;
|
||||||
|
|
||||||
|
PartitionIdentifier identifier = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GetPartitionsByTableIdRequest {
|
message GetPartitionsByTableIdRequest {
|
||||||
|
|
|
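The request message now carries the oneof instead of a raw partition id. A sketch of how a caller would populate it, assuming the prost-generated shape and module path used elsewhere in this diff:

    use generated_types::influxdata::iox::catalog::v1::{
        partition_identifier, GetParquetFilesByPartitionIdRequest, PartitionIdentifier,
    };

    // Identify the partition by its catalog id; a hash_id works the same way
    // through the other oneof variant.
    let request = GetParquetFilesByPartitionIdRequest {
        partition_identifier: Some(PartitionIdentifier {
            id: Some(partition_identifier::Id::CatalogId(1234)),
        }),
    };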
@ -1,5 +1,5 @@
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use tracing::warn;
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
// Re-export the bytes type to ensure upstream users of this crate are
|
// Re-export the bytes type to ensure upstream users of this crate are
|
||||||
// interacting with the same type.
|
// interacting with the same type.
|
||||||
|
@ -32,5 +32,7 @@ pub struct NopDispatcher;
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl Dispatcher for NopDispatcher {
|
impl Dispatcher for NopDispatcher {
|
||||||
async fn dispatch(&self, _payload: crate::Bytes) {}
|
async fn dispatch(&self, _payload: crate::Bytes) {
|
||||||
|
debug!("received no-op message payload");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
|
use data_types::{PartitionHashId, PartitionId, TransitionPartitionId};
|
||||||
use futures_util::TryStreamExt;
|
use futures_util::TryStreamExt;
|
||||||
use influxdb_iox_client::{
|
use influxdb_iox_client::{
|
||||||
catalog::{self, generated_types::ParquetFile},
|
catalog::{
|
||||||
|
self,
|
||||||
|
generated_types::{partition_identifier, ParquetFile, PartitionIdentifier},
|
||||||
|
},
|
||||||
connection::Connection,
|
connection::Connection,
|
||||||
store,
|
store,
|
||||||
};
|
};
|
||||||
use observability_deps::tracing::{debug, info};
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::{
|
use tokio::{
|
||||||
|
@ -35,10 +38,6 @@ type Result<T, E = ExportError> = std::result::Result<T, E>;
|
||||||
pub struct RemoteExporter {
|
pub struct RemoteExporter {
|
||||||
catalog_client: catalog::Client,
|
catalog_client: catalog::Client,
|
||||||
store_client: store::Client,
|
store_client: store::Client,
|
||||||
|
|
||||||
/// Optional partition filter. If `Some(partition_id)`, only these
|
|
||||||
/// files with that `partition_id` are downloaded.
|
|
||||||
partition_filter: Option<i64>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RemoteExporter {
|
impl RemoteExporter {
|
||||||
|
@ -46,19 +45,9 @@ impl RemoteExporter {
|
||||||
Self {
|
Self {
|
||||||
catalog_client: catalog::Client::new(connection.clone()),
|
catalog_client: catalog::Client::new(connection.clone()),
|
||||||
store_client: store::Client::new(connection),
|
store_client: store::Client::new(connection),
|
||||||
partition_filter: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Specify that only files and metadata for the specific
|
|
||||||
/// partition id should be exported.
|
|
||||||
pub fn with_partition_filter(mut self, partition_id: i64) -> Self {
|
|
||||||
info!(partition_id, "Filtering by partition");
|
|
||||||
|
|
||||||
self.partition_filter = Some(partition_id);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exports all data and metadata for `table_name` in
|
/// Exports all data and metadata for `table_name` in
|
||||||
/// `namespace` to local files.
|
/// `namespace` to local files.
|
||||||
///
|
///
|
||||||
|
@ -95,39 +84,14 @@ impl RemoteExporter {
|
||||||
let indexed_parquet_file_metadata = parquet_files.into_iter().enumerate();
|
let indexed_parquet_file_metadata = parquet_files.into_iter().enumerate();
|
||||||
|
|
||||||
for (index, parquet_file) in indexed_parquet_file_metadata {
|
for (index, parquet_file) in indexed_parquet_file_metadata {
|
||||||
if self.should_export(parquet_file.partition_id) {
|
self.export_parquet_file(&output_directory, index, num_parquet_files, &parquet_file)
|
||||||
self.export_parquet_file(
|
|
||||||
&output_directory,
|
|
||||||
index,
|
|
||||||
num_parquet_files,
|
|
||||||
&parquet_file,
|
|
||||||
)
|
|
||||||
.await?;
|
.await?;
|
||||||
} else {
|
|
||||||
debug!(
|
|
-"skipping file {} of {num_parquet_files} ({} does not match request)",
-index + 1,
-parquet_file.partition_id
-);
-}
 }
 println!("Done.");

 Ok(())
 }

-/// Return true if this partition should be exported
-fn should_export(&self, partition_id: i64) -> bool {
-self.partition_filter
-.map(|partition_filter| {
-// if a partition filter was specified, only export
-// the file if the partition matches
-partition_filter == partition_id
-})
-// export files if there is no partition
-.unwrap_or(true)
-}

 /// Exports table and partition information for the specified
 /// table. Overwrites existing files, if any, to ensure it has the
 /// latest catalog information.
@@ -158,14 +122,12 @@ impl RemoteExporter {
 .await?;

 for partition in partitions {
-let partition_id = partition.id;
+let partition_id = to_partition_id(partition.identifier.as_ref());
-if self.should_export(partition_id) {
 let partition_json = serde_json::to_string_pretty(&partition)?;
 let filename = format!("partition.{partition_id}.json");
 let file_path = output_directory.join(&filename);
 write_string_to_file(&partition_json, &file_path).await?;
 }
-}

 Ok(())
 }
@@ -183,9 +145,10 @@ impl RemoteExporter {
 parquet_file: &ParquetFile,
 ) -> Result<()> {
 let uuid = &parquet_file.object_store_id;
-let partition_id = parquet_file.partition_id;
 let file_size_bytes = parquet_file.file_size_bytes as u64;

+let partition_id = to_partition_id(parquet_file.partition_identifier.as_ref());
+
 // copy out the metadata as pbjson encoded data always (to
 // ensure we have the most up to date version)
 {
@@ -230,6 +193,21 @@ impl RemoteExporter {
 }
 }

+fn to_partition_id(partition_identifier: Option<&PartitionIdentifier>) -> TransitionPartitionId {
+match partition_identifier
+.and_then(|pi| pi.id.as_ref())
+.expect("Catalog service should send the partition identifier")
+{
+partition_identifier::Id::HashId(bytes) => TransitionPartitionId::Deterministic(
+PartitionHashId::try_from(&bytes[..])
+.expect("Catalog service should send valid hash_id bytes"),
+),
+partition_identifier::Id::CatalogId(id) => {
+TransitionPartitionId::Deprecated(PartitionId::new(*id))
+}
+}
+}
+
 /// writes the contents of a string to a file, overwriting the previous contents, if any
 async fn write_string_to_file(contents: &str, path: &Path) -> Result<()> {
 let mut file = OpenOptions::new()
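A minimal sketch, not part of this diff, of how the two `PartitionIdentifier` variants map onto `TransitionPartitionId` in the `to_partition_id` helper above. The import path and the literal ID are assumptions for illustration only:

    // Hypothetical usage inside the same module as to_partition_id().
    // The protobuf types are assumed to live under the catalog v1 module.
    use generated_types::influxdata::iox::catalog::v1::{partition_identifier, PartitionIdentifier};

    let by_catalog_id = PartitionIdentifier {
        id: Some(partition_identifier::Id::CatalogId(42)),
    };
    // An old-style catalog row ID becomes the "deprecated" variant...
    assert!(matches!(
        to_partition_id(Some(&by_catalog_id)),
        TransitionPartitionId::Deprecated(_)
    ));
    // ...while Id::HashId(bytes) becomes TransitionPartitionId::Deterministic.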
@@ -7,7 +7,7 @@ use data_types::{
 NamespacePartitionTemplateOverride, TablePartitionTemplateOverride, PARTITION_BY_DAY_PROTO,
 },
 ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceName, NamespaceNameError,
-ParquetFileParams, Partition, PartitionHashId, Statistics, Table, TableId, Timestamp,
+ParquetFileParams, Partition, Statistics, Table, TableId, Timestamp,
 };
 use generated_types::influxdata::iox::catalog::v1 as proto;
 // ParquetFile as ProtoParquetFile, Partition as ProtoPartition,
@@ -567,9 +567,6 @@ impl RemoteImporter {
 // need to make columns in the target catalog
 let column_set = insert_columns(table.id, decoded_iox_parquet_metadata, repos).await?;

-// Create the the partition_hash_id
-let partition_hash_id = Some(PartitionHashId::new(table.id, &partition.partition_key));
-
 let params = if let Some(proto_parquet_file) = &parquet_metadata {
 let compaction_level = proto_parquet_file
 .compaction_level
@@ -579,8 +576,7 @@ impl RemoteImporter {
 ParquetFileParams {
 namespace_id: namespace.id,
 table_id: table.id,
-partition_hash_id,
-partition_id: partition.id,
+partition_id: partition.transition_partition_id(),
 object_store_id,
 min_time: Timestamp::new(proto_parquet_file.min_time),
 max_time: Timestamp::new(proto_parquet_file.max_time),
@@ -599,8 +595,7 @@ impl RemoteImporter {
 ParquetFileParams {
 namespace_id: namespace.id,
 table_id: table.id,
-partition_hash_id,
-partition_id: partition.id,
+partition_id: partition.transition_partition_id(),
 object_store_id,
 min_time,
 max_time,
@@ -67,7 +67,7 @@ libc = { version = "0.2" }
 num_cpus = "1.16.0"
 once_cell = { version = "1.18", features = ["parking_lot"] }
 rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
-serde = "1.0.177"
+serde = "1.0.179"
 serde_json = "1.0.104"
 snafu = "0.7"
 tempfile = "3.7.0"
@@ -55,10 +55,6 @@ struct GetTable {
 #[clap(action)]
 table: String,

-/// If specified, only files from the specified partitions are downloaded
-#[clap(action, short, long)]
-partition_id: Option<i64>,
-
 /// The output directory to use. If not specified, files will be placed in a directory named
 /// after the table in the current working directory.
 #[clap(action, short)]
@@ -91,13 +87,9 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
 Command::GetTable(GetTable {
 namespace,
 table,
-partition_id,
 output_directory,
 }) => {
 let mut exporter = RemoteExporter::new(connection);
-if let Some(partition_id) = partition_id {
-exporter = exporter.with_partition_filter(partition_id);
-}
 Ok(exporter
 .export_table(output_directory, namespace, table)
 .await?)
@@ -7,6 +7,7 @@ use clap_blocks::{
 catalog_dsn::CatalogDsnConfig,
 compactor::CompactorConfig,
 compactor_scheduler::CompactorSchedulerConfig,
+gossip::GossipConfig,
 ingester::IngesterConfig,
 ingester_address::IngesterAddress,
 object_store::{make_object_store, ObjectStoreConfig},
@@ -476,6 +477,7 @@ impl Config {
 persist_queue_depth,
 persist_hot_partition_cost,
 rpc_write_max_incoming_bytes: 1024 * 1024 * 1024, // 1GiB
+gossip_config: GossipConfig::disabled(),
 };

 let router_config = RouterConfig {
@@ -489,6 +491,7 @@ impl Config {
 rpc_write_replicas: 1.try_into().unwrap(),
 rpc_write_max_outgoing_bytes: ingester_config.rpc_write_max_incoming_bytes,
 rpc_write_health_error_window_seconds: Duration::from_secs(5),
+gossip_config: GossipConfig::disabled(),
 };

 // create a CompactorConfig for the all in one server based on
@@ -637,6 +640,7 @@ pub async fn command(config: Config) -> Result<()> {
 Arc::clone(&catalog),
 Arc::clone(&object_store),
 &router_config,
+&GossipConfig::disabled(),
 router_run_config
 .tracing_config()
 .traces_jaeger_trace_context_header_name
@@ -98,6 +98,7 @@ pub async fn command(config: Config) -> Result<()> {
 catalog,
 object_store,
 &config.router_config,
+&config.router_config.gossip_config,
 config
 .run_config
 .tracing_config()
@@ -157,8 +157,10 @@ async fn sharded_compactor_0_always_compacts_partition_1() {
 .assert()
 .success()
 .stdout(
-// Important parts are the expected partition ID
-predicate::str::contains(r#""partitionId": "1","#)
+// Important parts are the expected partition identifier
+predicate::str::contains(
+r#""hashId": "uGKn6bMp7mpBjN4ZEZjq6xUSdT8ZuHqB3vKubD0O0jc=""#,
+)
 // and compaction level
 .and(predicate::str::contains(r#""compactionLevel": 1"#)),
 );
@@ -240,8 +242,10 @@ async fn sharded_compactor_1_never_compacts_partition_1() {
 .assert()
 .success()
 .stdout(
-// Important parts are the expected partition ID
-predicate::str::contains(r#""partitionId": "1","#)
+// Important parts are the expected partition identifier
+predicate::str::contains(
+r#""hashId": "uGKn6bMp7mpBjN4ZEZjq6xUSdT8ZuHqB3vKubD0O0jc=""#,
+)
 // and compaction level is 0 so it's not returned
 .and(predicate::str::contains("compactionLevel").not()),
 );

@@ -280,10 +280,9 @@ async fn remote_partition_and_get_from_store_and_pull() {
 .arg("1")
 .assert()
 .success()
-.stdout(
-predicate::str::contains(r#""id": "1""#)
-.and(predicate::str::contains(r#""partitionId": "1","#)),
-)
+.stdout(predicate::str::contains(
+r#""hashId": "uGKn6bMp7mpBjN4ZEZjq6xUSdT8ZuHqB3vKubD0O0jc=""#,
+))
 .get_output()
 .stdout
 .clone();
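The fixed "hashId" strings asserted above work because the hash identifier is derived deterministically from the table ID and partition key (the importer previously built it with `PartitionHashId::new(table.id, &partition.partition_key)`). A small sketch, with a made-up table ID and key:

    use data_types::{PartitionHashId, PartitionKey, TableId};

    // The same (table, partition key) pair always yields the same hash ID,
    // which is why an end-to-end test can assert a stable "hashId" value.
    let a = PartitionHashId::new(TableId::new(1), &PartitionKey::from("2023-07-31"));
    let b = PartitionHashId::new(TableId::new(1), &PartitionKey::from("2023-07-31"));
    assert_eq!(a, b);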
@@ -29,9 +29,15 @@ impl Client {
 &mut self,
 partition_id: i64,
 ) -> Result<Vec<ParquetFile>, Error> {
+let partition_identifier = PartitionIdentifier {
+id: Some(partition_identifier::Id::CatalogId(partition_id)),
+};
+
 let response = self
 .inner
-.get_parquet_files_by_partition_id(GetParquetFilesByPartitionIdRequest { partition_id })
+.get_parquet_files_by_partition_id(GetParquetFilesByPartitionIdRequest {
+partition_identifier: Some(partition_identifier),
+})
 .await?;

 Ok(response.into_inner().parquet_files)
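For comparison, a sketch (not in this diff) of building the same request with the hash-ID variant of the identifier; the byte value is a placeholder:

    // Id::HashId carries the raw partition hash ID bytes rather than a
    // catalog row ID.
    let by_hash = PartitionIdentifier {
        id: Some(partition_identifier::Id::HashId(vec![0u8; 32])),
    };
    let _request = GetParquetFilesByPartitionIdRequest {
        partition_identifier: Some(by_hash),
    };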
@@ -48,6 +48,7 @@ trace = { version = "0.1.0", path = "../trace" }
 uuid = "1.4.1"
 wal = { version = "0.1.0", path = "../wal" }
 workspace-hack = { version = "0.1", path = "../workspace-hack" }
+gossip = { version = "0.1.0", path = "../gossip" }

 [dev-dependencies]
 assert_matches = "1.5.0"
@@ -1,6 +1,6 @@
 //! Partition level data buffer structures.

-use std::{collections::VecDeque, sync::Arc};
+use std::sync::Arc;

 use data_types::{
 sequence_number_set::SequenceNumberSet, NamespaceId, PartitionHashId, PartitionId,
@@ -8,11 +8,12 @@ use data_types::{
 };
 use mutable_batch::MutableBatch;
 use observability_deps::tracing::*;
-use schema::sort::SortKey;
+use schema::{merge::SchemaMerger, sort::SortKey, Schema};

 use self::{
-buffer::{traits::Queryable, BufferState, DataBuffer, Persisting},
+buffer::{traits::Queryable, DataBuffer},
 persisting::{BatchIdent, PersistingData},
+persisting_list::PersistingList,
 };
 use super::{namespace::NamespaceName, table::TableMetadata};
 use crate::{
@@ -21,6 +22,7 @@ use crate::{

 mod buffer;
 pub(crate) mod persisting;
+mod persisting_list;
 pub(crate) mod resolver;

 /// The load state of the [`SortKey`] for a given partition.
@@ -89,7 +91,7 @@ pub struct PartitionData {
 ///
 /// The [`BatchIdent`] is a generational counter that is used to tag each
 /// persisting with a unique, opaque identifier.
-persisting: VecDeque<(BatchIdent, BufferState<Persisting>)>,
+persisting: PersistingList,

 /// The number of persist operations started over the lifetime of this
 /// [`PartitionData`].
@@ -123,7 +125,7 @@ impl PartitionData {
 table_id,
 table,
 buffer: DataBuffer::default(),
-persisting: VecDeque::with_capacity(1),
+persisting: PersistingList::default(),
 started_persistence_count: BatchIdent::default(),
 completed_persistence_count: 0,
 }
@@ -169,7 +171,7 @@ impl PartitionData {
 /// persisting batches, plus 1 for the "hot" buffer. Reading the row count
 /// of each batch is `O(1)`. This method is expected to be fast.
 pub(crate) fn rows(&self) -> usize {
-self.persisting.iter().map(|(_, v)| v.rows()).sum::<usize>() + self.buffer.rows()
+self.persisting.rows() + self.buffer.rows()
 }

 /// Return the timestamp min/max values for the data contained within this
@@ -188,11 +190,8 @@ impl PartitionData {
 /// statistics for each batch is `O(1)`. This method is expected to be fast.
 pub(crate) fn timestamp_stats(&self) -> Option<TimestampMinMax> {
 self.persisting
-.iter()
-.map(|(_, v)| {
-v.timestamp_stats()
-.expect("persisting batches must be non-empty")
-})
+.timestamp_stats()
+.into_iter()
 .chain(self.buffer.timestamp_stats())
 .reduce(|acc, v| TimestampMinMax {
 min: acc.min.min(v.min),
@@ -200,6 +199,30 @@ impl PartitionData {
 })
 }

+/// Return the schema of the data currently buffered within this
+/// [`PartitionData`].
+///
+/// This schema is not additive - it is the union of the individual schema
+/// batches currently buffered and as such columns are removed as the
+/// individual batches containing those columns are persisted and dropped.
+pub(crate) fn schema(&self) -> Option<Schema> {
+if self.persisting.is_empty() && self.buffer.rows() == 0 {
+return None;
+}
+
+Some(
+self.persisting
+.schema()
+.into_iter()
+.cloned()
+.chain(self.buffer.schema())
+.fold(SchemaMerger::new(), |acc, v| {
+acc.merge(&v).expect("schemas are incompatible")
+})
+.build(),
+)
+}
+
 /// Return all data for this partition, ordered by the calls to
 /// [`PartitionData::buffer_write()`].
 pub(crate) fn get_query_data(&mut self, projection: &OwnedProjection) -> Option<QueryAdaptor> {
@@ -213,8 +236,7 @@ impl PartitionData {
 // existing rows materialise to the correct output.
 let data = self
 .persisting
-.iter()
-.flat_map(|(_, b)| b.get_query_data(projection))
+.get_query_data(projection)
 .chain(buffered_data)
 .collect::<Vec<_>>();

@@ -287,7 +309,7 @@ impl PartitionData {
 // Increment the "started persist" counter.
 //
 // This is used to cheaply identify batches given to the
-// mark_persisted() call.
+// mark_persisted() call and ensure monotonicity.
 let batch_ident = self.started_persistence_count.next();

 debug!(
@@ -310,10 +332,9 @@ impl PartitionData {
 batch_ident,
 );

-// Push the new buffer to the back of the persisting queue, so that
-// iterating from back to front during queries iterates over writes from
-// oldest to newest.
-self.persisting.push_back((batch_ident, fsm));
+// Push the buffer into the persisting list (which maintains batch
+// order).
+self.persisting.push(batch_ident, fsm);

 Some(data)
 }
@@ -328,22 +349,11 @@ impl PartitionData {
 /// This method panics if [`Self`] is not marked as undergoing a persist
 /// operation, or `batch` is not currently being persisted.
 pub(crate) fn mark_persisted(&mut self, batch: PersistingData) -> SequenceNumberSet {
-// Find the batch in the persisting queue.
-let idx = self
-.persisting
-.iter()
-.position(|(old, _)| *old == batch.batch_ident())
-.expect("no currently persisting batch");
-
-// Remove the batch from the queue, preserving the order of the queue
-// for batch iteration during queries.
-let (old_ident, fsm) = self.persisting.remove(idx).unwrap();
-assert_eq!(old_ident, batch.batch_ident());
+let fsm = self.persisting.remove(batch.batch_ident());

 self.completed_persistence_count += 1;

 debug!(
-batch_ident = %old_ident,
 persistence_count = %self.completed_persistence_count,
 namespace_id = %self.namespace_id,
 table_id = %self.table_id,
@@ -7,7 +7,7 @@ use schema::Projection;
 ///
 /// A [`Buffer`] can contain no writes.
 ///
-/// [`BufferState`]: super::super::BufferState
+/// [`BufferState`]: super::BufferState
 #[derive(Debug, Default)]
 pub(super) struct Buffer {
 buffer: Option<MutableBatch>,
@@ -77,7 +77,7 @@ pub(crate) struct BufferState<T> {

 impl BufferState<Buffering> {
 /// Initialise a new buffer state machine.
-pub(super) fn new() -> Self {
+pub(crate) fn new() -> Self {
 Self {
 state: Buffering::default(),
 sequence_numbers: SequenceNumberSet::default(),
@@ -2,14 +2,18 @@ use std::fmt::Display;

 use crate::query_adaptor::QueryAdaptor;

-/// An opaque generational identifier of a buffer in a [`PartitionData`].
+/// An opaque, monotonic generational identifier of a buffer in a
+/// [`PartitionData`].
+///
+/// A [`BatchIdent`] is strictly greater than all those that were obtained
+/// before it.
 ///
 /// [`PartitionData`]: super::PartitionData
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
-pub(super) struct BatchIdent(u64);
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd)]
+pub(crate) struct BatchIdent(u64);

 impl BatchIdent {
-/// Return the next unique value.
+/// Return the next unique monotonic value.
 pub(super) fn next(&mut self) -> Self {
 self.0 += 1;
 Self(self.0)
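The `PartialOrd` derive added above is what allows callers to assert ordering between identifiers; a minimal sketch within the same crate:

    // Each call to next() yields a strictly larger BatchIdent.
    let mut oracle = BatchIdent::default();
    let first = oracle.next();
    let second = oracle.next();
    assert!(second > first);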
@@ -0,0 +1,467 @@
+use std::collections::VecDeque;
+
+use arrow::record_batch::RecordBatch;
+use data_types::TimestampMinMax;
+use schema::{merge::SchemaMerger, Schema};
+
+use crate::query::projection::OwnedProjection;
+
+use super::{
+buffer::{traits::Queryable, BufferState, Persisting},
+persisting::BatchIdent,
+};
+
+/// An ordered list of buffered, persisting data as [`BufferState<Persisting>`]
+/// FSM instances.
+///
+/// This type maintains a cache of row count & timestamp min/max statistics
+/// across all persisting batches, and performs incremental computation at
+/// persist time, moving it out of the query execution path.
+#[derive(Debug)]
+pub(crate) struct PersistingList {
+/// The currently persisting [`DataBuffer`] instances, if any.
+///
+/// This queue is ordered from newest at the head, to oldest at the tail -
+/// forward iteration order matches write order.
+///
+/// The [`BatchIdent`] is a generational counter that is used to tag each
+/// persisting with a unique, opaque, monotonic identifier.
+///
+/// [`DataBuffer`]: super::buffer::DataBuffer
+persisting: VecDeque<(BatchIdent, BufferState<Persisting>)>,
+
+cached: Option<CachedStats>,
+}
+
+impl Default for PersistingList {
+fn default() -> Self {
+Self {
+persisting: VecDeque::with_capacity(1),
+cached: None,
+}
+}
+}
+
+impl PersistingList {
+/// Add this `buffer` which was assigned `ident` when marked as persisting
+/// to the list.
+///
+/// This call incrementally recomputes the cached data statistics.
+///
+/// # Panics
+///
+/// Panics if a batch with a later `ident` has already been added to this
+/// list - calls MUST push ordered buffers/idents to maintain correct
+/// ordering of row updates across batches.
+///
+/// The provided buffer MUST be non-empty (containing a timestamp column,
+/// and a schema)
+pub(crate) fn push(&mut self, ident: BatchIdent, buffer: BufferState<Persisting>) {
+// Recompute the statistics.
+match &mut self.cached {
+Some(v) => v.push(&buffer),
+None => {
+// Set the cached stats, as there's no other stats to merge
+// with, so skip merging schemas.
+self.cached = Some(CachedStats {
+rows: buffer.rows(),
+timestamps: buffer
+.timestamp_stats()
+.expect("persisting batch must contain timestamps"),
+schema: buffer.schema().expect("persisting batch must have schema"),
+});
+}
+}
+
+// Invariant: the batch being added MUST be ordered strictly after
+// existing batches.
+//
+// The BatchIdent provides this ordering assurance, as it is a monotonic
+// (opaque) identifier.
+assert!(self
+.persisting
+.back()
+.map(|(last, _)| ident > *last)
+.unwrap_or(true));
+
+self.persisting.push_back((ident, buffer));
+}
+
+/// Remove the buffer identified by `ident` from the list.
+///
+/// There is no ordering requirement for this call, but is more efficient
+/// when removals match the order of calls to [`PersistingList::push()`].
+///
+/// # Panics
+///
+/// This method panics if there is currently no batch identified by `ident`
+/// in the list.
+pub(crate) fn remove(&mut self, ident: BatchIdent) -> BufferState<Persisting> {
+let idx = self
+.persisting
+.iter()
+.position(|(old, _)| *old == ident)
+.expect("no currently persisting batch");
+
+let (old_ident, fsm) = self.persisting.remove(idx).unwrap();
+assert_eq!(old_ident, ident);
+
+// Recompute the cache of all remaining persisting batch stats (if any)
+self.cached = CachedStats::new(self.persisting.iter().map(|(_, v)| v));
+
+fsm
+}
+
+pub(crate) fn is_empty(&self) -> bool {
+self.persisting.is_empty()
+}
+
+/// Returns the row count sum across all batches in this list.
+///
+/// This is an `O(1)` operation.
+pub(crate) fn rows(&self) -> usize {
+self.cached.as_ref().map(|v| v.rows).unwrap_or_default()
+}
+
+/// Returns the timestamp min/max values across all batches in this list.
+///
+/// This is an `O(1)` operation.
+pub(crate) fn timestamp_stats(&self) -> Option<TimestampMinMax> {
+self.cached.as_ref().map(|v| v.timestamps)
+}
+
+/// Returns the merged schema of all batches in this list.
+///
+/// This is an `O(1)` operation.
+pub(crate) fn schema(&self) -> Option<&Schema> {
+self.cached.as_ref().map(|v| &v.schema)
+}
+
+/// Returns the [`RecordBatch`] in this list, optionally applying the given
+/// projection.
+///
+/// This is an `O(n)` operation.
+pub(crate) fn get_query_data<'a, 'b: 'a>(
+&'a self,
+projection: &'b OwnedProjection,
+) -> impl Iterator<Item = RecordBatch> + 'a {
+self.persisting
+.iter()
+.flat_map(move |(_, b)| b.get_query_data(projection))
+}
+}
+
+/// The set of cached statistics describing the batches of data within the
+/// [`PersistingList`].
+#[derive(Debug)]
+struct CachedStats {
+rows: usize,
+timestamps: TimestampMinMax,
+
+/// The merged schema of all the persisting batches.
+schema: Schema,
+}
+
+impl CachedStats {
+/// Generate a new [`CachedStats`] from an iterator of batches, if any.
+///
+/// # Panics
+///
+/// If any batches are empty (containing no schema or timestamp column), or
+/// the batches do not contain compatible schemas, this call panics.
+fn new<'a, T>(mut iter: T) -> Option<Self>
+where
+T: Iterator<Item = &'a BufferState<Persisting>> + 'a,
+{
+let v = iter.next()?;
+
+let mut schema = SchemaMerger::new();
+schema = schema
+.merge(&v.schema().expect("persisting batch must be non-empty"))
+.unwrap();
+
+let mut rows = v.rows();
+debug_assert!(rows > 0);
+
+let mut timestamps = v
+.timestamp_stats()
+.expect("unprojected batch should have timestamp");
+
+for buf in iter {
+rows += buf.rows();
+if let Some(v) = buf.schema() {
+debug_assert!(buf.rows() > 0);
+
+schema = schema
+.merge(&v)
+.expect("persit list contains incompatible schemas");
+
+let ts = buf
+.timestamp_stats()
+.expect("no timestamp for bach containing rows");
+
+timestamps.min = timestamps.min.min(ts.min);
+timestamps.max = timestamps.max.max(ts.max);
+}
+}
+
+Some(Self {
+rows,
+timestamps,
+schema: schema.build(),
+})
+}
+
+// Incrementally recompute the cached stats by adding `buffer` to the
+// statistics.
+fn push(&mut self, buffer: &BufferState<Persisting>) {
+// This re-computation below MUST complete - no early exit is allowed or
+// the stats will be left in an inconsistent state.
+
+self.rows += buffer.rows();
+
+let ts = buffer
+.timestamp_stats()
+.expect("persisting batch must contain timestamps");
+
+self.timestamps.min = self.timestamps.min.min(ts.min);
+self.timestamps.max = self.timestamps.max.max(ts.max);
+
+let mut schema = SchemaMerger::new();
+schema = schema.merge(&self.schema).unwrap();
+schema = schema
+.merge(&buffer.schema().expect("persisting batch must have schema"))
+.expect("incompatible schema");
+self.schema = schema.build()
+}
+}
+
+#[cfg(test)]
+mod tests {
+use std::collections::BTreeSet;
+
+use arrow_util::assert_batches_eq;
+use assert_matches::assert_matches;
+use data_types::SequenceNumber;
+use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
+
+use crate::buffer_tree::partition::buffer::Transition;
+
+use super::*;
+
+/// Ensure the ordering of yielded batches matches that of the calls to
+/// push(), preserving batch ordering, and in turn, causal row ordering.
+#[test]
+fn test_batch_ordering() {
+let mut list = PersistingList::default();
+let mut ident_oracle = BatchIdent::default();
+
+assert!(list.is_empty());
+
+// Generate a buffer with a single row.
+let buffer = buffer_with_lp(r#"bananas,tag=platanos great="yes" 42"#);
+
+// Add it to the list.
+list.push(ident_oracle.next(), buffer);
+
+// The statistics must now match the expected values.
+assert!(!list.is_empty());
+assert_eq!(list.rows(), 1);
+assert_matches!(
+list.timestamp_stats(),
+Some(TimestampMinMax { min: 42, max: 42 })
+);
+assert_schema_matches(list.schema().unwrap(), &["time", "great", "tag"]);
+
+// Assert the row content
+let data = list
+.get_query_data(&OwnedProjection::default())
+.collect::<Vec<_>>();
+let expected = vec![
+"+-------+----------+--------------------------------+",
+"| great | tag | time |",
+"+-------+----------+--------------------------------+",
+"| yes | platanos | 1970-01-01T00:00:00.000000042Z |",
+"+-------+----------+--------------------------------+",
+];
+assert_eq!(data.len(), 1);
+assert_batches_eq!(&expected, &data);
+
+// Push a new buffer updating the last row to check yielded row ordering.
+let buffer = buffer_with_lp(r#"bananas,tag=platanos great="definitely" 42"#);
+list.push(ident_oracle.next(), buffer);
+
+// The statistics must now match the expected values.
+assert!(!list.is_empty());
+assert_eq!(list.rows(), 2);
+assert_matches!(
+list.timestamp_stats(),
+Some(TimestampMinMax { min: 42, max: 42 })
+);
+assert_schema_matches(list.schema().unwrap(), &["time", "great", "tag"]);
+
+// Assert the row content
+let data = list
+.get_query_data(&OwnedProjection::default())
+.collect::<Vec<_>>();
+let expected = vec![
+"+------------+----------+--------------------------------+",
+"| great | tag | time |",
+"+------------+----------+--------------------------------+",
+"| yes | platanos | 1970-01-01T00:00:00.000000042Z |",
+"| definitely | platanos | 1970-01-01T00:00:00.000000042Z |",
+"+------------+----------+--------------------------------+",
+];
+assert_eq!(data.len(), 2);
+assert_batches_eq!(&expected, &data);
+}
+
+/// Assert projection across batches works, and does not panic when given a
+/// missing column.
+#[test]
+fn test_projection() {
+let mut list = PersistingList::default();
+let mut ident_oracle = BatchIdent::default();
+
+assert!(list.is_empty());
+
+// Populate the list.
+list.push(
+ident_oracle.next(),
+buffer_with_lp(
+"\
+bananas,tag=platanos v=1 42\n\
+bananas,tag=platanos v=2,bananas=100 4242\n\
+",
+),
+);
+
+list.push(
+ident_oracle.next(),
+buffer_with_lp(
+"\
+bananas,tag=platanos v=3 424242\n\
+bananas v=4,bananas=200 42424242\n\
+",
+),
+);
+
+// Assert the row content
+let data = list
+.get_query_data(&OwnedProjection::from(vec!["time", "tag", "missing"]))
+.collect::<Vec<_>>();
+let expected = vec![
+"+--------------------------------+----------+",
+"| time | tag |",
+"+--------------------------------+----------+",
+"| 1970-01-01T00:00:00.000000042Z | platanos |",
+"| 1970-01-01T00:00:00.000004242Z | platanos |",
+"| 1970-01-01T00:00:00.000424242Z | platanos |",
+"| 1970-01-01T00:00:00.042424242Z | |",
+"+--------------------------------+----------+",
+];
+assert_batches_eq!(&expected, &data);
+}
+
+/// Validate the cached statistics as batches are added and removed.
+#[test]
+fn test_cached_statistics() {
+let mut list = PersistingList::default();
+let mut ident_oracle = BatchIdent::default();
+
+assert!(list.is_empty());
+
+// Generate a buffer with a single row.
+let first_batch = ident_oracle.next();
+list.push(
+first_batch,
+buffer_with_lp(r#"bananas,tag=platanos great="yes" 42"#),
+);
+
+// The statistics must now match the expected values.
+assert!(!list.is_empty());
+assert_eq!(list.rows(), 1);
+assert_matches!(
+list.timestamp_stats(),
+Some(TimestampMinMax { min: 42, max: 42 })
+);
+assert_schema_matches(list.schema().unwrap(), &["time", "great", "tag"]);
+
+// Push another row.
+let second_batch = ident_oracle.next();
+list.push(
+second_batch,
+buffer_with_lp(r#"bananas,another=yes great="definitely",incremental=true 4242"#),
+);
+
+// The statistics must now match the expected values.
+assert!(!list.is_empty());
+assert_eq!(list.rows(), 2);
+assert_matches!(
+list.timestamp_stats(),
+Some(TimestampMinMax { min: 42, max: 4242 })
+);
+assert_schema_matches(
+list.schema().unwrap(),
+&["time", "great", "tag", "another", "incremental"],
+);
+
+// Remove the first batch.
+list.remove(first_batch);
+
+// The statistics must now match the second batch values.
+assert!(!list.is_empty());
+assert_eq!(list.rows(), 1);
+assert_matches!(
+list.timestamp_stats(),
+Some(TimestampMinMax {
+min: 4242,
+max: 4242
+})
+);
+assert_schema_matches(
+list.schema().unwrap(),
+&["time", "great", "another", "incremental"],
+);
+
+// Remove the second/final batch.
+list.remove(second_batch);
+
+assert!(list.is_empty());
+assert_eq!(list.rows(), 0);
+assert_matches!(list.timestamp_stats(), None);
+assert_matches!(list.schema(), None);
+}
+
+/// Assert the schema columns match the given names.
+fn assert_schema_matches(schema: &Schema, cols: &[&str]) {
+let schema = schema.as_arrow();
+let got = schema
+.all_fields()
+.into_iter()
+.map(|v| v.name().to_owned())
+.collect::<BTreeSet<_>>();
+
+let want = cols
+.iter()
+.map(ToString::to_string)
+.collect::<BTreeSet<_>>();
+
+assert_eq!(got, want);
+}
+
+/// Return a persisting buffer containing the given LP content.
+fn buffer_with_lp(lp: &str) -> BufferState<Persisting> {
+let mut buffer = BufferState::new();
+// Write some data to a buffer.
+buffer
+.write(lp_to_mutable_batch(lp).1, SequenceNumber::new(0))
+.expect("write to empty buffer should succeed");
+
+// Convert the buffer into a persisting snapshot.
+match buffer.snapshot() {
+Transition::Ok(v) => v.into_persisting(),
+Transition::Unchanged(_) => panic!("did not transition to snapshot state"),
+}
+}
+}
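An illustrative usage sketch of the new list, mirroring the unit tests above; these are crate-internal types, so this only compiles inside the ingester crate, and the line-protocol literal is arbitrary:

    let mut list = PersistingList::default();
    let mut ident_oracle = BatchIdent::default();

    // Pushing a persisting buffer updates the cached statistics incrementally.
    let ident = ident_oracle.next();
    list.push(ident, buffer_with_lp(r#"bananas,tag=platanos great="yes" 42"#));
    assert_eq!(list.rows(), 1); // O(1): read from the cache, not the batches
    assert!(list.timestamp_stats().is_some());

    // Removing the batch once persistence completes recomputes the cache
    // from the remaining batches.
    let _fsm = list.remove(ident);
    assert!(list.is_empty());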
@@ -1,3 +1,5 @@
+use gossip::{GossipHandle, NopDispatcher};
+
 /// This needs to be pub for the benchmarks but should not be used outside the crate.
 #[cfg(feature = "benches")]
 pub use wal_replay::*;
@@ -5,7 +7,7 @@ pub use wal_replay::*;
 mod graceful_shutdown;
 mod wal_replay;

-use std::{path::PathBuf, sync::Arc, time::Duration};
+use std::{net::SocketAddr, path::PathBuf, sync::Arc, time::Duration};

 use arrow_flight::flight_service_server::FlightService;
 use backoff::BackoffConfig;
@@ -109,6 +111,9 @@ pub struct IngesterGuard<T> {
 /// The task handle executing the graceful shutdown once triggered.
 graceful_shutdown_handler: tokio::task::JoinHandle<()>,
 shutdown_complete: Shared<oneshot::Receiver<()>>,
+
+/// An optional handle to the gossip sub-system, if running.
+gossip_handle: Option<GossipHandle>,
 }

 impl<T> IngesterGuard<T>
@@ -137,6 +142,27 @@ impl<T> Drop for IngesterGuard<T> {
 }
 }

+/// Configuration parameters for the optional gossip sub-system.
+#[derive(Debug, Default)]
+pub enum GossipConfig {
+/// Disable the gossip sub-system.
+#[default]
+Disabled,
+
+/// Enable the gossip sub-system, listening on the specified `bind_addr` and
+/// using `peers` as the initial peer seed list.
+Enabled {
+/// UDP socket address to use for gossip communication.
+bind_addr: SocketAddr,
+/// Initial peer seed list in the form of either:
+///
+/// - "dns.address.example:port"
+/// - "10.0.0.1:port"
+///
+peers: Vec<String>,
+},
+}
+
 /// Errors that occur during initialisation of an `ingester` instance.
 #[derive(Debug, Error)]
 pub enum InitError {
@@ -152,6 +178,10 @@ pub enum InitError {
 /// An error replaying the entries in the WAL.
 #[error(transparent)]
 WalReplay(Box<dyn std::error::Error>),
+
+/// An error binding the UDP socket for gossip communication.
+#[error("failed to bind udp gossip socket: {0}")]
+GossipBind(std::io::Error),
 }

 /// Initialise a new `ingester` instance, returning the gRPC service handler
@@ -238,6 +268,7 @@ pub async fn new<F>(
 persist_queue_depth: usize,
 persist_hot_partition_cost: usize,
 object_store: ParquetStorage,
+gossip: GossipConfig,
 shutdown: F,
 ) -> Result<IngesterGuard<impl IngesterRpcInterface>, InitError>
 where
@@ -351,11 +382,9 @@ where

 // Initialize disk metrics to emit disk capacity / free statistics for the
 // WAL directory.
-let disk_metric_task = tokio::task::spawn(
-DiskSpaceMetrics::new(wal_directory, &metrics)
-.expect("failed to resolve WAL directory to disk")
-.run(),
-);
+let (disk_metric_task, _snapshot_rx) = DiskSpaceMetrics::new(wal_directory, &metrics)
+.expect("failed to resolve WAL directory to disk");
+let disk_metric_task = tokio::task::spawn(disk_metric_task.run());

 // Replay the WAL log files, if any.
 let max_sequence_number =
@@ -422,6 +451,23 @@ where
 wal_reference_handle,
 ));

+// Optionally start the gossip subsystem
+let gossip_handle = match gossip {
+GossipConfig::Disabled => {
+info!("gossip disabled");
+None
+}
+GossipConfig::Enabled { bind_addr, peers } => {
+// Start the gossip sub-system, which logs during init.
+let handle =
+gossip::Builder::new(peers, NopDispatcher::default(), Arc::clone(&metrics))
+.bind(bind_addr)
+.await
+.map_err(InitError::GossipBind)?;
+Some(handle)
+}
+};
+
 Ok(IngesterGuard {
 rpc: GrpcDelegate::new(
 Arc::new(write_path),
@@ -438,5 +484,6 @@ where
 disk_metric_task,
 graceful_shutdown_handler: shutdown_task,
 shutdown_complete: shutdown_rx.shared(),
+gossip_handle,
 })
 }
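A sketch of how a caller might enable the new gossip sub-system when invoking `new(...)`; the bind address and peer below are placeholders, not values from this commit:

    use std::net::SocketAddr;

    let bind_addr: SocketAddr = "0.0.0.0:4242".parse().expect("valid gossip bind address");
    let gossip = GossipConfig::Enabled {
        bind_addr,
        peers: vec!["ingester-2.example:4242".to_string()],
    };
    // Passing GossipConfig::Disabled (the default) skips binding the UDP
    // socket entirely, as in the all-in-one configuration above.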
@@ -200,6 +200,7 @@
 unused_crate_dependencies,
 missing_docs
 )]
+#![allow(clippy::default_constructed_unit_structs)]

 // Workaround for "unused crate" lint false positives.
 #[cfg(test)]
@@ -2,7 +2,8 @@ use std::{fmt::Debug, sync::Arc, time::Duration};

 use async_trait::async_trait;
 use data_types::{
-sequence_number_set::SequenceNumberSet, NamespaceId, ParquetFileParams, PartitionId, TableId,
+sequence_number_set::SequenceNumberSet, NamespaceId, ParquetFileParams, TableId,
+TransitionPartitionId,
 };

 use crate::wal::reference_tracker::WalReferenceHandle;
@@ -54,9 +55,9 @@ impl CompletedPersist {
 self.meta.table_id
 }

-/// Returns the [`PartitionId`] of the persisted data.
-pub(crate) fn partition_id(&self) -> PartitionId {
-self.meta.partition_id
+/// Returns the [`TransitionPartitionId`] of the persisted data.
+pub(crate) fn partition_id(&self) -> &TransitionPartitionId {
+&self.meta.partition_id
 }

 /// Returns the [`SequenceNumberSet`] of the persisted data.
@@ -166,15 +167,16 @@ pub(crate) mod mock {
 #[cfg(test)]
 mod tests {
 use super::*;
-use crate::test_util::{ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_TABLE_ID};
+use crate::test_util::{
+ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, ARBITRARY_TRANSITION_PARTITION_ID,
+};
 use data_types::{ColumnId, ColumnSet, SequenceNumber, Timestamp};

 fn arbitrary_file_meta() -> ParquetFileParams {
 ParquetFileParams {
 namespace_id: ARBITRARY_NAMESPACE_ID,
 table_id: ARBITRARY_TABLE_ID,
-partition_id: ARBITRARY_PARTITION_ID,
-partition_hash_id: None,
+partition_id: ARBITRARY_TRANSITION_PARTITION_ID.clone(),
 object_store_id: Default::default(),
 min_time: Timestamp::new(42),
 max_time: Timestamp::new(42),
@@ -226,7 +228,7 @@ mod tests {

 assert_eq!(note.namespace_id(), meta.namespace_id);
 assert_eq!(note.table_id(), meta.table_id);
-assert_eq!(note.partition_id(), meta.partition_id);
+assert_eq!(note.partition_id(), &meta.partition_id);

 assert_eq!(note.column_count(), meta.column_set.len());
 assert_eq!(note.row_count(), meta.row_count as usize);

@@ -151,7 +151,9 @@ mod tests {
 use super::*;
 use crate::{
 persist::completion_observer::mock::MockCompletionObserver,
-test_util::{ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_TABLE_ID},
+test_util::{
+ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, ARBITRARY_TRANSITION_PARTITION_ID,
+},
 };
 use data_types::{
 sequence_number_set::SequenceNumberSet, ColumnId, ColumnSet, ParquetFileParams, Timestamp,
@@ -169,8 +171,7 @@ mod tests {
 let meta = ParquetFileParams {
 namespace_id: ARBITRARY_NAMESPACE_ID,
 table_id: ARBITRARY_TABLE_ID,
-partition_id: ARBITRARY_PARTITION_ID,
-partition_hash_id: None,
+partition_id: ARBITRARY_TRANSITION_PARTITION_ID.clone(),
 object_store_id: Default::default(),
 min_time: Timestamp::new(Duration::from_secs(1_000).as_nanos() as _),
 max_time: Timestamp::new(Duration::from_secs(1_042).as_nanos() as _), // 42 seconds later
@ -16,7 +16,7 @@ mod tests {
|
||||||
use std::{sync::Arc, time::Duration};
|
use std::{sync::Arc, time::Duration};
|
||||||
|
|
||||||
use assert_matches::assert_matches;
|
use assert_matches::assert_matches;
|
||||||
use data_types::{CompactionLevel, ParquetFile, TransitionPartitionId};
|
use data_types::{CompactionLevel, ParquetFile};
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use iox_catalog::{
|
use iox_catalog::{
|
||||||
interface::{get_schema_by_id, Catalog, SoftDeletedRows},
|
interface::{get_schema_by_id, Catalog, SoftDeletedRows},
|
||||||
|
@ -190,7 +190,7 @@ mod tests {
|
||||||
// Generate a partition with data
|
// Generate a partition with data
|
||||||
let partition = partition_with_write(Arc::clone(&catalog)).await;
|
let partition = partition_with_write(Arc::clone(&catalog)).await;
|
||||||
let table_id = partition.lock().table_id();
|
let table_id = partition.lock().table_id();
|
||||||
let partition_id = partition.lock().partition_id();
|
let partition_id = partition.lock().transition_partition_id();
|
||||||
let namespace_id = partition.lock().namespace_id();
|
let namespace_id = partition.lock().namespace_id();
|
||||||
assert_matches!(partition.lock().sort_key(), SortKeyState::Provided(None));
|
assert_matches!(partition.lock().sort_key(), SortKeyState::Provided(None));
|
||||||
|
|
||||||
|
@ -221,7 +221,7 @@ mod tests {
|
||||||
assert_matches!(&completion_observer.calls().as_slice(), &[n] => {
|
assert_matches!(&completion_observer.calls().as_slice(), &[n] => {
|
||||||
assert_eq!(n.namespace_id(), namespace_id);
|
assert_eq!(n.namespace_id(), namespace_id);
|
||||||
assert_eq!(n.table_id(), table_id);
|
assert_eq!(n.table_id(), table_id);
|
||||||
assert_eq!(n.partition_id(), partition_id);
|
assert_eq!(n.partition_id(), &partition_id);
|
||||||
assert_eq!(n.sequence_numbers().len(), 1);
|
assert_eq!(n.sequence_numbers().len(), 1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -243,12 +243,12 @@ mod tests {
|
||||||
.repositories()
|
.repositories()
|
||||||
.await
|
.await
|
||||||
.parquet_files()
|
.parquet_files()
|
||||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
.list_by_partition_not_to_delete(&partition_id)
|
||||||
.await
|
.await
|
||||||
.expect("query for parquet files failed");
|
.expect("query for parquet files failed");
|
||||||
|
|
||||||
// Validate a single file was inserted with the expected properties.
|
// Validate a single file was inserted with the expected properties.
|
||||||
let (object_store_id, file_size_bytes) = assert_matches!(&*files, &[ParquetFile {
|
let (object_store_id, file_size_bytes) = assert_matches!(&*files, [ParquetFile {
|
||||||
namespace_id: got_namespace_id,
|
namespace_id: got_namespace_id,
|
||||||
table_id: got_table_id,
|
table_id: got_table_id,
|
||||||
partition_id: got_partition_id,
|
partition_id: got_partition_id,
|
||||||
|
@ -263,12 +263,12 @@ mod tests {
|
||||||
{
|
{
|
||||||
assert_eq!(created_at.get(), max_l0_created_at.get());
|
assert_eq!(created_at.get(), max_l0_created_at.get());
|
||||||
|
|
||||||
assert_eq!(got_namespace_id, namespace_id);
|
assert_eq!(got_namespace_id, &namespace_id);
|
||||||
assert_eq!(got_table_id, table_id);
|
assert_eq!(got_table_id, &table_id);
|
||||||
assert_eq!(got_partition_id, partition_id);
|
assert_eq!(got_partition_id, &partition_id);
|
||||||
|
|
||||||
assert_eq!(row_count, 1);
|
assert_eq!(*row_count, 1);
|
||||||
assert_eq!(compaction_level, CompactionLevel::Initial);
|
assert_eq!(compaction_level, &CompactionLevel::Initial);
|
||||||
|
|
||||||
(object_store_id, file_size_bytes)
|
(object_store_id, file_size_bytes)
|
||||||
}
|
}
|
||||||
|
@ -292,7 +292,7 @@ mod tests {
|
||||||
}] => {
|
}] => {
|
||||||
let want_path = format!("{object_store_id}.parquet");
|
let want_path = format!("{object_store_id}.parquet");
|
||||||
assert!(location.as_ref().ends_with(&want_path));
|
assert!(location.as_ref().ends_with(&want_path));
|
||||||
assert_eq!(size, file_size_bytes as usize);
|
assert_eq!(size, *file_size_bytes as usize);
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -326,8 +326,7 @@ mod tests {
|
||||||
// Generate a partition with data
|
// Generate a partition with data
|
||||||
let partition = partition_with_write(Arc::clone(&catalog)).await;
|
let partition = partition_with_write(Arc::clone(&catalog)).await;
|
||||||
let table_id = partition.lock().table_id();
|
let table_id = partition.lock().table_id();
|
||||||
let partition_id = partition.lock().partition_id();
|
let partition_id = partition.lock().transition_partition_id();
|
||||||
let transition_partition_id = partition.lock().transition_partition_id();
|
|
||||||
let namespace_id = partition.lock().namespace_id();
|
let namespace_id = partition.lock().namespace_id();
|
||||||
assert_matches!(partition.lock().sort_key(), SortKeyState::Provided(None));
|
assert_matches!(partition.lock().sort_key(), SortKeyState::Provided(None));
|
||||||
|
|
||||||
|
@ -344,7 +343,7 @@ mod tests {
|
||||||
.await
|
.await
|
||||||
.partitions()
|
.partitions()
|
||||||
.cas_sort_key(
|
.cas_sort_key(
|
||||||
&transition_partition_id,
|
&partition_id,
|
||||||
None,
|
None,
|
||||||
&["bananas", "are", "good", "for", "you"],
|
&["bananas", "are", "good", "for", "you"],
|
||||||
)
|
)
|
||||||
|
@ -367,7 +366,7 @@ mod tests {
|
||||||
assert_matches!(&completion_observer.calls().as_slice(), &[n] => {
|
assert_matches!(&completion_observer.calls().as_slice(), &[n] => {
|
||||||
assert_eq!(n.namespace_id(), namespace_id);
|
assert_eq!(n.namespace_id(), namespace_id);
|
||||||
assert_eq!(n.table_id(), table_id);
|
assert_eq!(n.table_id(), table_id);
|
||||||
assert_eq!(n.partition_id(), partition_id);
|
assert_eq!(n.partition_id(), &partition_id);
|
||||||
assert_eq!(n.sequence_numbers().len(), 1);
|
assert_eq!(n.sequence_numbers().len(), 1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -392,12 +391,12 @@ mod tests {
|
||||||
.repositories()
|
.repositories()
|
||||||
.await
|
.await
|
||||||
.parquet_files()
|
.parquet_files()
|
||||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
.list_by_partition_not_to_delete(&partition_id)
|
||||||
.await
|
.await
|
||||||
.expect("query for parquet files failed");
|
.expect("query for parquet files failed");
|
||||||
|
|
||||||
// Validate a single file was inserted with the expected properties.
|
// Validate a single file was inserted with the expected properties.
|
||||||
let (object_store_id, file_size_bytes) = assert_matches!(&*files, &[ParquetFile {
|
let (object_store_id, file_size_bytes) = assert_matches!(&*files, [ParquetFile {
|
||||||
namespace_id: got_namespace_id,
|
namespace_id: got_namespace_id,
|
||||||
table_id: got_table_id,
|
table_id: got_table_id,
|
||||||
partition_id: got_partition_id,
|
partition_id: got_partition_id,
|
||||||
|
@ -412,12 +411,12 @@ mod tests {
|
||||||
{
|
{
|
||||||
assert_eq!(created_at.get(), max_l0_created_at.get());
|
assert_eq!(created_at.get(), max_l0_created_at.get());
|
||||||
|
|
||||||
assert_eq!(got_namespace_id, namespace_id);
|
assert_eq!(got_namespace_id, &namespace_id);
|
||||||
assert_eq!(got_table_id, table_id);
|
assert_eq!(got_table_id, &table_id);
|
||||||
assert_eq!(got_partition_id, partition_id);
|
assert_eq!(got_partition_id, &partition_id);
|
||||||
|
|
||||||
assert_eq!(row_count, 1);
|
assert_eq!(*row_count, 1);
|
||||||
assert_eq!(compaction_level, CompactionLevel::Initial);
|
assert_eq!(compaction_level, &CompactionLevel::Initial);
|
||||||
|
|
||||||
(object_store_id, file_size_bytes)
|
(object_store_id, file_size_bytes)
|
||||||
}
|
}
|
||||||
|
@ -438,18 +437,14 @@ mod tests {
|
||||||
assert_eq!(files.len(), 2, "expected two uploaded files");
|
assert_eq!(files.len(), 2, "expected two uploaded files");
|
||||||
|
|
||||||
// Ensure the catalog record points at a valid file in object storage.
|
// Ensure the catalog record points at a valid file in object storage.
|
||||||
let want_path = ParquetFilePath::new(
|
let want_path =
|
||||||
namespace_id,
|
ParquetFilePath::new(namespace_id, table_id, &partition_id, *object_store_id)
|
||||||
table_id,
|
|
||||||
&transition_partition_id,
|
|
||||||
object_store_id,
|
|
||||||
)
|
|
||||||
.object_store_path();
|
.object_store_path();
|
||||||
let file = files
|
let file = files
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.find(|f| f.location == want_path)
|
.find(|f| f.location == want_path)
|
||||||
.expect("did not find final file in object storage");
|
.expect("did not find final file in object storage");
|
||||||
|
|
||||||
assert_eq!(file.size, file_size_bytes as usize);
|
assert_eq!(file.size, *file_size_bytes as usize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
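The persistence tests above rely on one property of the object store location: it ends with `<object_store_id>.parquet`. A small sketch of that invariant; the path layout shown here is illustrative and not the exact format `ParquetFilePath` produces:

use uuid::Uuid;

// Illustrative path builder; the real layout comes from ParquetFilePath.
fn example_path(namespace_id: i64, table_id: i64, partition: &str, object_store_id: Uuid) -> String {
    format!("{namespace_id}/{table_id}/{partition}/{object_store_id}.parquet")
}

fn main() {
    let object_store_id = Uuid::new_v4();
    let location = example_path(1, 2, "arbitrary", object_store_id);

    // This is the invariant the tests assert against.
    let want_suffix = format!("{object_store_id}.parquet");
    assert!(location.ends_with(&want_suffix));
}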
@ -55,7 +55,8 @@ pub(crate) mod mock {
|
||||||
use std::{sync::Arc, time::Duration};
|
use std::{sync::Arc, time::Duration};
|
||||||
|
|
||||||
use data_types::{
|
use data_types::{
|
||||||
ColumnId, ColumnSet, NamespaceId, ParquetFileParams, PartitionId, TableId, Timestamp,
|
ColumnId, ColumnSet, NamespaceId, ParquetFileParams, PartitionHashId, PartitionKey,
|
||||||
|
TableId, Timestamp, TransitionPartitionId,
|
||||||
};
|
};
|
||||||
use test_helpers::timeout::FutureTimeout;
|
use test_helpers::timeout::FutureTimeout;
|
||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
|
@ -155,13 +156,16 @@ pub(crate) mod mock {
|
||||||
let wait_ms: u64 = rand::random::<u64>() % 100;
|
let wait_ms: u64 = rand::random::<u64>() % 100;
|
||||||
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
|
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
|
||||||
let sequence_numbers = partition.lock().mark_persisted(data);
|
let sequence_numbers = partition.lock().mark_persisted(data);
|
||||||
|
let table_id = TableId::new(2);
|
||||||
|
let partition_hash_id =
|
||||||
|
PartitionHashId::new(table_id, &PartitionKey::from("arbitrary"));
|
||||||
|
let partition_id = TransitionPartitionId::Deterministic(partition_hash_id);
|
||||||
completion_observer
|
completion_observer
|
||||||
.persist_complete(Arc::new(CompletedPersist::new(
|
.persist_complete(Arc::new(CompletedPersist::new(
|
||||||
ParquetFileParams {
|
ParquetFileParams {
|
||||||
namespace_id: NamespaceId::new(1),
|
namespace_id: NamespaceId::new(1),
|
||||||
table_id: TableId::new(2),
|
table_id,
|
||||||
partition_id: PartitionId::new(3),
|
partition_id,
|
||||||
partition_hash_id: None,
|
|
||||||
object_store_id: Default::default(),
|
object_store_id: Default::default(),
|
||||||
min_time: Timestamp::new(42),
|
min_time: Timestamp::new(42),
|
||||||
max_time: Timestamp::new(42),
|
max_time: Timestamp::new(42),
|
||||||
|
|
|
@ -394,8 +394,7 @@ where
|
||||||
ParquetFileParams {
|
ParquetFileParams {
|
||||||
namespace_id: NamespaceId::new(1),
|
namespace_id: NamespaceId::new(1),
|
||||||
table_id: TableId::new(2),
|
table_id: TableId::new(2),
|
||||||
partition_id: PartitionId::new(3),
|
partition_id: ARBITRARY_TRANSITION_PARTITION_ID.clone(),
|
||||||
partition_hash_id: None,
|
|
||||||
object_store_id: Default::default(),
|
object_store_id: Default::default(),
|
||||||
min_time: Timestamp::new(42),
|
min_time: Timestamp::new(42),
|
||||||
max_time: Timestamp::new(42),
|
max_time: Timestamp::new(42),
|
||||||
|
|
|
@ -30,7 +30,7 @@ use futures::{stream::FuturesUnordered, FutureExt, StreamExt, TryStreamExt};
|
||||||
use generated_types::influxdata::iox::ingester::v1::{
|
use generated_types::influxdata::iox::ingester::v1::{
|
||||||
write_service_server::WriteService, WriteRequest,
|
write_service_server::WriteService, WriteRequest,
|
||||||
};
|
};
|
||||||
use ingester::{IngesterGuard, IngesterRpcInterface};
|
use ingester::{GossipConfig, IngesterGuard, IngesterRpcInterface};
|
||||||
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
|
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
|
||||||
use iox_catalog::{
|
use iox_catalog::{
|
||||||
interface::{Catalog, SoftDeletedRows},
|
interface::{Catalog, SoftDeletedRows},
|
||||||
|
@ -168,6 +168,7 @@ impl TestContextBuilder {
|
||||||
max_persist_queue_depth,
|
max_persist_queue_depth,
|
||||||
persist_hot_partition_cost,
|
persist_hot_partition_cost,
|
||||||
storage.clone(),
|
storage.clone(),
|
||||||
|
GossipConfig::default(),
|
||||||
shutdown_rx.map(|v| v.expect("shutdown sender dropped without calling shutdown")),
|
shutdown_rx.map(|v| v.expect("shutdown sender dropped without calling shutdown")),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
|
|
|
@ -0,0 +1,24 @@
DROP TRIGGER IF EXISTS update_partition ON parquet_file;

ALTER TABLE parquet_file
    ALTER COLUMN partition_id
    DROP NOT NULL;

CREATE OR REPLACE FUNCTION update_partition_on_new_file_at()
RETURNS TRIGGER
LANGUAGE PLPGSQL
AS $$
BEGIN
    UPDATE partition
    SET new_file_at = NEW.created_at
    WHERE (NEW.partition_id IS NULL OR id = NEW.partition_id)
        AND (NEW.partition_hash_id IS NULL OR hash_id = NEW.partition_hash_id);

    RETURN NEW;
END;
$$;

CREATE TRIGGER update_partition
    AFTER INSERT ON parquet_file
    FOR EACH ROW
    EXECUTE PROCEDURE update_partition_on_new_file_at();
@ -0,0 +1,98 @@
CREATE TABLE parquet_file_temp
AS SELECT * FROM parquet_file;

DROP TABLE parquet_file;

CREATE TABLE parquet_file
(
    id INTEGER
        constraint parquet_file_pkey
            primary key autoincrement,
    shard_id numeric not null
        constraint parquet_file_sequencer_id_fkey
            references shard,
    table_id numeric not null
        references table_name,
    partition_id numeric
        references partition,
    partition_hash_id bytea
        references partition (hash_id),

    object_store_id uuid not null
        constraint parquet_location_unique
            unique,
    max_sequence_number numeric,
    min_time numeric,
    max_time numeric,
    to_delete numeric,
    row_count numeric default 0 not null,
    file_size_bytes numeric default 0 not null,
    compaction_level smallint default 0 not null,
    created_at numeric,
    namespace_id numeric not null
        references namespace
            on delete cascade,
    column_set numeric[] not null,
    max_l0_created_at numeric default 0 not null
);

create index if not exists parquet_file_deleted_at_idx
    on parquet_file (to_delete);

create index if not exists parquet_file_partition_idx
    on parquet_file (partition_id);

create index if not exists parquet_file_table_idx
    on parquet_file (table_id);

create index if not exists parquet_file_shard_compaction_delete_idx
    on parquet_file (shard_id, compaction_level, to_delete);

create index if not exists parquet_file_shard_compaction_delete_created_idx
    on parquet_file (shard_id, compaction_level, to_delete, created_at);

create index if not exists parquet_file_partition_created_idx
    on parquet_file (partition_id, created_at);

CREATE INDEX IF NOT EXISTS parquet_file_partition_hash_id_idx
    ON parquet_file (partition_hash_id)
    WHERE partition_hash_id IS NOT NULL;

create trigger if not exists update_partition
    after insert
    on parquet_file
    for each row
begin
    UPDATE partition
    SET new_file_at = NEW.created_at
    WHERE (NEW.partition_id IS NULL OR id = NEW.partition_id)
        AND (NEW.partition_hash_id IS NULL OR hash_id = NEW.partition_hash_id);
end;

create trigger if not exists update_billing
    after insert
    on parquet_file
    for each row
begin
    INSERT INTO billing_summary (namespace_id, total_file_size_bytes)
    VALUES (NEW.namespace_id, NEW.file_size_bytes)
    ON CONFLICT (namespace_id) DO UPDATE
        SET total_file_size_bytes = billing_summary.total_file_size_bytes + NEW.file_size_bytes
        WHERE billing_summary.namespace_id = NEW.namespace_id;
end;

create trigger if not exists decrement_summary
    after update
    on parquet_file
    for each row
    when OLD.to_delete IS NULL AND NEW.to_delete IS NOT NULL
begin
    UPDATE billing_summary
    SET total_file_size_bytes = billing_summary.total_file_size_bytes - OLD.file_size_bytes
    WHERE billing_summary.namespace_id = OLD.namespace_id;
end;

INSERT INTO parquet_file
SELECT * FROM parquet_file_temp;

DROP TABLE parquet_file_temp;
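Both migrations above relax the schema so a parquet_file row may carry either a partition_id or a partition_hash_id, which mirrors the two variants of `TransitionPartitionId` used throughout the Rust changes in this diff. The real types live in the `data_types` crate; the sketch below is only an assumed shape (field contents and the helper are illustrative) to make the later hunks easier to follow:

// Illustrative sketch only; the real definitions are in the `data_types` crate.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct PartitionId(i64); // old catalog-assigned row ID
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct PartitionHashId(Vec<u8>); // deterministic hash of (table ID, partition key)

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum TransitionPartitionId {
    /// Old-style identifier: the `partition.id` column.
    Deprecated(PartitionId),
    /// New-style identifier: the `partition.hash_id` column.
    Deterministic(PartitionHashId),
}

struct Partition {
    id: PartitionId,
    hash_id: Option<PartitionHashId>,
}

impl Partition {
    /// Prefer the deterministic hash ID when the row has one; this is what the
    /// `transition_partition_id()` calls throughout this diff rely on.
    fn transition_partition_id(&self) -> TransitionPartitionId {
        match &self.hash_id {
            Some(hash_id) => TransitionPartitionId::Deterministic(hash_id.clone()),
            None => TransitionPartitionId::Deprecated(self.id.clone()),
        }
    }
}

fn main() {
    let p = Partition { id: PartitionId(3), hash_id: None };
    assert_eq!(
        p.transition_partition_id(),
        TransitionPartitionId::Deprecated(PartitionId(3))
    );
}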
@ -1865,7 +1865,7 @@ pub(crate) mod test_helpers {
|
||||||
|
|
||||||
let other_params = ParquetFileParams {
|
let other_params = ParquetFileParams {
|
||||||
table_id: other_partition.table_id,
|
table_id: other_partition.table_id,
|
||||||
partition_id: other_partition.id,
|
partition_id: other_partition.transition_partition_id(),
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
min_time: Timestamp::new(50),
|
min_time: Timestamp::new(50),
|
||||||
max_time: Timestamp::new(60),
|
max_time: Timestamp::new(60),
|
||||||
|
@ -1978,7 +1978,7 @@ pub(crate) mod test_helpers {
|
||||||
|
|
||||||
let f1_params = ParquetFileParams {
|
let f1_params = ParquetFileParams {
|
||||||
table_id: partition2.table_id,
|
table_id: partition2.table_id,
|
||||||
partition_id: partition2.id,
|
partition_id: partition2.transition_partition_id(),
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
min_time: Timestamp::new(1),
|
min_time: Timestamp::new(1),
|
||||||
max_time: Timestamp::new(10),
|
max_time: Timestamp::new(10),
|
||||||
|
@ -2449,7 +2449,7 @@ pub(crate) mod test_helpers {
|
||||||
let l0_five_hour_ago_file_params = ParquetFileParams {
|
let l0_five_hour_ago_file_params = ParquetFileParams {
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
created_at: time_five_hour_ago,
|
created_at: time_five_hour_ago,
|
||||||
partition_id: partition2.id,
|
partition_id: partition2.transition_partition_id(),
|
||||||
..parquet_file_params.clone()
|
..parquet_file_params.clone()
|
||||||
};
|
};
|
||||||
repos
|
repos
|
||||||
|
@ -2492,7 +2492,7 @@ pub(crate) mod test_helpers {
|
||||||
let l1_file_params = ParquetFileParams {
|
let l1_file_params = ParquetFileParams {
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
created_at: time_now,
|
created_at: time_now,
|
||||||
partition_id: partition2.id,
|
partition_id: partition2.transition_partition_id(),
|
||||||
compaction_level: CompactionLevel::FileNonOverlapped,
|
compaction_level: CompactionLevel::FileNonOverlapped,
|
||||||
..parquet_file_params.clone()
|
..parquet_file_params.clone()
|
||||||
};
|
};
|
||||||
|
@ -2578,7 +2578,7 @@ pub(crate) mod test_helpers {
|
||||||
let l2_file_params = ParquetFileParams {
|
let l2_file_params = ParquetFileParams {
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
created_at: time_now,
|
created_at: time_now,
|
||||||
partition_id: partition3.id,
|
partition_id: partition3.transition_partition_id(),
|
||||||
compaction_level: CompactionLevel::Final,
|
compaction_level: CompactionLevel::Final,
|
||||||
..parquet_file_params.clone()
|
..parquet_file_params.clone()
|
||||||
};
|
};
|
||||||
|
@ -2619,7 +2619,7 @@ pub(crate) mod test_helpers {
|
||||||
let l0_one_hour_ago_file_params = ParquetFileParams {
|
let l0_one_hour_ago_file_params = ParquetFileParams {
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
created_at: time_one_hour_ago,
|
created_at: time_one_hour_ago,
|
||||||
partition_id: partition3.id,
|
partition_id: partition3.transition_partition_id(),
|
||||||
..parquet_file_params.clone()
|
..parquet_file_params.clone()
|
||||||
};
|
};
|
||||||
repos
|
repos
|
||||||
|
@ -2720,8 +2720,7 @@ pub(crate) mod test_helpers {
|
||||||
level1_file.compaction_level = CompactionLevel::FileNonOverlapped;
|
level1_file.compaction_level = CompactionLevel::FileNonOverlapped;
|
||||||
|
|
||||||
let other_partition_params = ParquetFileParams {
|
let other_partition_params = ParquetFileParams {
|
||||||
partition_id: partition2.id,
|
partition_id: partition2.transition_partition_id(),
|
||||||
partition_hash_id: partition2.hash_id().cloned(),
|
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
..parquet_file_params.clone()
|
..parquet_file_params.clone()
|
||||||
};
|
};
|
||||||
|
@ -2744,12 +2743,20 @@ pub(crate) mod test_helpers {
|
||||||
expected_ids.sort();
|
expected_ids.sort();
|
||||||
assert_eq!(file_ids, expected_ids);
|
assert_eq!(file_ids, expected_ids);
|
||||||
|
|
||||||
// remove namespace to avoid it from affecting later tests
|
// Using the catalog partition ID should return the same files, even if the Parquet file
|
||||||
repos
|
// records don't have the partition ID on them (which is the default now)
|
||||||
.namespaces()
|
let files = repos
|
||||||
.soft_delete("namespace_parquet_file_test_list_by_partiton_not_to_delete")
|
.parquet_files()
|
||||||
|
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition.id))
|
||||||
.await
|
.await
|
||||||
.expect("delete namespace should succeed");
|
.unwrap();
|
||||||
|
assert_eq!(files.len(), 2);
|
||||||
|
|
||||||
|
let mut file_ids: Vec<_> = files.into_iter().map(|f| f.id).collect();
|
||||||
|
file_ids.sort();
|
||||||
|
let mut expected_ids = vec![parquet_file.id, level1_file.id];
|
||||||
|
expected_ids.sort();
|
||||||
|
assert_eq!(file_ids, expected_ids);
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn test_update_to_compaction_level_1(catalog: Arc<dyn Catalog>) {
|
async fn test_update_to_compaction_level_1(catalog: Arc<dyn Catalog>) {
|
||||||
|
|
|
@ -396,8 +396,7 @@ pub mod test_helpers {
|
||||||
ParquetFileParams {
|
ParquetFileParams {
|
||||||
namespace_id: namespace.id,
|
namespace_id: namespace.id,
|
||||||
table_id: table.id,
|
table_id: table.id,
|
||||||
partition_id: partition.id,
|
partition_id: partition.transition_partition_id(),
|
||||||
partition_hash_id: partition.hash_id().cloned(),
|
|
||||||
object_store_id: Uuid::new_v4(),
|
object_store_id: Uuid::new_v4(),
|
||||||
min_time: Timestamp::new(1),
|
min_time: Timestamp::new(1),
|
||||||
max_time: Timestamp::new(10),
|
max_time: Timestamp::new(10),
|
||||||
|
|
|
@ -887,14 +887,28 @@ impl ParquetFileRepo for MemTxn {
|
||||||
) -> Result<Vec<ParquetFile>> {
|
) -> Result<Vec<ParquetFile>> {
|
||||||
let stage = self.stage();
|
let stage = self.stage();
|
||||||
|
|
||||||
|
let partition = stage
|
||||||
|
.partitions
|
||||||
|
.iter()
|
||||||
|
.find(|p| match partition_id {
|
||||||
|
TransitionPartitionId::Deterministic(hash_id) => p
|
||||||
|
.hash_id()
|
||||||
|
.map(|p_hash_id| p_hash_id == hash_id)
|
||||||
|
.unwrap_or(false),
|
||||||
|
TransitionPartitionId::Deprecated(id) => id == &p.id,
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
.clone();
|
||||||
|
|
||||||
Ok(stage
|
Ok(stage
|
||||||
.parquet_files
|
.parquet_files
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|f| match partition_id {
|
.filter(|f| match &f.partition_id {
|
||||||
TransitionPartitionId::Deterministic(hash_id) => {
|
TransitionPartitionId::Deterministic(hash_id) => partition
|
||||||
f.partition_hash_id.as_ref().map_or(false, |h| h == hash_id)
|
.hash_id()
|
||||||
}
|
.map(|p_hash_id| p_hash_id == hash_id)
|
||||||
TransitionPartitionId::Deprecated(id) => f.partition_id == *id,
|
.unwrap_or(false),
|
||||||
|
TransitionPartitionId::Deprecated(id) => id == &partition.id,
|
||||||
})
|
})
|
||||||
.filter(|f| f.to_delete.is_none())
|
.filter(|f| f.to_delete.is_none())
|
||||||
.cloned()
|
.cloned()
|
||||||
|
@ -996,17 +1010,15 @@ async fn create_parquet_file(
|
||||||
ParquetFileId::new(stage.parquet_files.len() as i64 + 1),
|
ParquetFileId::new(stage.parquet_files.len() as i64 + 1),
|
||||||
);
|
);
|
||||||
let created_at = parquet_file.created_at;
|
let created_at = parquet_file.created_at;
|
||||||
let partition_id = parquet_file.partition_id;
|
let partition_id = parquet_file.partition_id.clone();
|
||||||
stage.parquet_files.push(parquet_file);
|
stage.parquet_files.push(parquet_file);
|
||||||
|
|
||||||
// Update the new_file_at field of its partition to the time of created_at
|
// Update the new_file_at field of its partition to the time of created_at
|
||||||
let partition = stage
|
let partition = stage
|
||||||
.partitions
|
.partitions
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
.find(|p| p.id == partition_id)
|
.find(|p| p.transition_partition_id() == partition_id)
|
||||||
.ok_or(Error::PartitionNotFound {
|
.ok_or(Error::PartitionNotFound { id: partition_id })?;
|
||||||
id: TransitionPartitionId::Deprecated(partition_id),
|
|
||||||
})?;
|
|
||||||
partition.new_file_at = Some(created_at);
|
partition.new_file_at = Some(created_at);
|
||||||
|
|
||||||
Ok(stage.parquet_files.last().unwrap().clone())
|
Ok(stage.parquet_files.last().unwrap().clone())
|
||||||
|
|
|
@ -1627,22 +1627,26 @@ RETURNING id;
|
||||||
let query = match partition_id {
|
let query = match partition_id {
|
||||||
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFile>(
|
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFile>(
|
||||||
r#"
|
r#"
|
||||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||||
max_l0_created_at
|
compaction_level, created_at, column_set, max_l0_created_at
|
||||||
FROM parquet_file
|
FROM parquet_file
|
||||||
WHERE parquet_file.partition_hash_id = $1
|
INNER JOIN partition
|
||||||
|
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||||
|
WHERE partition.hash_id = $1
|
||||||
AND parquet_file.to_delete IS NULL;
|
AND parquet_file.to_delete IS NULL;
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(hash_id), // $1
|
.bind(hash_id), // $1
|
||||||
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFile>(
|
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFile>(
|
||||||
r#"
|
r#"
|
||||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||||
max_l0_created_at
|
compaction_level, created_at, column_set, max_l0_created_at
|
||||||
FROM parquet_file
|
FROM parquet_file
|
||||||
WHERE parquet_file.partition_id = $1
|
INNER JOIN partition
|
||||||
|
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||||
|
WHERE partition.id = $1
|
||||||
AND parquet_file.to_delete IS NULL;
|
AND parquet_file.to_delete IS NULL;
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
|
@ -1754,7 +1758,6 @@ where
|
||||||
namespace_id,
|
namespace_id,
|
||||||
table_id,
|
table_id,
|
||||||
partition_id,
|
partition_id,
|
||||||
partition_hash_id,
|
|
||||||
object_store_id,
|
object_store_id,
|
||||||
min_time,
|
min_time,
|
||||||
max_time,
|
max_time,
|
||||||
|
@ -1766,6 +1769,11 @@ where
|
||||||
max_l0_created_at,
|
max_l0_created_at,
|
||||||
} = parquet_file_params;
|
} = parquet_file_params;
|
||||||
|
|
||||||
|
let (partition_id, partition_hash_id) = match partition_id {
|
||||||
|
TransitionPartitionId::Deterministic(hash_id) => (None, Some(hash_id)),
|
||||||
|
TransitionPartitionId::Deprecated(id) => (Some(id), None),
|
||||||
|
};
|
||||||
|
|
||||||
let partition_hash_id_ref = &partition_hash_id.as_ref();
|
let partition_hash_id_ref = &partition_hash_id.as_ref();
|
||||||
let query = sqlx::query_scalar::<_, ParquetFileId>(
|
let query = sqlx::query_scalar::<_, ParquetFileId>(
|
||||||
r#"
|
r#"
|
||||||
|
@ -2203,7 +2211,10 @@ RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
||||||
.create(parquet_file_params)
|
.create(parquet_file_params)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(parquet_file.partition_hash_id.is_none());
|
assert_matches!(
|
||||||
|
parquet_file.partition_id,
|
||||||
|
TransitionPartitionId::Deprecated(_)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -1221,8 +1221,8 @@ struct ParquetFilePod {
|
||||||
id: ParquetFileId,
|
id: ParquetFileId,
|
||||||
namespace_id: NamespaceId,
|
namespace_id: NamespaceId,
|
||||||
table_id: TableId,
|
table_id: TableId,
|
||||||
partition_id: PartitionId,
|
#[sqlx(flatten)]
|
||||||
partition_hash_id: Option<PartitionHashId>,
|
partition_id: TransitionPartitionId,
|
||||||
object_store_id: Uuid,
|
object_store_id: Uuid,
|
||||||
min_time: Timestamp,
|
min_time: Timestamp,
|
||||||
max_time: Timestamp,
|
max_time: Timestamp,
|
||||||
|
@ -1242,7 +1242,6 @@ impl From<ParquetFilePod> for ParquetFile {
|
||||||
namespace_id: value.namespace_id,
|
namespace_id: value.namespace_id,
|
||||||
table_id: value.table_id,
|
table_id: value.table_id,
|
||||||
partition_id: value.partition_id,
|
partition_id: value.partition_id,
|
||||||
partition_hash_id: value.partition_hash_id,
|
|
||||||
object_store_id: value.object_store_id,
|
object_store_id: value.object_store_id,
|
||||||
min_time: value.min_time,
|
min_time: value.min_time,
|
||||||
max_time: value.max_time,
|
max_time: value.max_time,
|
||||||
|
@ -1395,22 +1394,26 @@ RETURNING id;
|
||||||
let query = match partition_id {
|
let query = match partition_id {
|
||||||
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFilePod>(
|
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFilePod>(
|
||||||
r#"
|
r#"
|
||||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||||
max_l0_created_at
|
compaction_level, created_at, column_set, max_l0_created_at
|
||||||
FROM parquet_file
|
FROM parquet_file
|
||||||
WHERE parquet_file.partition_hash_id = $1
|
INNER JOIN partition
|
||||||
|
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||||
|
WHERE partition.hash_id = $1
|
||||||
AND parquet_file.to_delete IS NULL;
|
AND parquet_file.to_delete IS NULL;
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(hash_id), // $1
|
.bind(hash_id), // $1
|
||||||
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFilePod>(
|
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFilePod>(
|
||||||
r#"
|
r#"
|
||||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||||
max_l0_created_at
|
compaction_level, created_at, column_set, max_l0_created_at
|
||||||
FROM parquet_file
|
FROM parquet_file
|
||||||
WHERE parquet_file.partition_id = $1
|
INNER JOIN partition
|
||||||
|
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||||
|
WHERE partition.id = $1
|
||||||
AND parquet_file.to_delete IS NULL;
|
AND parquet_file.to_delete IS NULL;
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
|
@ -1533,7 +1536,6 @@ where
|
||||||
namespace_id,
|
namespace_id,
|
||||||
table_id,
|
table_id,
|
||||||
partition_id,
|
partition_id,
|
||||||
partition_hash_id,
|
|
||||||
object_store_id,
|
object_store_id,
|
||||||
min_time,
|
min_time,
|
||||||
max_time,
|
max_time,
|
||||||
|
@ -1545,7 +1547,10 @@ where
|
||||||
max_l0_created_at,
|
max_l0_created_at,
|
||||||
} = parquet_file_params;
|
} = parquet_file_params;
|
||||||
|
|
||||||
let partition_hash_id_ref = &partition_hash_id.as_ref();
|
let (partition_id, partition_hash_id) = match partition_id {
|
||||||
|
TransitionPartitionId::Deterministic(hash_id) => (None, Some(hash_id)),
|
||||||
|
TransitionPartitionId::Deprecated(id) => (Some(id), None),
|
||||||
|
};
|
||||||
let res = sqlx::query_as::<_, ParquetFilePod>(
|
let res = sqlx::query_as::<_, ParquetFilePod>(
|
||||||
r#"
|
r#"
|
||||||
INSERT INTO parquet_file (
|
INSERT INTO parquet_file (
|
||||||
|
@ -1562,7 +1567,7 @@ RETURNING
|
||||||
.bind(TRANSITION_SHARD_ID) // $1
|
.bind(TRANSITION_SHARD_ID) // $1
|
||||||
.bind(table_id) // $2
|
.bind(table_id) // $2
|
||||||
.bind(partition_id) // $3
|
.bind(partition_id) // $3
|
||||||
.bind(partition_hash_id_ref) // $4
|
.bind(partition_hash_id.as_ref()) // $4
|
||||||
.bind(object_store_id) // $5
|
.bind(object_store_id) // $5
|
||||||
.bind(min_time) // $6
|
.bind(min_time) // $6
|
||||||
.bind(max_time) // $7
|
.bind(max_time) // $7
|
||||||
|
@ -1811,7 +1816,10 @@ RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
||||||
.create(parquet_file_params)
|
.create(parquet_file_params)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(parquet_file.partition_hash_id.is_none());
|
assert_matches!(
|
||||||
|
parquet_file.partition_id,
|
||||||
|
TransitionPartitionId::Deprecated(_)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! test_column_create_or_get_many_unchecked {
|
macro_rules! test_column_create_or_get_many_unchecked {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use data_types::{
|
use data_types::{
|
||||||
ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition,
|
ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition,
|
||||||
PartitionHashId, PartitionId, PartitionKey, SkippedCompaction, Table, TableId, Timestamp,
|
PartitionHashId, PartitionId, PartitionKey, SkippedCompaction, Table, TableId, Timestamp,
|
||||||
|
TransitionPartitionId,
|
||||||
};
|
};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
@ -20,8 +21,7 @@ impl ParquetFileBuilder {
|
||||||
id: ParquetFileId::new(id),
|
id: ParquetFileId::new(id),
|
||||||
namespace_id: NamespaceId::new(0),
|
namespace_id: NamespaceId::new(0),
|
||||||
table_id,
|
table_id,
|
||||||
partition_id: PartitionId::new(0),
|
partition_id: TransitionPartitionId::Deterministic(PartitionHashId::new(
|
||||||
partition_hash_id: Some(PartitionHashId::new(
|
|
||||||
table_id,
|
table_id,
|
||||||
&PartitionKey::from("arbitrary"),
|
&PartitionKey::from("arbitrary"),
|
||||||
)),
|
)),
|
||||||
|
@ -39,11 +39,11 @@ impl ParquetFileBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set the partition id
|
/// Set the partition identifier
|
||||||
pub fn with_partition(self, id: i64) -> Self {
|
pub fn with_partition(self, partition_id: TransitionPartitionId) -> Self {
|
||||||
Self {
|
Self {
|
||||||
file: ParquetFile {
|
file: ParquetFile {
|
||||||
partition_id: PartitionId::new(id),
|
partition_id,
|
||||||
..self.file
|
..self.file
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -602,8 +602,7 @@ impl TestPartition {
|
||||||
let parquet_file_params = ParquetFileParams {
|
let parquet_file_params = ParquetFileParams {
|
||||||
namespace_id: self.namespace.namespace.id,
|
namespace_id: self.namespace.namespace.id,
|
||||||
table_id: self.table.table.id,
|
table_id: self.table.table.id,
|
||||||
partition_id: self.partition.id,
|
partition_id: self.partition.transition_partition_id(),
|
||||||
partition_hash_id: self.partition.hash_id().cloned(),
|
|
||||||
object_store_id: object_store_id.unwrap_or_else(Uuid::new_v4),
|
object_store_id: object_store_id.unwrap_or_else(Uuid::new_v4),
|
||||||
min_time: Timestamp::new(min_time),
|
min_time: Timestamp::new(min_time),
|
||||||
max_time: Timestamp::new(max_time),
|
max_time: Timestamp::new(max_time),
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
// Workaround for "unused crate" lint false positives.
|
// Workaround for "unused crate" lint false positives.
|
||||||
use workspace_hack as _;
|
use workspace_hack as _;
|
||||||
|
|
||||||
|
use data_types::{PartitionHashId, PartitionKey, TableId, TransitionPartitionId};
|
||||||
|
|
||||||
mod catalog;
|
mod catalog;
|
||||||
pub use catalog::{
|
pub use catalog::{
|
||||||
TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestTable,
|
TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestTable,
|
||||||
|
@ -24,3 +26,14 @@ pub use catalog::{
|
||||||
|
|
||||||
mod builders;
|
mod builders;
|
||||||
pub use builders::{ParquetFileBuilder, PartitionBuilder, SkippedCompactionBuilder, TableBuilder};
|
pub use builders::{ParquetFileBuilder, PartitionBuilder, SkippedCompactionBuilder, TableBuilder};
|
||||||
|
|
||||||
|
/// Create a partition identifier from an int (which gets used as the table ID) and a partition key
|
||||||
|
/// with the string "arbitrary". Most useful in cases where there isn't any actual catalog
|
||||||
|
/// interaction (that is, in mocks) and when the important property of the partition identifiers is
|
||||||
|
/// that they're either the same as or different from other partition identifiers.
|
||||||
|
pub fn partition_identifier(table_id: i64) -> TransitionPartitionId {
|
||||||
|
TransitionPartitionId::Deterministic(PartitionHashId::new(
|
||||||
|
TableId::new(table_id),
|
||||||
|
&PartitionKey::from("arbitrary"),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
|
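A short usage sketch of the `partition_identifier` helper added above, showing the property its doc comment describes: the same table ID always yields the same identifier, and different table IDs yield different ones. The import path is an assumption based on this module layout:

// Assumed import; adjust to wherever this test-helper crate is re-exported.
use iox_tests::partition_identifier;

#[test]
fn partition_identifier_is_deterministic() {
    let a = partition_identifier(1);
    let b = partition_identifier(1);
    let c = partition_identifier(2);

    // Same table ID and the fixed "arbitrary" key => identical identifiers.
    assert_eq!(a, b);
    // A different table ID produces a distinct identifier.
    assert_ne!(a, c);
}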
@ -25,7 +25,7 @@ use generated_types::influxdata::iox::{
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use hyper::{Body, Request, Response};
|
use hyper::{Body, Request, Response};
|
||||||
use ingester::{IngesterGuard, IngesterRpcInterface};
|
use ingester::{GossipConfig, IngesterGuard, IngesterRpcInterface};
|
||||||
use iox_catalog::interface::Catalog;
|
use iox_catalog::interface::Catalog;
|
||||||
use iox_query::exec::Executor;
|
use iox_query::exec::Executor;
|
||||||
use ioxd_common::{
|
use ioxd_common::{
|
||||||
|
@ -210,6 +210,14 @@ pub async fn create_ingester_server_type(
|
||||||
) -> Result<Arc<dyn ServerType>> {
|
) -> Result<Arc<dyn ServerType>> {
|
||||||
let (shutdown_tx, shutdown_rx) = oneshot::channel();
|
let (shutdown_tx, shutdown_rx) = oneshot::channel();
|
||||||
|
|
||||||
|
let gossip = match ingester_config.gossip_config.gossip_bind_address {
|
||||||
|
None => GossipConfig::Disabled,
|
||||||
|
Some(v) => GossipConfig::Enabled {
|
||||||
|
bind_addr: v.into(),
|
||||||
|
peers: ingester_config.gossip_config.seed_list.clone(),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
let grpc = ingester::new(
|
let grpc = ingester::new(
|
||||||
catalog,
|
catalog,
|
||||||
Arc::clone(&metrics),
|
Arc::clone(&metrics),
|
||||||
|
@ -221,6 +229,7 @@ pub async fn create_ingester_server_type(
|
||||||
ingester_config.persist_queue_depth,
|
ingester_config.persist_queue_depth,
|
||||||
ingester_config.persist_hot_partition_cost,
|
ingester_config.persist_hot_partition_cost,
|
||||||
object_store,
|
object_store,
|
||||||
|
gossip,
|
||||||
shutdown_rx.map(|v| v.expect("shutdown sender dropped without calling shutdown")),
|
shutdown_rx.map(|v| v.expect("shutdown sender dropped without calling shutdown")),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
|
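For context, the ingester's `GossipConfig` is constructed two ways in this diff: `GossipConfig::default()` in the test context builder and the match above, which yields either a disabled value or an enabled one with a bind address and peer list. The sketch below is an assumed shape consistent with those two call sites, not the crate's actual definition:

// Assumed shape only; the real type is exported by the `ingester` crate.
#[derive(Debug, Clone, Default)]
enum GossipConfig {
    /// Gossip is not started; tests get this via `GossipConfig::default()`.
    #[default]
    Disabled,
    /// Gossip is started, binding to `bind_addr` and seeding from `peers`.
    Enabled {
        bind_addr: std::net::SocketAddr,
        peers: Vec<String>,
    },
}

fn main() {
    // Mirrors the mapping in `create_ingester_server_type` above: no bind
    // address configured means gossip stays disabled.
    let bind: Option<std::net::SocketAddr> = None;
    let peers: Vec<String> = vec![];
    let gossip = match bind {
        None => GossipConfig::Disabled,
        Some(bind_addr) => GossipConfig::Enabled { bind_addr, peers },
    };
    assert!(matches!(gossip, GossipConfig::Disabled));
}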
@ -10,6 +10,7 @@ async-trait = "0.1"
|
||||||
authz = { path = "../authz" }
|
authz = { path = "../authz" }
|
||||||
clap_blocks = { path = "../clap_blocks" }
|
clap_blocks = { path = "../clap_blocks" }
|
||||||
data_types = { path = "../data_types" }
|
data_types = { path = "../data_types" }
|
||||||
|
gossip = { version = "0.1.0", path = "../gossip" }
|
||||||
hashbrown = { workspace = true }
|
hashbrown = { workspace = true }
|
||||||
hyper = "0.14"
|
hyper = "0.14"
|
||||||
iox_catalog = { path = "../iox_catalog" }
|
iox_catalog = { path = "../iox_catalog" }
|
||||||
|
|
|
@ -10,7 +10,9 @@
|
||||||
missing_debug_implementations,
|
missing_debug_implementations,
|
||||||
unused_crate_dependencies
|
unused_crate_dependencies
|
||||||
)]
|
)]
|
||||||
|
#![allow(clippy::default_constructed_unit_structs)]
|
||||||
|
|
||||||
|
use gossip::NopDispatcher;
|
||||||
// Workaround for "unused crate" lint false positives.
|
// Workaround for "unused crate" lint false positives.
|
||||||
use workspace_hack as _;
|
use workspace_hack as _;
|
||||||
|
|
||||||
|
@ -21,7 +23,7 @@ use std::{
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use authz::{Authorizer, AuthorizerInstrumentation, IoxAuthorizer};
|
use authz::{Authorizer, AuthorizerInstrumentation, IoxAuthorizer};
|
||||||
use clap_blocks::router::RouterConfig;
|
use clap_blocks::{gossip::GossipConfig, router::RouterConfig};
|
||||||
use data_types::NamespaceName;
|
use data_types::NamespaceName;
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
use hyper::{Body, Request, Response};
|
use hyper::{Body, Request, Response};
|
||||||
|
@ -86,6 +88,10 @@ pub enum Error {
|
||||||
source: Box<dyn std::error::Error>,
|
source: Box<dyn std::error::Error>,
|
||||||
addr: String,
|
addr: String,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// An error binding the UDP socket for gossip communication.
|
||||||
|
#[error("failed to bind udp gossip socket: {0}")]
|
||||||
|
GossipBind(std::io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||||
|
@ -218,6 +224,7 @@ pub async fn create_router_server_type(
|
||||||
catalog: Arc<dyn Catalog>,
|
catalog: Arc<dyn Catalog>,
|
||||||
object_store: Arc<DynObjectStore>,
|
object_store: Arc<DynObjectStore>,
|
||||||
router_config: &RouterConfig,
|
router_config: &RouterConfig,
|
||||||
|
gossip_config: &GossipConfig,
|
||||||
trace_context_header_name: String,
|
trace_context_header_name: String,
|
||||||
) -> Result<Arc<dyn ServerType>> {
|
) -> Result<Arc<dyn ServerType>> {
|
||||||
let ingester_connections = router_config.ingester_addresses.iter().map(|addr| {
|
let ingester_connections = router_config.ingester_addresses.iter().map(|addr| {
|
||||||
|
@ -333,6 +340,28 @@ pub async fn create_router_server_type(
|
||||||
// Record the overall request handling latency
|
// Record the overall request handling latency
|
||||||
let handler_stack = InstrumentationDecorator::new("request", &metrics, handler_stack);
|
let handler_stack = InstrumentationDecorator::new("request", &metrics, handler_stack);
|
||||||
|
|
||||||
|
// Optionally initialise the gossip subsystem.
|
||||||
|
//
|
||||||
|
// NOTE: the handle is completely unused, but needs to live as long as the
|
||||||
|
// server does to do anything useful (RAII), so it is placed in the
|
||||||
|
// RpcWriteRouterServer, which doesn't need it at all.
|
||||||
|
//
|
||||||
|
// TODO: remove the handle from RpcWriteRouterServer once the handle is actually used
|
||||||
|
let gossip_handle = match gossip_config.gossip_bind_address {
|
||||||
|
Some(bind_addr) => {
|
||||||
|
let handle = gossip::Builder::new(
|
||||||
|
gossip_config.seed_list.clone(),
|
||||||
|
NopDispatcher::default(),
|
||||||
|
Arc::clone(&metrics),
|
||||||
|
)
|
||||||
|
.bind(*bind_addr)
|
||||||
|
.await
|
||||||
|
.map_err(Error::GossipBind)?;
|
||||||
|
Some(handle)
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
// Initialize the HTTP API delegate
|
// Initialize the HTTP API delegate
|
||||||
let write_request_unifier: Result<Box<dyn WriteRequestUnifier>> = match (
|
let write_request_unifier: Result<Box<dyn WriteRequestUnifier>> = match (
|
||||||
router_config.single_tenant_deployment,
|
router_config.single_tenant_deployment,
|
||||||
|
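The comment in the hunk above notes the gossip handle is held purely for RAII. A minimal sketch of that pattern, with made-up types standing in for the real gossip handle and router server:

// Illustrative only: `Handle` stands in for the real gossip handle type.
struct Handle;

impl Drop for Handle {
    fn drop(&mut self) {
        // Dropping the handle is what would shut the gossip subsystem down.
        println!("gossip stopped");
    }
}

struct Server {
    // Never read; held only so the gossip subsystem lives as long as the server.
    _gossip_handle: Option<Handle>,
}

fn main() {
    let server = Server { _gossip_handle: Some(Handle) };
    // ... serve requests ...
    drop(server); // gossip stops together with the server
}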
@ -379,8 +408,13 @@ pub async fn create_router_server_type(
|
||||||
// `RpcWriteRouterServerType`.
|
// `RpcWriteRouterServerType`.
|
||||||
let grpc = RpcWriteGrpcDelegate::new(catalog, object_store);
|
let grpc = RpcWriteGrpcDelegate::new(catalog, object_store);
|
||||||
|
|
||||||
let router_server =
|
let router_server = RpcWriteRouterServer::new(
|
||||||
RpcWriteRouterServer::new(http, grpc, metrics, common_state.trace_collector());
|
http,
|
||||||
|
grpc,
|
||||||
|
metrics,
|
||||||
|
common_state.trace_collector(),
|
||||||
|
gossip_handle,
|
||||||
|
);
|
||||||
let server_type = Arc::new(RpcWriteRouterServerType::new(router_server, common_state));
|
let server_type = Arc::new(RpcWriteRouterServerType::new(router_server, common_state));
|
||||||
Ok(server_type)
|
Ok(server_type)
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,7 +108,7 @@ impl From<&ParquetFile> for ParquetFilePath {
|
||||||
Self {
|
Self {
|
||||||
namespace_id: f.namespace_id,
|
namespace_id: f.namespace_id,
|
||||||
table_id: f.table_id,
|
table_id: f.table_id,
|
||||||
partition_id: f.transition_partition_id(),
|
partition_id: f.partition_id.clone(),
|
||||||
object_store_id: f.object_store_id,
|
object_store_id: f.object_store_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -119,7 +119,7 @@ impl From<&ParquetFileParams> for ParquetFilePath {
|
||||||
Self {
|
Self {
|
||||||
namespace_id: f.namespace_id,
|
namespace_id: f.namespace_id,
|
||||||
table_id: f.table_id,
|
table_id: f.table_id,
|
||||||
partition_id: f.transition_partition_id(),
|
partition_id: f.partition_id.clone(),
|
||||||
object_store_id: f.object_store_id,
|
object_store_id: f.object_store_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -91,7 +91,7 @@ use bytes::Bytes;
|
||||||
use data_types::{
|
use data_types::{
|
||||||
ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId,
|
ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId,
|
||||||
ParquetFileParams, PartitionHashId, PartitionId, PartitionKey, StatValues, Statistics, TableId,
|
ParquetFileParams, PartitionHashId, PartitionId, PartitionKey, StatValues, Statistics, TableId,
|
||||||
Timestamp,
|
Timestamp, TransitionPartitionId,
|
||||||
};
|
};
|
||||||
use generated_types::influxdata::iox::ingester::v1 as proto;
|
use generated_types::influxdata::iox::ingester::v1 as proto;
|
||||||
use iox_time::Time;
|
use iox_time::Time;
|
||||||
|
@ -443,6 +443,7 @@ impl IoxMetadata {
|
||||||
where
|
where
|
||||||
F: for<'a> Fn(&'a str) -> ColumnId,
|
F: for<'a> Fn(&'a str) -> ColumnId,
|
||||||
{
|
{
|
||||||
|
let partition_id = TransitionPartitionId::from((partition_id, partition_hash_id.as_ref()));
|
||||||
let decoded = metadata.decode().expect("invalid IOx metadata");
|
let decoded = metadata.decode().expect("invalid IOx metadata");
|
||||||
trace!(
|
trace!(
|
||||||
?partition_id,
|
?partition_id,
|
||||||
|
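The `TransitionPartitionId::from((partition_id, partition_hash_id.as_ref()))` call above converts the legacy pair of columns into the new identifier. A hedged sketch of what that conversion presumably does (prefer the hash ID when present), using simplified stand-in types:

// Simplified stand-ins; the real types and `From` impl live in `data_types`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PartitionId(i64);
#[derive(Debug, Clone, PartialEq, Eq)]
struct PartitionHashId(Vec<u8>);

#[derive(Debug, Clone, PartialEq, Eq)]
enum TransitionPartitionId {
    Deprecated(PartitionId),
    Deterministic(PartitionHashId),
}

impl From<(PartitionId, Option<&PartitionHashId>)> for TransitionPartitionId {
    fn from((id, hash_id): (PartitionId, Option<&PartitionHashId>)) -> Self {
        match hash_id {
            // Prefer the deterministic identifier when the row carries one.
            Some(hash_id) => Self::Deterministic(hash_id.clone()),
            None => Self::Deprecated(id),
        }
    }
}

fn main() {
    let id = PartitionId(3);
    let got = TransitionPartitionId::from((id.clone(), None));
    assert_eq!(got, TransitionPartitionId::Deprecated(id));
}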
@ -487,7 +488,6 @@ impl IoxMetadata {
|
||||||
namespace_id: self.namespace_id,
|
namespace_id: self.namespace_id,
|
||||||
table_id: self.table_id,
|
table_id: self.table_id,
|
||||||
partition_id,
|
partition_id,
|
||||||
partition_hash_id,
|
|
||||||
object_store_id: self.object_store_id,
|
object_store_id: self.object_store_id,
|
||||||
min_time,
|
min_time,
|
||||||
max_time,
|
max_time,
|
||||||
|
|
|
@ -113,11 +113,13 @@ impl CatalogCache {
|
||||||
"ram_metadata",
|
"ram_metadata",
|
||||||
RamSize(ram_pool_metadata_bytes),
|
RamSize(ram_pool_metadata_bytes),
|
||||||
Arc::clone(&metric_registry),
|
Arc::clone(&metric_registry),
|
||||||
|
&Handle::current(),
|
||||||
));
|
));
|
||||||
let ram_pool_data = Arc::new(ResourcePool::new(
|
let ram_pool_data = Arc::new(ResourcePool::new(
|
||||||
"ram_data",
|
"ram_data",
|
||||||
RamSize(ram_pool_data_bytes),
|
RamSize(ram_pool_data_bytes),
|
||||||
Arc::clone(&metric_registry),
|
Arc::clone(&metric_registry),
|
||||||
|
&Handle::current(),
|
||||||
));
|
));
|
||||||
|
|
||||||
let partition_cache = PartitionCache::new(
|
let partition_cache = PartitionCache::new(
|
||||||
|
|
|
@ -361,8 +361,8 @@ mod tests {
|
||||||
partition.create_parquet_file(builder).await;
|
partition.create_parquet_file(builder).await;
|
||||||
let table_id = table.table.id;
|
let table_id = table.table.id;
|
||||||
|
|
||||||
let single_file_size = 240;
|
let single_file_size = 256;
|
||||||
let two_file_size = 448;
|
let two_file_size = 480;
|
||||||
assert!(single_file_size < two_file_size);
|
assert!(single_file_size < two_file_size);
|
||||||
|
|
||||||
let cache = make_cache(&catalog);
|
let cache = make_cache(&catalog);
|
||||||
|
|
|
@ -17,7 +17,7 @@ use cache_system::{
|
||||||
};
|
};
|
||||||
use data_types::{
|
use data_types::{
|
||||||
partition_template::{build_column_values, ColumnValue},
|
partition_template::{build_column_values, ColumnValue},
|
||||||
ColumnId, Partition, PartitionId, TransitionPartitionId,
|
ColumnId, Partition, TransitionPartitionId,
|
||||||
};
|
};
|
||||||
use datafusion::scalar::ScalarValue;
|
use datafusion::scalar::ScalarValue;
|
||||||
use iox_catalog::{interface::Catalog, partition_lookup_batch};
|
use iox_catalog::{interface::Catalog, partition_lookup_batch};
|
||||||
|
@ -38,7 +38,7 @@ const CACHE_ID: &str = "partition";
|
||||||
|
|
||||||
type CacheT = Box<
|
type CacheT = Box<
|
||||||
dyn Cache<
|
dyn Cache<
|
||||||
K = PartitionId,
|
K = TransitionPartitionId,
|
||||||
V = Option<CachedPartition>,
|
V = Option<CachedPartition>,
|
||||||
GetExtra = (Arc<CachedTable>, Option<Span>),
|
GetExtra = (Arc<CachedTable>, Option<Span>),
|
||||||
PeekExtra = ((), Option<Span>),
|
PeekExtra = ((), Option<Span>),
|
||||||
|
@ -49,7 +49,7 @@ type CacheT = Box<
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PartitionCache {
|
pub struct PartitionCache {
|
||||||
cache: CacheT,
|
cache: CacheT,
|
||||||
remove_if_handle: RemoveIfHandle<PartitionId, Option<CachedPartition>>,
|
remove_if_handle: RemoveIfHandle<TransitionPartitionId, Option<CachedPartition>>,
|
||||||
flusher: Arc<dyn BatchLoaderFlusher>,
|
flusher: Arc<dyn BatchLoaderFlusher>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,7 +64,8 @@ impl PartitionCache {
|
||||||
testing: bool,
|
testing: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let loader = FunctionLoader::new(
|
let loader = FunctionLoader::new(
|
||||||
move |partition_ids: Vec<PartitionId>, cached_tables: Vec<Arc<CachedTable>>| {
|
move |partition_ids: Vec<TransitionPartitionId>,
|
||||||
|
cached_tables: Vec<Arc<CachedTable>>| {
|
||||||
// sanity checks
|
// sanity checks
|
||||||
assert_eq!(partition_ids.len(), cached_tables.len());
|
assert_eq!(partition_ids.len(), cached_tables.len());
|
||||||
|
|
||||||
|
@ -75,23 +76,20 @@ impl PartitionCache {
|
||||||
// prepare output buffer
|
// prepare output buffer
|
||||||
let mut out = (0..partition_ids.len()).map(|_| None).collect::<Vec<_>>();
|
let mut out = (0..partition_ids.len()).map(|_| None).collect::<Vec<_>>();
|
||||||
let mut out_map =
|
let mut out_map =
|
||||||
HashMap::<PartitionId, usize>::with_capacity(partition_ids.len());
|
HashMap::<TransitionPartitionId, usize>::with_capacity(partition_ids.len());
|
||||||
for (idx, id) in partition_ids.iter().enumerate() {
|
for (idx, id) in partition_ids.iter().enumerate() {
|
||||||
match out_map.entry(*id) {
|
match out_map.entry(id.clone()) {
|
||||||
Entry::Occupied(_) => unreachable!("cache system requested same partition from loader concurrently, this should have been prevented by the CacheDriver"),
|
Entry::Occupied(_) => unreachable!(
|
||||||
|
"cache system requested same partition from loader concurrently, \
|
||||||
|
this should have been prevented by the CacheDriver"
|
||||||
|
),
|
||||||
Entry::Vacant(v) => {
|
Entry::Vacant(v) => {
|
||||||
v.insert(idx);
|
v.insert(idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// build `&[&TransitionPartitionId]` for batch catalog request
|
let ids: Vec<&TransitionPartitionId> = partition_ids.iter().collect();
|
||||||
let ids = partition_ids
|
|
||||||
.iter()
|
|
||||||
.copied()
|
|
||||||
.map(TransitionPartitionId::Deprecated)
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let ids = ids.iter().collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// fetch catalog data
|
// fetch catalog data
|
||||||
let partitions = Backoff::new(&backoff_config)
|
let partitions = Backoff::new(&backoff_config)
|
||||||
|
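The `out_map.entry(id.clone())` change above is a consequence of the new cache key type: the old `PartitionId` key could be copied (`entry(*id)`), while `TransitionPartitionId` is not `Copy` and must be cloned into the deduplication map. A tiny sketch of the same pattern with a generic non-Copy key:

use std::collections::hash_map::{Entry, HashMap};

// Any `Eq + Hash + Clone` key works; `String` stands in for the non-Copy
// `TransitionPartitionId` used by the partition cache loader.
fn dedup_positions(keys: &[String]) -> HashMap<String, usize> {
    let mut out_map = HashMap::with_capacity(keys.len());
    for (idx, key) in keys.iter().enumerate() {
        // The key is not `Copy`, so it has to be cloned into the map.
        match out_map.entry(key.clone()) {
            Entry::Occupied(_) => unreachable!("duplicate key requested"),
            Entry::Vacant(v) => {
                v.insert(idx);
            }
        }
    }
    out_map
}

fn main() {
    let keys = vec!["a".to_string(), "b".to_string()];
    let map = dedup_positions(&keys);
    assert_eq!(map["a"], 0);
    assert_eq!(map["b"], 1);
}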
@ -104,7 +102,7 @@ impl PartitionCache {
|
||||||
|
|
||||||
// build output
|
// build output
|
||||||
for p in partitions {
|
for p in partitions {
|
||||||
let idx = out_map[&p.id];
|
let idx = out_map[&p.transition_partition_id()];
|
||||||
let cached_table = &cached_tables[idx];
|
let cached_table = &cached_tables[idx];
|
||||||
let p = CachedPartition::new(p, cached_table);
|
let p = CachedPartition::new(p, cached_table);
|
||||||
out[idx] = Some(p);
|
out[idx] = Some(p);
|
||||||
|
@ -180,7 +178,7 @@ impl PartitionCache {
|
||||||
|
|
||||||
self.remove_if_handle.remove_if_and_get(
|
self.remove_if_handle.remove_if_and_get(
|
||||||
&self.cache,
|
&self.cache,
|
||||||
partition_id,
|
partition_id.clone(),
|
||||||
move |cached_partition| {
|
move |cached_partition| {
|
||||||
let invalidates = if let Some(sort_key) =
|
let invalidates = if let Some(sort_key) =
|
||||||
&cached_partition.and_then(|p| p.sort_key)
|
&cached_partition.and_then(|p| p.sort_key)
|
||||||
|
@ -195,7 +193,7 @@ impl PartitionCache {
|
||||||
|
|
||||||
if invalidates {
|
if invalidates {
|
||||||
debug!(
|
debug!(
|
||||||
partition_id = partition_id.get(),
|
partition_id = %partition_id,
|
||||||
"invalidate partition cache",
|
"invalidate partition cache",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -217,13 +215,13 @@ impl PartitionCache {
|
||||||
/// Request for [`PartitionCache::get`].
|
/// Request for [`PartitionCache::get`].
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PartitionRequest {
|
pub struct PartitionRequest {
|
||||||
pub partition_id: PartitionId,
|
pub partition_id: TransitionPartitionId,
|
||||||
pub sort_key_should_cover: Vec<ColumnId>,
|
pub sort_key_should_cover: Vec<ColumnId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct CachedPartition {
|
pub struct CachedPartition {
|
||||||
pub id: PartitionId,
|
pub id: TransitionPartitionId,
|
||||||
pub sort_key: Option<Arc<PartitionSortKey>>,
|
pub sort_key: Option<Arc<PartitionSortKey>>,
|
||||||
pub column_ranges: ColumnRanges,
|
pub column_ranges: ColumnRanges,
|
||||||
}
|
}
|
||||||
|
@ -299,7 +297,7 @@ impl CachedPartition {
|
||||||
column_ranges.shrink_to_fit();
|
column_ranges.shrink_to_fit();
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
id: partition.id,
|
id: partition.transition_partition_id(),
|
||||||
sort_key,
|
sort_key,
|
||||||
column_ranges: Arc::new(column_ranges),
|
column_ranges: Arc::new(column_ranges),
|
||||||
}
|
}
|
||||||
|
@ -368,7 +366,10 @@ mod tests {
|
||||||
ram::test_util::test_ram_pool, test_util::assert_catalog_access_metric_count,
|
ram::test_util::test_ram_pool, test_util::assert_catalog_access_metric_count,
|
||||||
};
|
};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use data_types::{partition_template::TablePartitionTemplateOverride, ColumnType};
|
use data_types::{
|
||||||
|
partition_template::TablePartitionTemplateOverride, ColumnType, PartitionHashId,
|
||||||
|
PartitionId, PartitionKey, TableId,
|
||||||
|
};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use generated_types::influxdata::iox::partition_template::v1::{
|
use generated_types::influxdata::iox::partition_template::v1::{
|
||||||
template_part::Part, PartitionTemplate, TemplatePart,
|
template_part::Part, PartitionTemplate, TemplatePart,
|
||||||
|
@ -419,8 +420,11 @@ mod tests {
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let p1_id = p1.transition_partition_id();
|
||||||
|
let p2_id = p2.transition_partition_id();
|
||||||
|
|
||||||
let sort_key1a = cache
|
let sort_key1a = cache
|
||||||
.get_one(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
|
.get_one(Arc::clone(&cached_table), &p1_id, &Vec::new(), None)
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.sort_key;
|
.sort_key;
|
||||||
|
@ -434,24 +438,24 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert_catalog_access_metric_count(
|
assert_catalog_access_metric_count(
|
||||||
&catalog.metric_registry,
|
&catalog.metric_registry,
|
||||||
"partition_get_by_id_batch",
|
"partition_get_by_hash_id_batch",
|
||||||
1,
|
1,
|
||||||
);
|
);
|
||||||
|
|
||||||
let sort_key2 = cache
|
let sort_key2 = cache
|
||||||
.get_one(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
|
.get_one(Arc::clone(&cached_table), &p2_id, &Vec::new(), None)
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.sort_key;
|
.sort_key;
|
||||||
assert_eq!(sort_key2, None);
|
assert_eq!(sort_key2, None);
|
||||||
assert_catalog_access_metric_count(
|
assert_catalog_access_metric_count(
|
||||||
&catalog.metric_registry,
|
&catalog.metric_registry,
|
||||||
"partition_get_by_id_batch",
|
"partition_get_by_hash_id_batch",
|
||||||
2,
|
2,
|
||||||
);
|
);
|
||||||
|
|
||||||
let sort_key1b = cache
|
let sort_key1b = cache
|
||||||
.get_one(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
|
.get_one(Arc::clone(&cached_table), &p1_id, &Vec::new(), None)
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.sort_key;
|
.sort_key;
|
||||||
|
@ -461,16 +465,37 @@ mod tests {
|
||||||
));
|
));
|
||||||
assert_catalog_access_metric_count(
|
assert_catalog_access_metric_count(
|
||||||
&catalog.metric_registry,
|
&catalog.metric_registry,
|
||||||
"partition_get_by_id_batch",
|
"partition_get_by_hash_id_batch",
|
||||||
2,
|
2,
|
||||||
);
|
);
|
||||||
|
|
||||||
// non-existing partition
|
// non-existing partition
|
||||||
for _ in 0..2 {
|
for _ in 0..2 {
|
||||||
|
// Non-existing partition identified by partition hash ID
|
||||||
let res = cache
|
let res = cache
|
||||||
.get_one(
|
.get_one(
|
||||||
Arc::clone(&cached_table),
|
Arc::clone(&cached_table),
|
||||||
PartitionId::new(i64::MAX),
|
&TransitionPartitionId::Deterministic(PartitionHashId::new(
|
||||||
|
TableId::new(i64::MAX),
|
||||||
|
&PartitionKey::from("bananas_not_found"),
|
||||||
|
)),
|
||||||
|
&[],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(res, None);
|
||||||
|
assert_catalog_access_metric_count(
|
||||||
|
&catalog.metric_registry,
|
||||||
|
"partition_get_by_hash_id_batch",
|
||||||
|
3,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Non-existing partition identified by deprecated catalog IDs; this part can be
|
||||||
|
// removed when partition identification is fully transitioned to partition hash IDs
|
||||||
|
let res = cache
|
||||||
|
.get_one(
|
||||||
|
Arc::clone(&cached_table),
|
||||||
|
&TransitionPartitionId::Deprecated(PartitionId::new(i64::MAX)),
|
||||||
&Vec::new(),
|
&Vec::new(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
@ -479,7 +504,7 @@ mod tests {
|
||||||
assert_catalog_access_metric_count(
|
assert_catalog_access_metric_count(
|
||||||
&catalog.metric_registry,
|
&catalog.metric_registry,
|
||||||
"partition_get_by_id_batch",
|
"partition_get_by_id_batch",
|
||||||
3,
|
1,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -548,8 +573,14 @@ mod tests {
true,
);

+let p1_id = p1.transition_partition_id();
+let p2_id = p2.transition_partition_id();
+let p3_id = p3.transition_partition_id();
+let p4_id = p4.transition_partition_id();
+let p5_id = p5.transition_partition_id();
+
let ranges1a = cache
-.get_one(Arc::clone(&cached_table), p1.id, &[], None)
+.get_one(Arc::clone(&cached_table), &p1_id, &[], None)
.await
.unwrap()
.column_ranges;

@@ -578,12 +609,12 @@ mod tests {
));
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);

let ranges2 = cache
-.get_one(Arc::clone(&cached_table), p2.id, &[], None)
+.get_one(Arc::clone(&cached_table), &p2_id, &[], None)
.await
.unwrap()
.column_ranges;

@@ -599,12 +630,12 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
2,
);

let ranges3 = cache
-.get_one(Arc::clone(&cached_table), p3.id, &[], None)
+.get_one(Arc::clone(&cached_table), &p3_id, &[], None)
.await
.unwrap()
.column_ranges;

@@ -629,12 +660,12 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
3,
);

let ranges4 = cache
-.get_one(Arc::clone(&cached_table), p4.id, &[], None)
+.get_one(Arc::clone(&cached_table), &p4_id, &[], None)
.await
.unwrap()
.column_ranges;

@@ -659,12 +690,12 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
4,
);

let ranges5 = cache
-.get_one(Arc::clone(&cached_table), p5.id, &[], None)
+.get_one(Arc::clone(&cached_table), &p5_id, &[], None)
.await
.unwrap()
.column_ranges;

@@ -680,28 +711,48 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
5,
);

let ranges1b = cache
-.get_one(Arc::clone(&cached_table), p1.id, &[], None)
+.get_one(Arc::clone(&cached_table), &p1_id, &[], None)
.await
.unwrap()
.column_ranges;
assert!(Arc::ptr_eq(&ranges1a, &ranges1b));
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
5,
);

-// non-existing partition
for _ in 0..2 {
+// Non-existing partition identified by partition hash ID
let res = cache
.get_one(
Arc::clone(&cached_table),
-PartitionId::new(i64::MAX),
+&TransitionPartitionId::Deterministic(PartitionHashId::new(
+TableId::new(i64::MAX),
+&PartitionKey::from("bananas_not_found"),
+)),
+&[],
+None,
+)
+.await;
+assert_eq!(res, None);
+assert_catalog_access_metric_count(
+&catalog.metric_registry,
+"partition_get_by_hash_id_batch",
+6,
+);
+
+// Non-existing partition identified by deprecated catalog IDs; this part can be
+// removed when partition identification is fully transitioned to partition hash IDs
+let res = cache
+.get_one(
+Arc::clone(&cached_table),
+&TransitionPartitionId::Deprecated(PartitionId::new(i64::MAX)),
&[],
None,
)
@@ -710,7 +761,7 @@ mod tests {
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
-6,
+1,
);
}
}
@@ -724,7 +775,7 @@ mod tests {
let c1 = t.create_column("foo", ColumnType::Tag).await;
let c2 = t.create_column("time", ColumnType::Time).await;
let p = t.create_partition("k1").await;
-let p_id = p.partition.id;
+let p_id = p.partition.transition_partition_id();
let p_sort_key = p.partition.sort_key();
let cached_table = Arc::new(CachedTable {
id: t.table.id,
@@ -751,41 +802,41 @@ mod tests {
);

let sort_key = cache
-.get_one(Arc::clone(&cached_table), p_id, &[], None)
+.get_one(Arc::clone(&cached_table), &p_id, &[], None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key, None,);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);

// requesting nother will not expire
assert!(p_sort_key.is_none());
let sort_key = cache
-.get_one(Arc::clone(&cached_table), p_id, &[], None)
+.get_one(Arc::clone(&cached_table), &p_id, &[], None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key, None,);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);

// but requesting something will expire
let sort_key = cache
-.get_one(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
+.get_one(Arc::clone(&cached_table), &p_id, &[c1.column.id], None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key, None,);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
2,
);

@@ -801,7 +852,7 @@ mod tests {
// expire & fetch
let p_sort_key = p.partition.sort_key();
let sort_key = cache
-.get_one(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
+.get_one(Arc::clone(&cached_table), &p_id, &[c1.column.id], None)
.await
.unwrap()
.sort_key;
@@ -815,7 +866,7 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
3,
);

@@ -827,7 +878,7 @@ mod tests {
vec![c1.column.id, c2.column.id],
] {
let sort_key_2 = cache
-.get_one(Arc::clone(&cached_table), p_id, &should_cover, None)
+.get_one(Arc::clone(&cached_table), &p_id, &should_cover, None)
.await
.unwrap()
.sort_key;
@@ -837,7 +888,7 @@ mod tests {
));
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
3,
);
}
@@ -847,7 +898,7 @@ mod tests {
let sort_key_2 = cache
.get_one(
Arc::clone(&cached_table),
-p_id,
+&p_id,
&[c1.column.id, c3.column.id],
None,
)
@@ -861,7 +912,7 @@ mod tests {
assert_eq!(sort_key, sort_key_2);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
4,
);
}
@@ -892,34 +943,45 @@ mod tests {
true,
);

+let p1_id = p1.transition_partition_id();
+let p2_id = p2.transition_partition_id();
+
let mut res = cache
.get(
Arc::clone(&cached_table),
vec![
PartitionRequest {
-partition_id: p1.id,
+partition_id: p1_id.clone(),
sort_key_should_cover: vec![],
},
PartitionRequest {
-partition_id: p2.id,
+partition_id: p2_id.clone(),
sort_key_should_cover: vec![],
},
PartitionRequest {
-partition_id: p1.id,
+partition_id: p1_id.clone(),
+sort_key_should_cover: vec![],
+},
+// requesting non-existing partitions is fine, they just don't appear in
+// the output
+PartitionRequest {
+partition_id: TransitionPartitionId::Deprecated(PartitionId::new(i64::MAX)),
sort_key_should_cover: vec![],
},
PartitionRequest {
-// requesting non-existing partitions is fine, they just don't appear in the output
-partition_id: PartitionId::new(i64::MAX),
+partition_id: TransitionPartitionId::Deterministic(PartitionHashId::new(
+TableId::new(i64::MAX),
+&PartitionKey::from("bananas_not_found"),
+)),
sort_key_should_cover: vec![],
},
],
None,
)
.await;
-res.sort_by_key(|p| p.id);
-let ids = res.iter().map(|p| p.id).collect::<Vec<_>>();
-assert_eq!(ids, vec![p1.id, p1.id, p2.id]);
+res.sort_by(|a, b| a.id.cmp(&b.id));
+let ids = res.into_iter().map(|p| p.id).collect::<Vec<_>>();
+assert_eq!(ids, vec![p1_id.clone(), p1_id, p2_id]);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
@@ -1008,7 +1070,7 @@ mod tests {
c_id: ColumnId,

/// Partitions within that table.
-partitions: Vec<PartitionId>,
+partitions: Vec<TransitionPartitionId>,
}

impl ConcurrencyTestState {
@@ -1032,7 +1094,7 @@ mod tests {
t.create_partition_with_sort_key(&format!("p{i}"), &["time"])
.await
.partition
-.id
+.transition_partition_id()
}
})
.collect::<Vec<_>>()
@@ -1046,7 +1108,8 @@ mod tests {
}
}

-/// Perform the actual [`PartitionCache::get`] call and run some basic sanity checks on the result.
+/// Perform the actual [`PartitionCache::get`] call and run some basic sanity checks on the
+/// result.
async fn run(self, cache: Arc<PartitionCache>) {
let Self {
cached_table,
@@ -1060,15 +1123,15 @@ mod tests {
partitions
.iter()
.map(|p| PartitionRequest {
-partition_id: *p,
+partition_id: p.clone(),
sort_key_should_cover: vec![],
})
.collect(),
None,
)
.await;
-results.sort_by_key(|p| p.id);
-let partitions_res = results.iter().map(|p| p.id).collect::<Vec<_>>();
+results.sort_by(|a, b| a.id.cmp(&b.id));
+let partitions_res = results.iter().map(|p| p.id.clone()).collect::<Vec<_>>();
assert_eq!(partitions, partitions_res);
assert!(results
.iter()
@@ -1086,7 +1149,7 @@ mod tests {
async fn get_one(
&self,
cached_table: Arc<CachedTable>,
-partition_id: PartitionId,
+partition_id: &TransitionPartitionId,
sort_key_should_cover: &[ColumnId],
span: Option<Span>,
) -> Option<CachedPartition>;
@@ -1097,14 +1160,14 @@ mod tests {
async fn get_one(
&self,
cached_table: Arc<CachedTable>,
-partition_id: PartitionId,
+partition_id: &TransitionPartitionId,
sort_key_should_cover: &[ColumnId],
span: Option<Span>,
) -> Option<CachedPartition> {
self.get(
cached_table,
vec![PartitionRequest {
-partition_id,
+partition_id: partition_id.clone(),
sort_key_should_cover: sort_key_should_cover.to_vec(),
}],
span,
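The tests above switch every cache lookup from a plain catalog PartitionId to a TransitionPartitionId, which can identify a partition either by its deprecated catalog row ID or by a deterministic hash ID, and which must be cloned rather than copied. A minimal sketch of that keying idea follows; the enum here is a simplified stand-in for data_types::TransitionPartitionId (the variant payloads are assumptions), not the real type.

use std::collections::HashMap;

// Simplified stand-in: the real type wraps PartitionId / PartitionHashId.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
enum TransitionPartitionId {
    /// Old-style catalog row ID.
    Deprecated(i64),
    /// Deterministic hash of (table ID, partition key).
    Deterministic(Vec<u8>),
}

fn main() {
    // Both identifier styles can key the same map, which is what lets the
    // querier cache serve lookups while partitions migrate to hash IDs.
    let mut cache: HashMap<TransitionPartitionId, &str> = HashMap::new();
    cache.insert(TransitionPartitionId::Deprecated(1), "catalog-ID partition");
    cache.insert(
        TransitionPartitionId::Deterministic(b"table-1|k1".to_vec()),
        "hash-ID partition",
    );

    // Unknown identifiers simply miss, as the non-existing-partition tests expect.
    assert!(cache
        .get(&TransitionPartitionId::Deprecated(i64::MAX))
        .is_none());
}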
@@ -43,12 +43,14 @@ pub mod test_util {
use std::sync::Arc;

use cache_system::backend::policy::lru::ResourcePool;
+use tokio::runtime::Handle;

pub fn test_ram_pool() -> Arc<ResourcePool<RamSize>> {
Arc::new(ResourcePool::new(
"pool",
RamSize(usize::MAX),
Arc::new(metric::Registry::new()),
+&Handle::current(),
))
}
}

@@ -859,10 +859,6 @@ impl IngesterPartition {
}
}

-pub(crate) fn partition_id(&self) -> PartitionId {
-self.partition_id
-}
-
pub(crate) fn transition_partition_id(&self) -> TransitionPartitionId {
TransitionPartitionId::from((self.partition_id, self.partition_hash_id.as_ref()))
}
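The ResourcePool constructor above now takes a &Handle. Note that tokio::runtime::Handle::current() panics when called outside a running Tokio runtime, so test_ram_pool() is presumably only called from async tests. A small sketch of that constraint (the pool itself is not shown):

use tokio::runtime::Handle;

#[tokio::main]
async fn main() {
    // Inside the runtime started by #[tokio::main], grabbing a handle is fine,
    // and it can be used to spawn background work (e.g. cache housekeeping).
    let handle = Handle::current();
    let task = handle.spawn(async { 40 + 2 });
    assert_eq!(task.await.unwrap(), 42);

    // Calling Handle::current() from plain synchronous code that is not
    // running under a Tokio runtime would panic instead.
}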
@@ -1,6 +1,6 @@
use std::{collections::HashMap, sync::Arc};

-use data_types::{ChunkId, ChunkOrder, ColumnId, ParquetFile, PartitionId, TransitionPartitionId};
+use data_types::{ChunkId, ChunkOrder, ColumnId, ParquetFile, TransitionPartitionId};
use futures::StreamExt;
use hashbrown::HashSet;
use iox_catalog::interface::Catalog;
@@ -56,7 +56,7 @@ impl ChunkAdapter {
&self,
cached_table: Arc<CachedTable>,
files: Arc<[Arc<ParquetFile>]>,
-cached_partitions: &HashMap<PartitionId, CachedPartition>,
+cached_partitions: &HashMap<TransitionPartitionId, CachedPartition>,
span: Option<Span>,
) -> Vec<QuerierParquetChunk> {
let span_recorder = SpanRecorder::new(span);
@@ -170,18 +170,13 @@ impl ChunkAdapter {

let order = ChunkOrder::new(parquet_file.file.max_l0_created_at.get());

-let partition_id = parquet_file.file.partition_id;
+let partition_id = parquet_file.file.partition_id.clone();
-let transition_partition_id = TransitionPartitionId::from((
-partition_id,
-parquet_file.file.partition_hash_id.as_ref(),
-));

let meta = Arc::new(QuerierParquetChunkMeta {
chunk_id,
order,
sort_key: Some(sort_key),
partition_id,
-transition_partition_id,
});

let parquet_chunk = Arc::new(ParquetChunk::new(
@@ -1,6 +1,6 @@
//! Querier Chunks

-use data_types::{ChunkId, ChunkOrder, PartitionId, TransitionPartitionId};
+use data_types::{ChunkId, ChunkOrder, TransitionPartitionId};
use datafusion::physical_plan::Statistics;
use iox_query::chunk_statistics::{create_chunk_statistics, ColumnRanges};
use parquet_file::chunk::ParquetChunk;
@@ -25,10 +25,7 @@ pub struct QuerierParquetChunkMeta {
sort_key: Option<SortKey>,

/// Partition ID.
-partition_id: PartitionId,
+partition_id: TransitionPartitionId,
-
-/// Transition partition ID.
-transition_partition_id: TransitionPartitionId,
}

impl QuerierParquetChunkMeta {
@@ -43,13 +40,8 @@ impl QuerierParquetChunkMeta {
}

/// Partition ID.
-pub fn partition_id(&self) -> PartitionId {
+pub fn partition_id(&self) -> &TransitionPartitionId {
-self.partition_id
+&self.partition_id
-}
-
-/// Partition ID.
-pub fn transition_partition_id(&self) -> &TransitionPartitionId {
-&self.transition_partition_id
}
}

@@ -251,7 +243,7 @@ pub mod tests {
.get(
Arc::clone(&self.cached_table),
vec![PartitionRequest {
-partition_id: self.parquet_file.partition_id,
+partition_id: self.parquet_file.partition_id.clone(),
sort_key_should_cover: vec![],
}],
None,
@@ -261,7 +253,7 @@
.next()
.unwrap();
let cached_partitions =
-HashMap::from([(self.parquet_file.partition_id, cached_partition)]);
+HashMap::from([(self.parquet_file.partition_id.clone(), cached_partition)]);
self.adapter
.new_chunks(
Arc::clone(&self.cached_table),
@@ -15,11 +15,11 @@ impl QueryChunk for QuerierParquetChunk {
}

fn partition_id(&self) -> PartitionId {
-self.meta().partition_id()
+unimplemented!()
}

fn transition_partition_id(&self) -> &TransitionPartitionId {
-self.meta().transition_partition_id()
+self.meta().partition_id()
}

fn sort_key(&self) -> Option<&SortKey> {
@@ -8,7 +8,7 @@ use crate::{
parquet::ChunkAdapter,
IngesterConnection,
};
-use data_types::{ColumnId, NamespaceId, ParquetFile, PartitionId, TableId};
+use data_types::{ColumnId, NamespaceId, ParquetFile, TableId, TransitionPartitionId};
use datafusion::error::DataFusionError;
use futures::join;
use iox_query::{provider, provider::ChunkPruner, QueryChunk};
@@ -282,7 +282,7 @@ impl QuerierTable {
let chunks = partitions
.into_iter()
.filter_map(|mut c| {
-let cached_partition = cached_partitions.get(&c.partition_id())?;
+let cached_partition = cached_partitions.get(&c.transition_partition_id())?;
c.set_partition_column_ranges(&cached_partition.column_ranges);
Some(c)
})
@@ -322,16 +322,16 @@ impl QuerierTable {
ingester_partitions: &[IngesterPartition],
parquet_files: &[Arc<ParquetFile>],
span: Option<Span>,
-) -> HashMap<PartitionId, CachedPartition> {
+) -> HashMap<TransitionPartitionId, CachedPartition> {
let span_recorder = SpanRecorder::new(span);

-let mut should_cover: HashMap<PartitionId, HashSet<ColumnId>> =
+let mut should_cover: HashMap<TransitionPartitionId, HashSet<ColumnId>> =
HashMap::with_capacity(ingester_partitions.len());

// For ingester partitions we only need the column ranges -- which are static -- not the sort key. So it is
// sufficient to collect the partition IDs.
for p in ingester_partitions {
-should_cover.entry(p.partition_id()).or_default();
+should_cover.entry(p.transition_partition_id()).or_default();
}

// For parquet files we must ensure that the -- potentially evolving -- sort key coveres the primary key.
@@ -342,7 +342,7 @@ impl QuerierTable {
.collect::<HashSet<_>>();
for f in parquet_files {
should_cover
-.entry(f.partition_id)
+.entry(f.partition_id.clone())
.or_default()
.extend(f.column_set.iter().copied().filter(|id| pk.contains(id)));
}
@@ -366,7 +366,7 @@ impl QuerierTable {
)
.await;

-partitions.into_iter().map(|p| (p.id, p)).collect()
+partitions.into_iter().map(|p| (p.id.clone(), p)).collect()
}

/// Get a chunk pruner that can be used to prune chunks retrieved via [`chunks`](Self::chunks)
@@ -889,7 +889,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 2);
@@ -899,7 +899,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 4);
@@ -912,7 +912,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);

@@ -922,7 +922,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
1,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 6);
@@ -936,7 +936,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
assert_catalog_access_metric_count(
&catalog.metric_registry,
-"partition_get_by_id_batch",
+"partition_get_by_hash_id_batch",
2,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 8);
@@ -15,6 +15,7 @@ dml = { path = "../dml" }
flate2 = "1.0"
futures = "0.3.28"
generated_types = { path = "../generated_types" }
+gossip = { version = "0.1.0", path = "../gossip" }
hashbrown = { workspace = true }
hyper = "0.14"
iox_catalog = { path = "../iox_catalog" }
@@ -16,6 +16,9 @@ pub struct RpcWriteRouterServer<D, N> {

http: HttpDelegate<D, N>,
grpc: RpcWriteGrpcDelegate,
+
+// TODO: this shouldn't be here but it is here while it's unused elsewhere
+_gossip_handle: Option<gossip::GossipHandle>,
}

impl<D, N> RpcWriteRouterServer<D, N> {
@@ -26,12 +29,14 @@ impl<D, N> RpcWriteRouterServer<D, N> {
grpc: RpcWriteGrpcDelegate,
metrics: Arc<metric::Registry>,
trace_collector: Option<Arc<dyn TraceCollector>>,
+gossip_handle: Option<gossip::GossipHandle>,
) -> Self {
Self {
metrics,
trace_collector,
http,
grpc,
+_gossip_handle: gossip_handle,
}
}
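The router server now owns an optional gossip handle purely to keep that subsystem alive while it is otherwise unused; callers that don't enable gossip can pass None. A generic sketch of the ownership pattern, with a hypothetical Handle type standing in for gossip::GossipHandle:

// Hypothetical stand-in for gossip::GossipHandle; the point is only the
// ownership pattern: holding the handle keeps the background subsystem alive.
#[derive(Debug)]
struct Handle;

#[derive(Debug)]
struct Server {
    // Unused for now, but dropping it would shut the subsystem down.
    _gossip_handle: Option<Handle>,
}

impl Server {
    fn new(gossip_handle: Option<Handle>) -> Self {
        Self {
            _gossip_handle: gossip_handle,
        }
    }
}

fn main() {
    let with_gossip = Server::new(Some(Handle));
    let without_gossip = Server::new(None);
    println!("{with_gossip:?} / {without_gossip:?}");
}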
@@ -18,7 +18,7 @@
// Workaround for "unused crate" lint false positives.
use workspace_hack as _;

-use data_types::{PartitionId, TableId, TransitionPartitionId};
+use data_types::{PartitionHashId, PartitionId, TableId, TransitionPartitionId};
use generated_types::influxdata::iox::catalog::v1::*;
use iox_catalog::interface::{Catalog, SoftDeletedRows};
use observability_deps::tracing::*;
@@ -47,14 +47,14 @@ impl catalog_service_server::CatalogService for CatalogService {
) -> Result<Response<GetParquetFilesByPartitionIdResponse>, Status> {
let mut repos = self.catalog.repositories().await;
let req = request.into_inner();
-let partition_id = TransitionPartitionId::Deprecated(PartitionId::new(req.partition_id));
+let partition_id = to_partition_id(req.partition_identifier)?;

let parquet_files = repos
.parquet_files()
.list_by_partition_not_to_delete(&partition_id)
.await
.map_err(|e| {
-warn!(error=%e, %req.partition_id, "failed to get parquet_files for partition");
+warn!(error=%e, %partition_id, "failed to get parquet_files for partition");
Status::not_found(e.to_string())
})?;

@@ -169,13 +169,52 @@ impl catalog_service_server::CatalogService for CatalogService {
}
}

+fn to_partition_identifier(partition_id: &TransitionPartitionId) -> PartitionIdentifier {
+match partition_id {
+TransitionPartitionId::Deterministic(hash_id) => PartitionIdentifier {
+id: Some(partition_identifier::Id::HashId(
+hash_id.as_bytes().to_owned(),
+)),
+},
+TransitionPartitionId::Deprecated(id) => PartitionIdentifier {
+id: Some(partition_identifier::Id::CatalogId(id.get())),
+},
+}
+}
+
+fn to_partition_id(
+partition_identifier: Option<PartitionIdentifier>,
+) -> Result<TransitionPartitionId, Status> {
+let partition_id =
+match partition_identifier
+.and_then(|pi| pi.id)
+.ok_or(Status::invalid_argument(
+"No partition identifier specified",
+))? {
+partition_identifier::Id::HashId(bytes) => TransitionPartitionId::Deterministic(
+PartitionHashId::try_from(&bytes[..]).map_err(|e| {
+Status::invalid_argument(format!(
+"Could not parse bytes as a `PartitionHashId`: {e}"
+))
+})?,
+),
+partition_identifier::Id::CatalogId(id) => {
+TransitionPartitionId::Deprecated(PartitionId::new(id))
+}
+};
+
+Ok(partition_id)
+}
+
// converts the catalog ParquetFile to protobuf
fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile {
+let partition_identifier = to_partition_identifier(&p.partition_id);
+
ParquetFile {
id: p.id.get(),
namespace_id: p.namespace_id.get(),
table_id: p.table_id.get(),
-partition_id: p.partition_id.get(),
+partition_identifier: Some(partition_identifier),
object_store_id: p.object_store_id.to_string(),
min_time: p.min_time.get(),
max_time: p.max_time.get(),
@@ -191,8 +230,10 @@ fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile {

// converts the catalog Partition to protobuf
fn to_partition(p: data_types::Partition) -> Partition {
+let identifier = to_partition_identifier(&p.transition_partition_id());
+
Partition {
-id: p.id.get(),
+identifier: Some(identifier),
key: p.partition_key.to_string(),
table_id: p.table_id.get(),
array_sort_key: p.sort_key,
@@ -230,8 +271,7 @@ mod tests {
let p1params = ParquetFileParams {
namespace_id: namespace.id,
table_id: table.id,
-partition_id: partition.id,
+partition_id: partition.transition_partition_id(),
-partition_hash_id: partition.hash_id().cloned(),
object_store_id: Uuid::new_v4(),
min_time: Timestamp::new(1),
max_time: Timestamp::new(5),
@@ -248,13 +288,15 @@ mod tests {
};
p1 = repos.parquet_files().create(p1params).await.unwrap();
p2 = repos.parquet_files().create(p2params).await.unwrap();
-partition_id = partition.id;
+partition_id = partition.transition_partition_id();
Arc::clone(&catalog)
};

+let partition_identifier = to_partition_identifier(&partition_id);
+
let grpc = super::CatalogService::new(catalog);
let request = GetParquetFilesByPartitionIdRequest {
-partition_id: partition_id.get(),
+partition_identifier: Some(partition_identifier),
};

let tonic_response = grpc
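The new to_partition_identifier / to_partition_id helpers above translate between the protobuf partition identifier oneof and TransitionPartitionId in both directions. A self-contained sketch of that round trip, using hand-rolled stand-ins for the generated protobuf types and the data_types IDs (only the shape of the conversion is meant to match the real code):

#[derive(Debug, Clone, PartialEq)]
enum Id {
    CatalogId(i64),
    HashId(Vec<u8>),
}

#[derive(Debug, Clone, PartialEq)]
struct PartitionIdentifier {
    id: Option<Id>,
}

#[derive(Debug, Clone, PartialEq)]
enum TransitionPartitionId {
    Deprecated(i64),
    Deterministic(Vec<u8>),
}

fn to_partition_identifier(id: &TransitionPartitionId) -> PartitionIdentifier {
    match id {
        TransitionPartitionId::Deprecated(v) => PartitionIdentifier {
            id: Some(Id::CatalogId(*v)),
        },
        TransitionPartitionId::Deterministic(bytes) => PartitionIdentifier {
            id: Some(Id::HashId(bytes.clone())),
        },
    }
}

fn to_partition_id(pi: Option<PartitionIdentifier>) -> Result<TransitionPartitionId, String> {
    // A missing or empty identifier is an error, mirroring the invalid-argument
    // handling in the gRPC service above.
    match pi.and_then(|pi| pi.id).ok_or("No partition identifier specified")? {
        Id::CatalogId(v) => Ok(TransitionPartitionId::Deprecated(v)),
        Id::HashId(bytes) => Ok(TransitionPartitionId::Deterministic(bytes)),
    }
}

fn main() {
    let original = TransitionPartitionId::Deprecated(42);
    let wire = to_partition_identifier(&original);
    assert_eq!(to_partition_id(Some(wire)).unwrap(), original);
    assert!(to_partition_id(None).is_err());
}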
@@ -75,7 +75,7 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService {
let path = ParquetFilePath::new(
parquet_file.namespace_id,
parquet_file.table_id,
-&parquet_file.transition_partition_id(),
+&parquet_file.partition_id.clone(),
parquet_file.object_store_id,
);
let path = path.object_store_path();
@@ -128,8 +128,7 @@ mod tests {
let p1params = ParquetFileParams {
namespace_id: namespace.id,
table_id: table.id,
-partition_id: partition.id,
+partition_id: partition.transition_partition_id(),
-partition_hash_id: partition.hash_id().cloned(),
object_store_id: Uuid::new_v4(),
min_time: Timestamp::new(1),
max_time: Timestamp::new(5),
@@ -150,7 +149,7 @@ mod tests {
let path = ParquetFilePath::new(
p1.namespace_id,
p1.table_id,
-&p1.transition_partition_id(),
+&p1.partition_id.clone(),
p1.object_store_id,
);
let path = path.object_store_path();
@@ -25,3 +25,4 @@ sysinfo = "0.29.7"
tempfile = "3.7.0"
# Need the multi-threaded executor for testing
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] }
+test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
@@ -1,7 +1,10 @@
-use std::{borrow::Cow, path::PathBuf, time::Duration};
+use std::borrow::Cow;
+use std::path::PathBuf;
+use std::time::Duration;

use metric::{Attributes, U64Gauge};
use sysinfo::{DiskExt, RefreshKind, System, SystemExt};
+use tokio::sync::watch;

/// The interval at which disk metrics are updated.
///
@@ -9,6 +12,32 @@ use sysinfo::{DiskExt, RefreshKind, System, SystemExt};
/// interval.
const UPDATE_INTERVAL: Duration = Duration::from_secs(13);

+/// An immutable snapshot of space and usage statistics for some disk.
+#[derive(Clone, Copy, Debug)]
+pub struct DiskSpaceSnapshot {
+available_disk_space: u64,
+total_disk_space: u64,
+}
+
+impl DiskSpaceSnapshot {
+/// The available space in bytes on the disk.
+pub fn available_disk_space(&self) -> u64 {
+self.available_disk_space
+}
+
+/// The maximum capacity in bytes of the disk.
+pub fn total_disk_space(&self) -> u64 {
+self.total_disk_space
+}
+
+/// Overall usage of the disk, as a percentage [0.0, 1.0].
+#[inline]
+pub fn disk_usage_ratio(&self) -> f64 {
+debug_assert!(self.available_disk_space <= self.total_disk_space);
+1.0 - (self.available_disk_space as f64 / self.total_disk_space as f64)
+}
+}
+
/// A periodic reporter of disk capacity / free statistics for a given
/// directory.
#[derive(Debug)]
@@ -22,12 +51,19 @@ pub struct DiskSpaceMetrics {
/// The index into [`System::disks()`] for the disk containing the observed
/// directory.
disk_idx: usize,

+/// A stream of [`DiskSpaceSnapshot`] produced by the metric reporter for
+/// consumption by any listeners.
+snapshot_tx: watch::Sender<DiskSpaceSnapshot>,
}

impl DiskSpaceMetrics {
/// Create a new [`DiskSpaceMetrics`], returning [`None`] if no disk can be
/// found for the specified `directory`.
-pub fn new(directory: PathBuf, registry: &metric::Registry) -> Option<Self> {
+pub fn new(
+directory: PathBuf,
+registry: &metric::Registry,
+) -> Option<(Self, watch::Receiver<DiskSpaceSnapshot>)> {
let path: Cow<'static, str> = Cow::from(directory.display().to_string());
let mut directory = directory.canonicalize().ok()?;

@@ -52,14 +88,14 @@ impl DiskSpaceMetrics {

// Resolve the mount point once.
// The directory path may be `/path/to/dir` and the mount point is `/`.
-let disk_idx = loop {
+let (disk_idx, initial_disk) = loop {
-if let Some((idx, _disk)) = system
+if let Some((idx, disk)) = system
.disks()
.iter()
.enumerate()
.find(|(_idx, disk)| disk.mount_point() == directory)
{
-break idx;
+break (idx, disk);
}
// The mount point for this directory could not be found.
if !directory.pop() {
@@ -67,18 +103,26 @@ impl DiskSpaceMetrics {
}
};

-Some(Self {
+let (snapshot_tx, snapshot_rx) = watch::channel(DiskSpaceSnapshot {
+available_disk_space: initial_disk.available_space(),
+total_disk_space: initial_disk.total_space(),
+});
+
+Some((
+Self {
available_disk_space,
total_disk_space,
system,
disk_idx,
-})
+snapshot_tx,
+},
+snapshot_rx,
+))
}

/// Start the [`DiskSpaceMetrics`] evaluation loop, blocking forever.
pub async fn run(mut self) {
let mut interval = tokio::time::interval(UPDATE_INTERVAL);

loop {
interval.tick().await;

@@ -93,6 +137,13 @@ impl DiskSpaceMetrics {

self.available_disk_space.set(disk.available_space());
self.total_disk_space.set(disk.total_space());
+
+// Produce and send a [`DiskSpaceSnapshot`] for any listeners
+// that might exist.
+_ = self.snapshot_tx.send(DiskSpaceSnapshot {
+available_disk_space: disk.available_space(),
+total_disk_space: disk.total_space(),
+});
}
}
}
@@ -103,6 +154,7 @@ mod tests {

use metric::Metric;
use tempfile::tempdir_in;
+use test_helpers::timeout::FutureTimeout;

use super::*;

@@ -121,11 +173,9 @@ mod tests {

let registry = Arc::new(metric::Registry::new());

-let _handle = tokio::spawn(
-DiskSpaceMetrics::new(pathbuf, &registry)
-.expect("root always exists")
-.run(),
-);
+let (_handle, mut snapshot_rx) =
+DiskSpaceMetrics::new(pathbuf, &registry).expect("root always exists");
+let _handle = tokio::spawn(_handle.run());

// Wait for the metric to be emitted and non-zero - this should be very
// quick!
@@ -151,10 +201,45 @@ mod tests {
.fetch();

if recorded_free_metric > 0 && recorded_total_metric > 0 {
+snapshot_rx
+.changed()
+.with_timeout_panic(Duration::from_secs(5))
+.await
+.expect("snapshot value should have changed");
+
+let snapshot = *snapshot_rx.borrow();
+assert_eq!(snapshot.available_disk_space, recorded_free_metric);
+assert_eq!(snapshot.total_disk_space, recorded_total_metric);
+
return;
}

tokio::time::sleep(Duration::from_millis(50)).await;
}
}

+// Token test to assert disk usage ratio
+#[test]
+fn assert_disk_usage_ratio() {
+// 80% used
+let snapshot = DiskSpaceSnapshot {
+available_disk_space: 2000,
+total_disk_space: 10000,
+};
+assert_eq!(snapshot.disk_usage_ratio(), 0.8);
+
+// 90% used
+let snapshot = DiskSpaceSnapshot {
+available_disk_space: 2000,
+total_disk_space: 20000,
+};
+assert_eq!(snapshot.disk_usage_ratio(), 0.9);
+
+// Free!
+let snapshot = DiskSpaceSnapshot {
+available_disk_space: 42,
+total_disk_space: 42,
+};
+assert_eq!(snapshot.disk_usage_ratio(), 0.0);
+}
}
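DiskSpaceMetrics::new now also returns a watch::Receiver<DiskSpaceSnapshot>, and the run loop publishes a fresh snapshot on every tick. A small consumer sketch of that tokio watch-channel pattern follows; the snapshot struct is re-declared here so the example is self-contained, and wiring it into an actual listener is assumed rather than shown.

use tokio::sync::watch;

#[derive(Clone, Copy, Debug)]
struct DiskSpaceSnapshot {
    available_disk_space: u64,
    total_disk_space: u64,
}

impl DiskSpaceSnapshot {
    fn disk_usage_ratio(&self) -> f64 {
        1.0 - (self.available_disk_space as f64 / self.total_disk_space as f64)
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = watch::channel(DiskSpaceSnapshot {
        available_disk_space: 10_000,
        total_disk_space: 10_000,
    });

    // Stand-in for the metric loop publishing a fresher snapshot on a tick.
    tx.send(DiskSpaceSnapshot {
        available_disk_space: 2_000,
        total_disk_space: 10_000,
    })
    .unwrap();

    // A listener wakes on change and only ever reads the latest value.
    rx.changed().await.unwrap();
    let snapshot = *rx.borrow();
    assert_eq!(snapshot.disk_usage_ratio(), 0.8); // 80% used, as in the token test above
}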