Merge branch 'main' into idpe-17789/provide-job-on-commit

commit cc70a2c38b
@@ -699,7 +699,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
dependencies = [
 "memchr",
 "regex-automata 0.3.3",
 "regex-automata 0.3.4",
 "serde",
]

@@ -763,6 +763,7 @@ dependencies = [
 "pdatastructs",
 "proptest",
 "rand",
 "test_helpers",
 "tokio",
 "tokio-util",
 "trace",

@@ -2662,6 +2663,7 @@ dependencies = [
 "flatbuffers",
 "futures",
 "generated_types",
 "gossip",
 "hashbrown 0.14.0",
 "influxdb_iox_client",
 "ingester_query_grpc",

@@ -3107,6 +3109,7 @@ dependencies = [
 "authz",
 "clap_blocks",
 "data_types",
 "gossip",
 "hashbrown 0.14.0",
 "hyper",
 "iox_catalog",

@@ -4575,7 +4578,7 @@ checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-automata 0.3.3",
 "regex-automata 0.3.4",
 "regex-syntax 0.7.4",
]

@@ -4590,9 +4593,9 @@ dependencies = [

[[package]]
name = "regex-automata"
version = "0.3.3"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294"
dependencies = [
 "aho-corasick",
 "memchr",

@@ -4693,6 +4696,7 @@ dependencies = [
 "flate2",
 "futures",
 "generated_types",
 "gossip",
 "hashbrown 0.14.0",
 "hyper",
 "influxdb-line-protocol",

@@ -4906,18 +4910,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"

[[package]]
name = "serde"
version = "1.0.177"
version = "1.0.179"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63ba2516aa6bf82e0b19ca8b50019d52df58455d3cf9bdaf6315225fdd0c560a"
checksum = "0a5bf42b8d227d4abf38a1ddb08602e229108a517cd4e5bb28f9c7eaafdce5c0"
dependencies = [
 "serde_derive",
]

[[package]]
name = "serde_derive"
version = "1.0.177"
version = "1.0.179"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "401797fe7833d72109fedec6bfcbe67c0eed9b99772f26eb8afd261f0abc6fd3"
checksum = "741e124f5485c7e60c03b043f79f320bff3527f4bbf12cf3831750dc46a0ec2c"
dependencies = [
 "proc-macro2",
 "quote",

@@ -6265,6 +6269,7 @@ dependencies = [
 "pin-project",
 "sysinfo",
 "tempfile",
 "test_helpers",
 "tokio",
 "tokio-util",
 "trace",

@@ -6879,7 +6884,7 @@ dependencies = [
 "rand",
 "rand_core",
 "regex",
 "regex-automata 0.3.3",
 "regex-automata 0.3.4",
 "regex-syntax 0.7.4",
 "reqwest",
 "ring",
@@ -23,6 +23,7 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }

[dev-dependencies]
criterion = { version = "0.5", default-features = false, features = ["rayon"]}
proptest = { version = "1", default_features = false, features = ["std"] }
test_helpers = { path = "../test_helpers" }

[lib]
# Allow --save-baseline to work
@@ -5,6 +5,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
use iox_time::{MockProvider, Time};
use parking_lot::Mutex;
use rand::rngs::mock::StepRng;
use test_helpers::maybe_start_logging;
use tokio::{runtime::Handle, sync::Notify};

use crate::{

@@ -116,6 +117,7 @@ async fn test_refresh_does_not_update_lru_time() {
time_provider,
loader,
notify_idle,
pool,
..
} = TestStateLruAndRefresh::new();

@@ -135,12 +137,14 @@ async fn test_refresh_does_not_update_lru_time() {
let barrier = loader.block_next(1, String::from("foo"));
backend.set(1, String::from("a"));
pool.wait_converged().await;

// trigger refresh
time_provider.inc(Duration::from_secs(1));

time_provider.inc(Duration::from_secs(1));
backend.set(2, String::from("b"));
pool.wait_converged().await;

time_provider.inc(Duration::from_secs(1));

@@ -150,6 +154,7 @@ async fn test_refresh_does_not_update_lru_time() {
// add a third item to the cache, forcing LRU to evict one of the items
backend.set(3, String::from("c"));
pool.wait_converged().await;

// Should evict `1` even though it was refreshed after `2` was added
assert_eq!(backend.get(&1), None);

@@ -192,6 +197,8 @@ async fn test_if_refresh_to_slow_then_expire() {
#[tokio::test]
async fn test_refresh_can_trigger_lru_eviction() {
maybe_start_logging();

let TestStateLRUAndRefresh {
mut backend,
refresh_duration_provider,

@@ -224,13 +231,16 @@ async fn test_refresh_can_trigger_lru_eviction() {
backend.set(1, String::from("a"));
backend.set(2, String::from("c"));
backend.set(3, String::from("d"));
assert_eq!(backend.get(&1), Some(String::from("a")));
pool.wait_converged().await;
assert_eq!(backend.get(&2), Some(String::from("c")));
assert_eq!(backend.get(&3), Some(String::from("d")));
time_provider.inc(Duration::from_millis(1));
assert_eq!(backend.get(&1), Some(String::from("a")));

// refresh
time_provider.inc(Duration::from_secs(1));
time_provider.inc(Duration::from_secs(10));
notify_idle.notified_with_timeout().await;
pool.wait_converged().await;

// needed to evict 2->"c"
assert_eq!(backend.get(&1), Some(String::from("b")));

@@ -285,6 +295,7 @@ async fn test_remove_if_check_does_not_extend_lifetime() {
size_estimator,
time_provider,
remove_if_handle,
pool,
..
} = TestStateLruAndRemoveIf::new().await;

@@ -293,15 +304,18 @@ async fn test_remove_if_check_does_not_extend_lifetime() {
size_estimator.mock_size(3, String::from("c"), TestSize(4));

backend.set(1, String::from("a"));
pool.wait_converged().await;
time_provider.inc(Duration::from_secs(1));

backend.set(2, String::from("b"));
pool.wait_converged().await;
time_provider.inc(Duration::from_secs(1));

// Checking remove_if should not count as a "use" of 1
// for the "least recently used" calculation
remove_if_handle.remove_if(&1, |_| false);
backend.set(3, String::from("c"));
pool.wait_converged().await;

// adding "c" totals 12 size, but backend has room for only 10
// so "least recently used" (in this case 1, not 2) should be removed

@@ -397,6 +411,7 @@ impl TestStateLRUAndRefresh {
"my_pool",
TestSize(10),
Arc::clone(&metric_registry),
&Handle::current(),
));
backend.add_policy(LruPolicy::new(
Arc::clone(&pool),

@@ -442,6 +457,7 @@ impl TestStateTtlAndLRU {
"my_pool",
TestSize(10),
Arc::clone(&metric_registry),
&Handle::current(),
));
backend.add_policy(LruPolicy::new(
Arc::clone(&pool),

@@ -465,6 +481,7 @@ struct TestStateLruAndRemoveIf {
time_provider: Arc<MockProvider>,
size_estimator: Arc<TestSizeEstimator>,
remove_if_handle: RemoveIfHandle<u8, String>,
pool: Arc<ResourcePool<TestSize>>,
}

impl TestStateLruAndRemoveIf {

@@ -479,6 +496,7 @@ impl TestStateLruAndRemoveIf {
"my_pool",
TestSize(10),
Arc::clone(&metric_registry),
&Handle::current(),
));
backend.add_policy(LruPolicy::new(
Arc::clone(&pool),

@@ -495,6 +513,7 @@ impl TestStateLruAndRemoveIf {
time_provider,
size_estimator,
remove_if_handle,
pool,
}
}
}

@@ -507,6 +526,7 @@ struct TestStateLruAndRefresh {
time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>,
pool: Arc<ResourcePool<TestSize>>,
}

impl TestStateLruAndRefresh {

@@ -537,6 +557,7 @@ impl TestStateLruAndRefresh {
"my_pool",
TestSize(10),
Arc::clone(&metric_registry),
&Handle::current(),
));
backend.add_policy(LruPolicy::new(
Arc::clone(&pool),

@@ -551,6 +572,7 @@ impl TestStateLruAndRefresh {
time_provider,
loader,
notify_idle,
pool,
}
}
}
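The tests above pin down one property of the combined LRU + refresh policies: a background refresh replaces the cached value but must not count as a "use" for eviction ordering. A minimal, self-contained sketch of that property, using a plain HashMap as a stand-in for the real cache_system types:

use std::collections::HashMap;

fn main() {
    // key -> (value, last_used_tick); the tick plays the role of the LRU clock.
    let mut cache: HashMap<u8, (String, u64)> = HashMap::new();
    cache.insert(1, ("a".into(), 10));
    cache.insert(2, ("b".into(), 20));

    // A refresh replaces the value but deliberately leaves last_used untouched.
    if let Some(entry) = cache.get_mut(&1) {
        entry.0 = "a2".into();
    }

    // LRU still picks key 1 first, because its last_used tick (10) is the oldest.
    let lru_key = *cache.iter().min_by_key(|(_, (_, tick))| *tick).unwrap().0;
    assert_eq!(lru_key, 1);
}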
File diff suppressed because it is too large.
@@ -393,7 +393,11 @@ where
/// structures while calling this function if you plan to also [subscribe](Subscriber) to
/// changes because this would easily lead to deadlocks.
pub fn execute_requests(&mut self, change_requests: Vec<ChangeRequest<'_, K, V>>) {
let inner = self.inner.upgrade().expect("backend gone");
let Some(inner) = self.inner.upgrade() else {
// backend gone, can happen during shutdowns, try not to panic
return;
};

lock_inner!(mut guard = inner);
perform_changes(&mut guard, change_requests);
}
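The hunk above swaps a panicking expect for a graceful early return when the weak backend reference can no longer be upgraded. A self-contained illustration of the same let-else pattern on std::sync::Weak:

use std::sync::{Arc, Weak};

fn poke(target: &Weak<String>) {
    // Bail out quietly if the owner has already been dropped (e.g. during shutdown).
    let Some(strong) = target.upgrade() else {
        return;
    };
    println!("still alive: {strong}");
}

fn main() {
    let owner = Arc::new(String::from("backend"));
    let weak = Arc::downgrade(&owner);
    poke(&weak); // prints "still alive: backend"
    drop(owner);
    poke(&weak); // owner gone: returns without panicking
}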
@@ -9,7 +9,15 @@ use std::{
///
/// Can be used to represent in-RAM memory as well as on-disc memory.
pub trait Resource:
Add<Output = Self> + Copy + Debug + Into<u64> + PartialOrd + Send + Sub<Output = Self> + 'static
Add<Output = Self>
+ Copy
+ Debug
+ Into<u64>
+ PartialOrd
+ Send
+ Sync
+ Sub<Output = Self>
+ 'static
{
/// Create resource consumption of zero.
fn zero() -> Self;
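The only semantic change in this hunk is the extra Sync bound; the rest is a reflow of the bound list. A self-contained sketch of why Sync matters once resource values are shared with background threads, using a local stand-in type rather than the crate's Resource trait:

use std::{fmt::Debug, sync::Arc, thread};

// Mirrors a subset of the trait's bounds; `Sync` is what lets `Arc<R>` cross threads.
fn observe<R>(shared: Arc<R>)
where
    R: Copy + Debug + Into<u64> + Send + Sync + 'static,
{
    thread::spawn(move || {
        let as_bytes: u64 = (*shared).into();
        println!("currently using {as_bytes} bytes ({shared:?})");
    })
    .join()
    .unwrap();
}

fn main() {
    #[derive(Clone, Copy, Debug)]
    struct Bytes(u64);
    impl From<Bytes> for u64 {
        fn from(b: Bytes) -> Self {
            b.0
        }
    }
    observe(Arc::new(Bytes(1024)));
}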
@@ -0,0 +1,49 @@
//! CLI config for cluster gossip communication.

use crate::socket_addr::SocketAddr;

/// Configuration parameters for the cluster gossip communication mechanism.
#[derive(Debug, Clone, clap::Parser)]
#[allow(missing_copy_implementations)]
pub struct GossipConfig {
/// A comma-delimited set of seed gossip peer addresses.
///
/// Example: "10.0.0.1:4242,10.0.0.2:4242"
///
/// These seeds will be used to discover all other peers that talk to the
/// same seeds. Typically all nodes in the cluster should use the same set
/// of seeds.
#[clap(
long = "gossip-seed-list",
env = "INFLUXDB_IOX_GOSSIP_SEED_LIST",
required = false,
num_args=1..,
value_delimiter = ',',
requires = "gossip_bind_address", // Field name, not flag
)]
pub seed_list: Vec<String>,

/// The UDP socket address IOx will use for gossip communication between
/// peers.
///
/// Example: "0.0.0.0:4242"
///
/// If not provided, the gossip sub-system is disabled.
#[clap(
long = "gossip-bind-address",
env = "INFLUXDB_IOX_GOSSIP_BIND_ADDR",
requires = "seed_list", // Field name, not flag
action
)]
pub gossip_bind_address: Option<SocketAddr>,
}

impl GossipConfig {
/// Initialise the gossip config to be disabled.
pub fn disabled() -> Self {
Self {
seed_list: vec![],
gossip_bind_address: None,
}
}
}
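The two `requires` constraints make the flags all-or-nothing: a seed list without a bind address (or vice versa) is rejected at parse time, and omitting both leaves gossip disabled. A small sketch of how that behaves when the struct is parsed on its own, assuming the usual clap derive API and that the module is exported as clap_blocks::gossip (as the lib.rs hunk below suggests):

use clap::Parser;
use clap_blocks::gossip::GossipConfig; // import path assumed for this sketch

fn main() {
    // No gossip flags at all: parses fine, gossip stays disabled.
    let cfg = GossipConfig::try_parse_from(["app"]).unwrap();
    assert!(cfg.gossip_bind_address.is_none());
    assert!(cfg.seed_list.is_empty());

    // Seeds without a bind address violate `requires` and fail to parse.
    let res = GossipConfig::try_parse_from(["app", "--gossip-seed-list", "10.0.0.1:4242"]);
    assert!(res.is_err());
}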
@@ -2,10 +2,16 @@

use std::path::PathBuf;

use crate::gossip::GossipConfig;

/// CLI config for the ingester using the RPC write path
#[derive(Debug, Clone, clap::Parser)]
#[allow(missing_copy_implementations)]
pub struct IngesterConfig {
/// Gossip config.
#[clap(flatten)]
pub gossip_config: GossipConfig,

/// Where this ingester instance should store its write-ahead log files. Each ingester instance
/// must have its own directory.
#[clap(long = "wal-directory", env = "INFLUXDB_IOX_WAL_DIRECTORY", action)]
@@ -22,6 +22,7 @@ pub mod catalog_dsn;
pub mod compactor;
pub mod compactor_scheduler;
pub mod garbage_collector;
pub mod gossip;
pub mod ingester;
pub mod ingester_address;
pub mod object_store;
@@ -1,6 +1,7 @@
//! CLI config for the router using the RPC write path

use crate::{
gossip::GossipConfig,
ingester_address::IngesterAddress,
single_tenant::{
CONFIG_AUTHZ_ENV_NAME, CONFIG_AUTHZ_FLAG, CONFIG_CST_ENV_NAME, CONFIG_CST_FLAG,

@@ -15,6 +16,10 @@ use std::{
#[derive(Debug, Clone, clap::Parser)]
#[allow(missing_copy_implementations)]
pub struct RouterConfig {
/// Gossip config.
#[clap(flatten)]
pub gossip_config: GossipConfig,

/// Addr for connection to authz
#[clap(
long = CONFIG_AUTHZ_FLAG,
@@ -171,7 +171,7 @@ fn to_queryable_parquet_chunk(
parquet_file_id = file.file.id.get(),
parquet_file_namespace_id = file.file.namespace_id.get(),
parquet_file_table_id = file.file.table_id.get(),
parquet_file_partition_id = file.file.partition_id.get(),
parquet_file_partition_id = %file.file.partition_id,
parquet_file_object_store_id = uuid.to_string().as_str(),
"built parquet chunk from metadata"
);
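The change above drops the .get() call (which yielded a plain integer) in favour of the % sigil, which records the partition ID via its Display impl; that fits the move to transitional partition identifiers elsewhere in this commit. A tiny sketch of the same field syntax with the tracing crate (the sigil works identically through re-exports of tracing; the subscriber setup and ID value are illustrative only):

use tracing::debug;

fn main() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::DEBUG)
        .init();

    // Stand-in ID: anything with a Display impl can be logged with `%`.
    let partition_id = "11111111-2222-3333-4444-555555555555";
    debug!(parquet_file_partition_id = %partition_id, "built parquet chunk from metadata");
}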
@@ -70,8 +70,7 @@ impl ParquetFileSink for MockParquetFileSink {
let out = ((row_count > 0) || !self.filter_empty_files).then(|| ParquetFileParams {
namespace_id: partition.namespace_id,
table_id: partition.table.id,
partition_id: partition.partition_id,
partition_hash_id: partition.partition_hash_id.clone(),
partition_id: partition.transition_partition_id(),
object_store_id: Uuid::from_u128(guard.len() as u128),
min_time: Timestamp::new(0),
max_time: Timestamp::new(0),

@@ -95,7 +94,7 @@ impl ParquetFileSink for MockParquetFileSink {
#[cfg(test)]
mod tests {
use arrow_util::assert_batches_eq;
use data_types::{NamespaceId, PartitionId, TableId};
use data_types::{NamespaceId, TableId};
use datafusion::{
arrow::{array::new_null_array, datatypes::DataType},
physical_plan::stream::RecordBatchStreamAdapter,

@@ -159,7 +158,7 @@ mod tests {
Arc::clone(&schema),
futures::stream::once(async move { Ok(record_batch_captured) }),
));
let partition_hash_id = partition.partition_hash_id.clone();
let partition_id = partition.transition_partition_id();
assert_eq!(
sink.store(stream, Arc::clone(&partition), level, max_l0_created_at)
.await

@@ -167,8 +166,7 @@ mod tests {
Some(ParquetFileParams {
namespace_id: NamespaceId::new(2),
table_id: TableId::new(3),
partition_id: PartitionId::new(1),
partition_hash_id,
partition_id,
object_store_id: Uuid::from_u128(2),
min_time: Timestamp::new(0),
max_time: Timestamp::new(0),

@@ -223,7 +221,7 @@ mod tests {
Arc::clone(&schema),
futures::stream::empty(),
));
let partition_hash_id = partition.partition_hash_id.clone();
let partition_id = partition.transition_partition_id();
assert_eq!(
sink.store(stream, Arc::clone(&partition), level, max_l0_created_at)
.await

@@ -231,8 +229,7 @@ mod tests {
Some(ParquetFileParams {
namespace_id: NamespaceId::new(2),
table_id: TableId::new(3),
partition_id: PartitionId::new(1),
partition_hash_id,
partition_id,
object_store_id: Uuid::from_u128(0),
min_time: Timestamp::new(0),
max_time: Timestamp::new(0),
@@ -1,19 +1,35 @@
use std::{collections::HashMap, fmt::Display};

use async_trait::async_trait;
use data_types::{ParquetFile, PartitionId};

use super::PartitionFilesSource;
use async_trait::async_trait;
use data_types::{ParquetFile, PartitionId, TransitionPartitionId};

#[derive(Debug)]
pub struct MockPartitionFilesSource {
files: HashMap<PartitionId, Vec<ParquetFile>>,
// This complexity is because we're in the process of moving to partition hash IDs rather than
// partition catalog IDs, and Parquet files might only have the partition hash ID on their
// record, but the compactor deals with partition catalog IDs because we haven't transitioned
// it yet. This should become simpler when the transition is complete.
partition_lookup: HashMap<PartitionId, TransitionPartitionId>,
file_lookup: HashMap<TransitionPartitionId, Vec<ParquetFile>>,
}

impl MockPartitionFilesSource {
#[allow(dead_code)] // not used anywhere
pub fn new(files: HashMap<PartitionId, Vec<ParquetFile>>) -> Self {
Self { files }
#[cfg(test)]
pub fn new(
partition_lookup: HashMap<PartitionId, TransitionPartitionId>,
parquet_files: Vec<ParquetFile>,
) -> Self {
let mut file_lookup: HashMap<TransitionPartitionId, Vec<ParquetFile>> = HashMap::new();
for file in parquet_files {
let files = file_lookup.entry(file.partition_id.clone()).or_default();
files.push(file);
}

Self {
partition_lookup,
file_lookup,
}
}
}

@@ -25,46 +41,60 @@ impl Display for MockPartitionFilesSource {

#[async_trait]
impl PartitionFilesSource for MockPartitionFilesSource {
async fn fetch(&self, partition: PartitionId) -> Vec<ParquetFile> {
self.files.get(&partition).cloned().unwrap_or_default()
async fn fetch(&self, partition_id: PartitionId) -> Vec<ParquetFile> {
self.partition_lookup
.get(&partition_id)
.and_then(|partition_hash_id| self.file_lookup.get(partition_hash_id).cloned())
.unwrap_or_default()
}
}

#[cfg(test)]
mod tests {
use iox_tests::ParquetFileBuilder;

use super::*;
use iox_tests::{partition_identifier, ParquetFileBuilder};

#[test]
fn test_display() {
assert_eq!(
MockPartitionFilesSource::new(HashMap::default()).to_string(),
MockPartitionFilesSource::new(Default::default(), Default::default()).to_string(),
"mock",
)
}

#[tokio::test]
async fn test_fetch() {
let f_1_1 = ParquetFileBuilder::new(1).with_partition(1).build();
let f_1_2 = ParquetFileBuilder::new(2).with_partition(1).build();
let f_2_1 = ParquetFileBuilder::new(3).with_partition(2).build();
let partition_id_1 = PartitionId::new(1);
let partition_id_2 = PartitionId::new(2);
let partition_identifier_1 = partition_identifier(1);
let partition_identifier_2 = partition_identifier(2);
let f_1_1 = ParquetFileBuilder::new(1)
.with_partition(partition_identifier_1.clone())
.build();
let f_1_2 = ParquetFileBuilder::new(2)
.with_partition(partition_identifier_1.clone())
.build();
let f_2_1 = ParquetFileBuilder::new(3)
.with_partition(partition_identifier_2.clone())
.build();

let files = HashMap::from([
(PartitionId::new(1), vec![f_1_1.clone(), f_1_2.clone()]),
(PartitionId::new(2), vec![f_2_1.clone()]),
let partition_lookup = HashMap::from([
(partition_id_1, partition_identifier_1.clone()),
(partition_id_2, partition_identifier_2.clone()),
]);
let source = MockPartitionFilesSource::new(files);

let files = vec![f_1_1.clone(), f_1_2.clone(), f_2_1.clone()];
let source = MockPartitionFilesSource::new(partition_lookup, files);

// different partitions
assert_eq!(
source.fetch(PartitionId::new(1)).await,
source.fetch(partition_id_1).await,
vec![f_1_1.clone(), f_1_2.clone()],
);
assert_eq!(source.fetch(PartitionId::new(2)).await, vec![f_2_1],);
assert_eq!(source.fetch(partition_id_2).await, vec![f_2_1],);

// fetching does not drain
assert_eq!(source.fetch(PartitionId::new(1)).await, vec![f_1_1, f_1_2],);
assert_eq!(source.fetch(partition_id_1).await, vec![f_1_1, f_1_2],);

// unknown partition => empty result
assert_eq!(source.fetch(PartitionId::new(3)).await, vec![],);
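The comment in the struct explains the shape of the mock: the compactor still keys on catalog PartitionIds while Parquet files may only carry a partition hash ID, so fetch goes through two maps. A self-contained sketch of that two-step lookup with simplified stand-in types (plain integers and strings instead of the data_types IDs):

use std::collections::HashMap;

// PartitionId -> TransitionPartitionId -> files, with simple stand-ins.
fn fetch(
    partition_lookup: &HashMap<i64, String>,
    file_lookup: &HashMap<String, Vec<&'static str>>,
    partition_id: i64,
) -> Vec<&'static str> {
    partition_lookup
        .get(&partition_id)
        .and_then(|hash_id| file_lookup.get(hash_id).cloned())
        .unwrap_or_default()
}

fn main() {
    let partition_lookup = HashMap::from([(1, "hash-1".to_string())]);
    let file_lookup = HashMap::from([("hash-1".to_string(), vec!["f_1_1", "f_1_2"])]);

    assert_eq!(fetch(&partition_lookup, &file_lookup, 1), vec!["f_1_1", "f_1_2"]);
    // Unknown partition => empty result, mirroring the test above.
    assert!(fetch(&partition_lookup, &file_lookup, 3).is_empty());
}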
@@ -172,7 +172,11 @@ impl RoundInfoSource for LevelBasedRoundInfo {
_partition_info: &PartitionInfo,
files: &[ParquetFile],
) -> Result<RoundInfo, DynError> {
let start_level = get_start_level(files);
let start_level = get_start_level(
files,
self.max_num_files_per_plan,
self.max_total_file_size_per_plan,
);

if self.too_many_small_files_to_compact(files, start_level) {
return Ok(RoundInfo::ManySmallFiles {

@@ -187,23 +191,53 @@ impl RoundInfoSource for LevelBasedRoundInfo {
}
}

fn get_start_level(files: &[ParquetFile]) -> CompactionLevel {
// get_start_level decides what level to start compaction from. Often this is the lowest level
// we have ParquetFiles in, but occasionally we decide to compact L1->L2 when L0s still exist.
//
// If we ignore the invariants (where intra-level overlaps are allowed), this would be a math problem
// to optimize write amplification.
//
// However, allowing intra-level overlaps in L0 but not L1/L2 adds extra challenge to compacting L0s to L1.
// This is especially true when there are large quantities of overlapping L0s and L1s, potentially resulting
// in many split/compact cycles to resolve the overlaps.
//
// Since L1 & L2 only have inter-level overlaps, they can be compacted with just a few splits to align the L1s
// with the L2s. The relative ease of moving data from L1 to L2 provides additional motivation to compact the
// L1s to L2s when a backlog of L0s exists. The easily solvable L1->L2 compaction can give us a clean slate in
// L1, greatly simplifying the remaining L0->L1 compactions.
fn get_start_level(files: &[ParquetFile], max_files: usize, max_bytes: usize) -> CompactionLevel {
// panic if the files are empty
assert!(!files.is_empty());

// Start with initial level
// If there are files in this level, it is the start level
// Otherwise repeat until reaching the final level.
let mut level = CompactionLevel::Initial;
while level != CompactionLevel::Final {
if files.iter().any(|f| f.compaction_level == level) {
return level;
}
let mut l0_cnt: usize = 0;
let mut l0_bytes: usize = 0;
let mut l1_bytes: usize = 0;

level = level.next();
for f in files {
match f.compaction_level {
CompactionLevel::Initial => {
l0_cnt += 1;
l0_bytes += f.file_size_bytes as usize;
}
CompactionLevel::FileNonOverlapped => {
l1_bytes += f.file_size_bytes as usize;
}
_ => {}
}
}

level
if l1_bytes > 3 * max_bytes && (l0_cnt > max_files || l0_bytes > max_bytes) {
// L1 is big enough to pose an overlap challenge compacting from L0, and there is quite a bit more coming from L0.
// The criteria for this early L1->L2 compaction significantly impacts write amplification. The above values optimize
// existing test cases, but may be changed as additional test cases are added.
CompactionLevel::FileNonOverlapped
} else if l0_bytes > 0 {
CompactionLevel::Initial
} else if l1_bytes > 0 {
CompactionLevel::FileNonOverlapped
} else {
CompactionLevel::Final
}
}

fn get_num_overlapped_files(
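A worked example of the new start-level heuristic may help; the numbers below are illustrative only and assume max_files = 20 and max_bytes = 100 MB (stand-ins for max_num_files_per_plan and max_total_file_size_per_plan):

l0_cnt = 5, l0_bytes = 150 MB, l1_bytes = 400 MB
  -> l1_bytes (400 MB) > 3 * max_bytes (300 MB), and l0_bytes (150 MB) > max_bytes (100 MB)
  -> start level = FileNonOverlapped: compact L1 -> L2 first to clear the way for the L0 backlog.

l0_cnt = 5, l0_bytes = 150 MB, l1_bytes = 200 MB
  -> l1_bytes (200 MB) <= 3 * max_bytes (300 MB), and some L0 bytes exist
  -> start level = Initial: the usual L0 -> L1 compaction.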
@@ -301,7 +301,26 @@ pub fn merge_small_l0_chains(
for chain in &chains {
let this_chain_bytes = chain.iter().map(|f| f.file_size_bytes as usize).sum();

if prior_chain_bytes > 0 && prior_chain_bytes + this_chain_bytes <= max_compact_size {
// matching max_l0_created_at times indicates that the files were deliberately split. We shouldn't merge
// chains with matching max_l0_created_at times, because that would encourage undoing the previous split,
// which minimally increases write amplification, and may cause unproductive split/compact loops.
let mut matches = 0;
if prior_chain_bytes > 0 {
for f in chain.iter() {
for f2 in &merged_chains[prior_chain_idx as usize] {
if f.max_l0_created_at == f2.max_l0_created_at {
matches += 1;
break;
}
}
}
}

// Merge it if: there is a prior chain to merge with, and merging wouldn't make it too big or undo a previous split
if prior_chain_bytes > 0
&& prior_chain_bytes + this_chain_bytes <= max_compact_size
&& matches == 0
{
// this chain can be added to the prior chain.
merged_chains[prior_chain_idx as usize].append(&mut chain.clone());
prior_chain_bytes += this_chain_bytes;
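A self-contained sketch of the guard added above: a candidate chain is only merged into the prior one when none of their files share a max_l0_created_at value, since a shared timestamp means the files came out of an earlier deliberate split (timestamps are plain i64 stand-ins here):

fn would_undo_split(prior: &[i64], candidate: &[i64]) -> bool {
    // Any shared max_l0_created_at means the two chains hold pieces of the same split.
    candidate.iter().any(|t| prior.contains(t))
}

fn main() {
    assert!(would_undo_split(&[100, 200], &[200, 300])); // shared 200: do not merge
    assert!(!would_undo_split(&[100, 200], &[300, 400])); // disjoint: merge if size allows
}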
@@ -68,8 +68,8 @@ async fn test_num_files_over_limit() {
assert_levels(
&files,
vec![
(8, CompactionLevel::FileNonOverlapped),
(9, CompactionLevel::FileNonOverlapped),
(10, CompactionLevel::FileNonOverlapped),
],
);
}
@ -746,97 +746,85 @@ async fn random_backfill_over_l2s() {
|
|||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L0.76, L0.77, L0.79, L0.80"
|
||||
- " Creating 8 files"
|
||||
- "**** Simulation run 15, type=compact(ManySmallFiles). 10 Input Files, 200mb total:"
|
||||
- "L0 "
|
||||
- "L0.75[42,356] 1.04us 33mb|-----------L0.75-----------| "
|
||||
- "L0.86[357,357] 1.04us 0b |L0.86| "
|
||||
- "L0.87[358,670] 1.04us 33mb |-----------L0.87-----------| "
|
||||
- "L0.84[671,672] 1.04us 109kb |L0.84| "
|
||||
- "L0.85[673,986] 1.04us 33mb |-----------L0.85-----------| "
|
||||
- "L0.78[42,356] 1.05us 33mb|-----------L0.78-----------| "
|
||||
- "L0.90[357,357] 1.05us 0b |L0.90| "
|
||||
- "L0.91[358,670] 1.05us 33mb |-----------L0.91-----------| "
|
||||
- "L0.88[671,672] 1.05us 109kb |L0.88| "
|
||||
- "L0.89[673,986] 1.05us 33mb |-----------L0.89-----------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 200mb total:"
|
||||
- "L0, all files 200mb "
|
||||
- "L0.?[42,986] 1.05us |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 10 files: L0.75, L0.78, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 16, type=split(HighL0OverlapSingleFile)(split_times=[670]). 1 Input Files, 100mb total:"
|
||||
- "L1, all files 100mb "
|
||||
- "L1.82[358,672] 1.03us |-----------------------------------------L1.82------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 100mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[358,670] 1.03us 99mb|-----------------------------------------L1.?------------------------------------------| "
|
||||
- "L1.?[671,672] 1.03us 651kb |L1.?|"
|
||||
- "**** Simulation run 17, type=split(HighL0OverlapSingleFile)(split_times=[356]). 1 Input Files, 100mb total:"
|
||||
- "L1, all files 100mb "
|
||||
- "L1.81[42,357] 1.03us |-----------------------------------------L1.81------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 100mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[42,356] 1.03us 100mb|-----------------------------------------L1.?------------------------------------------| "
|
||||
- "L1.?[357,357] 1.03us 325kb |L1.?|"
|
||||
- "**** Simulation run 18, type=split(HighL0OverlapSingleFile)(split_times=[356, 670]). 1 Input Files, 200mb total:"
|
||||
- "L0, all files 200mb "
|
||||
- "L0.92[42,986] 1.05us |-----------------------------------------L0.92------------------------------------------|"
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 200mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[42,356] 1.05us 67mb |-----------L0.?------------| "
|
||||
- "L0.?[357,670] 1.05us 66mb |-----------L0.?------------| "
|
||||
- "L0.?[671,986] 1.05us 67mb |------------L0.?------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 3 files: L1.81, L1.82, L0.92"
|
||||
- " Creating 7 files"
|
||||
- "**** Simulation run 19, type=split(ReduceOverlap)(split_times=[672]). 1 Input Files, 67mb total:"
|
||||
- "**** Simulation run 15, type=compact(ManySmallFiles). 2 Input Files, 67mb total:"
|
||||
- "L0, all files 33mb "
|
||||
- "L0.75[42,356] 1.04us |-----------------------------------------L0.75------------------------------------------|"
|
||||
- "L0.78[42,356] 1.05us |-----------------------------------------L0.78------------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 67mb total:"
|
||||
- "L0, all files 67mb "
|
||||
- "L0.99[671,986] 1.05us |-----------------------------------------L0.99------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 67mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[671,672] 1.05us 218kb|L0.?| "
|
||||
- "L0.?[673,986] 1.05us 67mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 20, type=split(ReduceOverlap)(split_times=[357]). 1 Input Files, 66mb total:"
|
||||
- "L0, all files 66mb "
|
||||
- "L0.98[357,670] 1.05us |-----------------------------------------L0.98------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 66mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[357,357] 1.05us 0b |L0.?| "
|
||||
- "L0.?[358,670] 1.05us 66mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "L0.?[42,356] 1.05us |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.98, L0.99"
|
||||
- " Creating 4 files"
|
||||
- "**** Simulation run 21, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[232]). 4 Input Files, 167mb total:"
|
||||
- " Soft Deleting 2 files: L0.75, L0.78"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 16, type=compact(ManySmallFiles). 2 Input Files, 66mb total:"
|
||||
- "L0, all files 33mb "
|
||||
- "L0.87[358,670] 1.04us |-----------------------------------------L0.87------------------------------------------|"
|
||||
- "L0.91[358,670] 1.05us |-----------------------------------------L0.91------------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 66mb total:"
|
||||
- "L0, all files 66mb "
|
||||
- "L0.?[358,670] 1.05us |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.87, L0.91"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 17, type=compact(ManySmallFiles). 2 Input Files, 218kb total:"
|
||||
- "L0, all files 109kb "
|
||||
- "L0.84[671,672] 1.04us |-----------------------------------------L0.84------------------------------------------|"
|
||||
- "L0.88[671,672] 1.05us |-----------------------------------------L0.88------------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 218kb total:"
|
||||
- "L0, all files 218kb "
|
||||
- "L0.?[671,672] 1.05us |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.84, L0.88"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 18, type=compact(ManySmallFiles). 2 Input Files, 67mb total:"
|
||||
- "L0, all files 33mb "
|
||||
- "L0.85[673,986] 1.04us |-----------------------------------------L0.85------------------------------------------|"
|
||||
- "L0.89[673,986] 1.05us |-----------------------------------------L0.89------------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 67mb total:"
|
||||
- "L0, all files 67mb "
|
||||
- "L0.?[673,986] 1.05us |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.85, L0.89"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 19, type=compact(ManySmallFiles). 2 Input Files, 0b total:"
|
||||
- "L0, all files 0b "
|
||||
- "L0.86[357,357] 1.04us |-----------------------------------------L0.86------------------------------------------|"
|
||||
- "L0.90[357,357] 1.05us |-----------------------------------------L0.90------------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 0b total:"
|
||||
- "L0, all files 0b "
|
||||
- "L0.?[357,357] 1.05us |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.86, L0.90"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 20, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[232]). 3 Input Files, 167mb total:"
|
||||
- "L0 "
|
||||
- "L0.97[42,356] 1.05us 67mb|-----------------------------------------L0.97-----------------------------------------| "
|
||||
- "L0.102[357,357] 1.05us 0b |L0.102|"
|
||||
- "L0.92[42,356] 1.05us 67mb|-----------------------------------------L0.92-----------------------------------------| "
|
||||
- "L0.96[357,357] 1.05us 0b |L0.96|"
|
||||
- "L1 "
|
||||
- "L1.95[42,356] 1.03us 100mb|-----------------------------------------L1.95-----------------------------------------| "
|
||||
- "L1.96[357,357] 1.03us 325kb |L1.96|"
|
||||
- "L1.81[42,357] 1.03us 100mb|-----------------------------------------L1.81------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 167mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[42,232] 1.05us 101mb|------------------------L1.?------------------------| "
|
||||
- "L1.?[233,357] 1.05us 66mb |--------------L1.?---------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L1.95, L1.96, L0.97, L0.102"
|
||||
- " Soft Deleting 3 files: L1.81, L0.92, L0.96"
|
||||
- " Creating 2 files"
|
||||
- "**** Simulation run 22, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[547]). 4 Input Files, 166mb total:"
|
||||
- "**** Simulation run 21, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[547]). 3 Input Files, 166mb total:"
|
||||
- "L0 "
|
||||
- "L0.103[358,670] 1.05us 66mb|----------------------------------------L0.103-----------------------------------------| "
|
||||
- "L0.100[671,672] 1.05us 218kb |L0.100|"
|
||||
- "L0.93[358,670] 1.05us 66mb|-----------------------------------------L0.93-----------------------------------------| "
|
||||
- "L0.94[671,672] 1.05us 218kb |L0.94|"
|
||||
- "L1 "
|
||||
- "L1.93[358,670] 1.03us 99mb|-----------------------------------------L1.93-----------------------------------------| "
|
||||
- "L1.94[671,672] 1.03us 651kb |L1.94|"
|
||||
- "L1.82[358,672] 1.03us 100mb|-----------------------------------------L1.82------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 166mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[358,547] 1.05us 100mb|------------------------L1.?------------------------| "
|
||||
- "L1.?[548,672] 1.05us 66mb |--------------L1.?---------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L1.93, L1.94, L0.100, L0.103"
|
||||
- " Soft Deleting 3 files: L1.82, L0.93, L0.94"
|
||||
- " Creating 2 files"
|
||||
- "**** Simulation run 23, type=split(CompactAndSplitOutput(TotalSizeLessThanMaxCompactSize))(split_times=[861]). 2 Input Files, 167mb total:"
|
||||
- "**** Simulation run 22, type=split(CompactAndSplitOutput(TotalSizeLessThanMaxCompactSize))(split_times=[861]). 2 Input Files, 167mb total:"
|
||||
- "L0 "
|
||||
- "L0.101[673,986] 1.05us 67mb|-----------------------------------------L0.101-----------------------------------------|"
|
||||
- "L0.95[673,986] 1.05us 67mb|-----------------------------------------L0.95------------------------------------------|"
|
||||
- "L1 "
|
||||
- "L1.83[673,986] 1.03us 100mb|-----------------------------------------L1.83------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 167mb total:"
|
||||
|
@ -844,60 +832,60 @@ async fn random_backfill_over_l2s() {
|
|||
- "L1.?[673,861] 1.05us 100mb|------------------------L1.?------------------------| "
|
||||
- "L1.?[862,986] 1.05us 67mb |--------------L1.?---------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L1.83, L0.101"
|
||||
- " Soft Deleting 2 files: L1.83, L0.95"
|
||||
- " Creating 2 files"
|
||||
- "**** Simulation run 24, type=split(ReduceOverlap)(split_times=[399, 499]). 1 Input Files, 100mb total:"
|
||||
- "**** Simulation run 23, type=split(ReduceOverlap)(split_times=[399, 499]). 1 Input Files, 100mb total:"
|
||||
- "L1, all files 100mb "
|
||||
- "L1.106[358,547] 1.05us |-----------------------------------------L1.106-----------------------------------------|"
|
||||
- "L1.99[358,547] 1.05us |-----------------------------------------L1.99------------------------------------------|"
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 100mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[358,399] 1.05us 22mb|------L1.?-------| "
|
||||
- "L1.?[400,499] 1.05us 52mb |--------------------L1.?---------------------| "
|
||||
- "L1.?[500,547] 1.05us 26mb |--------L1.?--------| "
|
||||
- "**** Simulation run 25, type=split(ReduceOverlap)(split_times=[299]). 1 Input Files, 66mb total:"
|
||||
- "**** Simulation run 24, type=split(ReduceOverlap)(split_times=[299]). 1 Input Files, 66mb total:"
|
||||
- "L1, all files 66mb "
|
||||
- "L1.105[233,357] 1.05us |-----------------------------------------L1.105-----------------------------------------|"
|
||||
- "L1.98[233,357] 1.05us |-----------------------------------------L1.98------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 66mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[233,299] 1.05us 35mb|--------------------L1.?---------------------| "
|
||||
- "L1.?[300,357] 1.05us 31mb |-----------------L1.?------------------| "
|
||||
- "**** Simulation run 26, type=split(ReduceOverlap)(split_times=[99, 199]). 1 Input Files, 101mb total:"
|
||||
- "**** Simulation run 25, type=split(ReduceOverlap)(split_times=[99, 199]). 1 Input Files, 101mb total:"
|
||||
- "L1, all files 101mb "
|
||||
- "L1.104[42,232] 1.05us |-----------------------------------------L1.104-----------------------------------------|"
|
||||
- "L1.97[42,232] 1.05us |-----------------------------------------L1.97------------------------------------------|"
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 101mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[42,99] 1.05us 30mb |----------L1.?-----------| "
|
||||
- "L1.?[100,199] 1.05us 52mb |--------------------L1.?--------------------| "
|
||||
- "L1.?[200,232] 1.05us 18mb |----L1.?-----| "
|
||||
- "**** Simulation run 27, type=split(ReduceOverlap)(split_times=[599]). 1 Input Files, 66mb total:"
|
||||
- "**** Simulation run 26, type=split(ReduceOverlap)(split_times=[599]). 1 Input Files, 66mb total:"
|
||||
- "L1, all files 66mb "
|
||||
- "L1.107[548,672] 1.05us |-----------------------------------------L1.107-----------------------------------------|"
|
||||
- "L1.100[548,672] 1.05us |-----------------------------------------L1.100-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 66mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[548,599] 1.05us 27mb|---------------L1.?----------------| "
|
||||
- "L1.?[600,672] 1.05us 39mb |-----------------------L1.?-----------------------| "
|
||||
- "**** Simulation run 28, type=split(ReduceOverlap)(split_times=[899]). 1 Input Files, 67mb total:"
|
||||
- "**** Simulation run 27, type=split(ReduceOverlap)(split_times=[899]). 1 Input Files, 67mb total:"
|
||||
- "L1, all files 67mb "
|
||||
- "L1.109[862,986] 1.05us |-----------------------------------------L1.109-----------------------------------------|"
|
||||
- "L1.102[862,986] 1.05us |-----------------------------------------L1.102-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 67mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[862,899] 1.05us 20mb|----------L1.?----------| "
|
||||
- "L1.?[900,986] 1.05us 47mb |----------------------------L1.?----------------------------| "
|
||||
- "**** Simulation run 29, type=split(ReduceOverlap)(split_times=[699, 799]). 1 Input Files, 100mb total:"
|
||||
- "**** Simulation run 28, type=split(ReduceOverlap)(split_times=[699, 799]). 1 Input Files, 100mb total:"
|
||||
- "L1, all files 100mb "
|
||||
- "L1.108[673,861] 1.05us |-----------------------------------------L1.108-----------------------------------------|"
|
||||
- "L1.101[673,861] 1.05us |-----------------------------------------L1.101-----------------------------------------|"
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 100mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[673,699] 1.05us 14mb|---L1.?---| "
|
||||
- "L1.?[700,799] 1.05us 53mb |--------------------L1.?---------------------| "
|
||||
- "L1.?[800,861] 1.05us 34mb |-----------L1.?------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 6 files: L1.104, L1.105, L1.106, L1.107, L1.108, L1.109"
|
||||
- " Soft Deleting 6 files: L1.97, L1.98, L1.99, L1.100, L1.101, L1.102"
|
||||
- " Creating 15 files"
|
||||
- "**** Simulation run 30, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[71, 142]). 4 Input Files, 283mb total:"
|
||||
- "**** Simulation run 29, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[71, 142]). 4 Input Files, 283mb total:"
|
||||
- "L1 "
|
||||
- "L1.115[42,99] 1.05us 30mb |--------L1.115---------| "
|
||||
- "L1.116[100,199] 1.05us 52mb |------------------L1.116------------------| "
|
||||
- "L1.108[42,99] 1.05us 30mb |--------L1.108---------| "
|
||||
- "L1.109[100,199] 1.05us 52mb |------------------L1.109------------------| "
|
||||
- "L2 "
|
||||
- "L2.1[0,99] 99ns 100mb |-------------------L2.1-------------------| "
|
||||
- "L2.2[100,199] 199ns 100mb |-------------------L2.2-------------------| "
|
||||
|
@ -907,13 +895,13 @@ async fn random_backfill_over_l2s() {
|
|||
- "L2.?[72,142] 1.05us 99mb |------------L2.?-------------| "
|
||||
- "L2.?[143,199] 1.05us 82mb |---------L2.?----------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L2.1, L2.2, L1.115, L1.116"
|
||||
- " Soft Deleting 4 files: L2.1, L2.2, L1.108, L1.109"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 31, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[271, 342]). 5 Input Files, 284mb total:"
|
||||
- "**** Simulation run 30, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[271, 342]). 5 Input Files, 284mb total:"
|
||||
- "L1 "
|
||||
- "L1.117[200,232] 1.05us 18mb|---L1.117---| "
|
||||
- "L1.113[233,299] 1.05us 35mb |----------L1.113-----------| "
|
||||
- "L1.114[300,357] 1.05us 31mb |--------L1.114---------| "
|
||||
- "L1.110[200,232] 1.05us 18mb|---L1.110---| "
|
||||
- "L1.106[233,299] 1.05us 35mb |----------L1.106-----------| "
|
||||
- "L1.107[300,357] 1.05us 31mb |--------L1.107---------| "
|
||||
- "L2 "
|
||||
- "L2.3[200,299] 299ns 100mb|-------------------L2.3-------------------| "
|
||||
- "L2.4[300,399] 399ns 100mb |-------------------L2.4-------------------| "
|
||||
|
@ -923,14 +911,14 @@ async fn random_backfill_over_l2s() {
|
|||
- "L2.?[272,342] 1.05us 100mb |------------L2.?-------------| "
|
||||
- "L2.?[343,399] 1.05us 83mb |---------L2.?----------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 5 files: L2.3, L2.4, L1.113, L1.114, L1.117"
|
||||
- " Soft Deleting 5 files: L2.3, L2.4, L1.106, L1.107, L1.110"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 32, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[404, 465]). 4 Input Files, 257mb total:"
|
||||
- "**** Simulation run 31, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[404, 465]). 4 Input Files, 257mb total:"
|
||||
- "L1 "
|
||||
- "L1.110[358,399] 1.05us 22mb |-------L1.110--------| "
|
||||
- "L1.111[400,499] 1.05us 52mb |------------------------L1.111-------------------------| "
|
||||
- "L1.103[358,399] 1.05us 22mb |-------L1.103--------| "
|
||||
- "L1.104[400,499] 1.05us 52mb |------------------------L1.104-------------------------| "
|
||||
- "L2 "
|
||||
- "L2.130[343,399] 1.05us 83mb|------------L2.130------------| "
|
||||
- "L2.123[343,399] 1.05us 83mb|------------L2.123------------| "
|
||||
- "L2.5[400,499] 499ns 100mb |-------------------------L2.5--------------------------| "
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 257mb total:"
|
||||
- "L2 "
|
||||
|
@ -938,13 +926,13 @@ async fn random_backfill_over_l2s() {
|
|||
- "L2.?[405,465] 1.05us 99mb |--------------L2.?--------------| "
|
||||
- "L2.?[466,499] 1.05us 58mb |------L2.?-------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L2.5, L1.110, L1.111, L2.130"
|
||||
- " Soft Deleting 4 files: L2.5, L1.103, L1.104, L2.123"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 33, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[569, 638]). 5 Input Files, 292mb total:"
|
||||
- "**** Simulation run 32, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[569, 638]). 5 Input Files, 292mb total:"
|
||||
- "L1 "
|
||||
- "L1.112[500,547] 1.05us 26mb|------L1.112-------| "
|
||||
- "L1.118[548,599] 1.05us 27mb |-------L1.118--------| "
|
||||
- "L1.119[600,672] 1.05us 39mb |------------L1.119------------| "
|
||||
- "L1.105[500,547] 1.05us 26mb|------L1.105-------| "
|
||||
- "L1.111[548,599] 1.05us 27mb |-------L1.111--------| "
|
||||
- "L1.112[600,672] 1.05us 39mb |------------L1.112------------| "
|
||||
- "L2 "
|
||||
- "L2.6[500,599] 599ns 100mb|-------------------L2.6-------------------| "
|
||||
- "L2.7[600,699] 699ns 100mb |-------------------L2.7-------------------| "
|
||||
|
@ -954,14 +942,14 @@ async fn random_backfill_over_l2s() {
|
|||
- "L2.?[570,638] 1.05us 100mb |------------L2.?------------| "
|
||||
- "L2.?[639,699] 1.05us 91mb |----------L2.?-----------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 5 files: L2.6, L2.7, L1.112, L1.118, L1.119"
|
||||
- " Soft Deleting 5 files: L2.6, L2.7, L1.105, L1.111, L1.112"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 34, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[702, 765]). 4 Input Files, 258mb total:"
|
||||
- "**** Simulation run 33, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[702, 765]). 4 Input Files, 258mb total:"
|
||||
- "L1 "
|
||||
- "L1.122[673,699] 1.05us 14mb |---L1.122---| "
|
||||
- "L1.123[700,799] 1.05us 53mb |-----------------------L1.123------------------------| "
|
||||
- "L1.115[673,699] 1.05us 14mb |---L1.115---| "
|
||||
- "L1.116[700,799] 1.05us 53mb |-----------------------L1.116------------------------| "
|
||||
- "L2 "
|
||||
- "L2.136[639,699] 1.05us 91mb|------------L2.136-------------| "
|
||||
- "L2.129[639,699] 1.05us 91mb|------------L2.129-------------| "
|
||||
- "L2.8[700,799] 799ns 100mb |------------------------L2.8-------------------------| "
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 258mb total:"
|
||||
- "L2 "
|
||||
|
@ -969,12 +957,12 @@ async fn random_backfill_over_l2s() {
|
|||
- "L2.?[703,765] 1.05us 100mb |--------------L2.?--------------| "
|
||||
- "L2.?[766,799] 1.05us 56mb |------L2.?------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L2.8, L1.122, L1.123, L2.136"
|
||||
- " Soft Deleting 4 files: L2.8, L1.115, L1.116, L2.129"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 35, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[865]). 3 Input Files, 154mb total:"
|
||||
- "**** Simulation run 34, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[865]). 3 Input Files, 154mb total:"
|
||||
- "L1 "
|
||||
- "L1.124[800,861] 1.05us 34mb|-----------------------L1.124------------------------| "
|
||||
- "L1.120[862,899] 1.05us 20mb |------------L1.120-------------| "
|
||||
- "L1.117[800,861] 1.05us 34mb|-----------------------L1.117------------------------| "
|
||||
- "L1.113[862,899] 1.05us 20mb |------------L1.113-------------| "
|
||||
- "L2 "
|
||||
- "L2.9[800,899] 899ns 100mb|-----------------------------------------L2.9------------------------------------------| "
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 154mb total:"
|
||||
|
@ -982,28 +970,28 @@ async fn random_backfill_over_l2s() {
|
|||
- "L2.?[800,865] 1.05us 101mb|--------------------------L2.?---------------------------| "
|
||||
- "L2.?[866,899] 1.05us 53mb |-----------L2.?------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 3 files: L2.9, L1.120, L1.124"
|
||||
- " Soft Deleting 3 files: L2.9, L1.113, L1.117"
|
||||
- " Creating 2 files"
|
||||
- "**** Final Output Files (4.58gb written)"
|
||||
- "**** Final Output Files (4.06gb written)"
|
||||
- "L1 "
|
||||
- "L1.121[900,986] 1.05us 47mb |L1.121| "
|
||||
- "L1.114[900,986] 1.05us 47mb |L1.114| "
|
||||
- "L2 "
|
||||
- "L2.10[900,999] 999ns 100mb |L2.10-| "
|
||||
- "L2.125[0,71] 1.05us 101mb|L2.125| "
|
||||
- "L2.126[72,142] 1.05us 99mb |L2.126| "
|
||||
- "L2.127[143,199] 1.05us 82mb |L2.127| "
|
||||
- "L2.128[200,271] 1.05us 101mb |L2.128| "
|
||||
- "L2.129[272,342] 1.05us 100mb |L2.129| "
|
||||
- "L2.131[343,404] 1.05us 100mb |L2.131| "
|
||||
- "L2.132[405,465] 1.05us 99mb |L2.132| "
|
||||
- "L2.133[466,499] 1.05us 58mb |L2.133| "
|
||||
- "L2.134[500,569] 1.05us 101mb |L2.134| "
|
||||
- "L2.135[570,638] 1.05us 100mb |L2.135| "
|
||||
- "L2.137[639,702] 1.05us 101mb |L2.137| "
|
||||
- "L2.138[703,765] 1.05us 100mb |L2.138| "
|
||||
- "L2.139[766,799] 1.05us 56mb |L2.139| "
|
||||
- "L2.140[800,865] 1.05us 101mb |L2.140| "
|
||||
- "L2.141[866,899] 1.05us 53mb |L2.141| "
|
||||
- "L2.118[0,71] 1.05us 101mb|L2.118| "
|
||||
- "L2.119[72,142] 1.05us 99mb |L2.119| "
|
||||
- "L2.120[143,199] 1.05us 82mb |L2.120| "
|
||||
- "L2.121[200,271] 1.05us 101mb |L2.121| "
|
||||
- "L2.122[272,342] 1.05us 100mb |L2.122| "
|
||||
- "L2.124[343,404] 1.05us 100mb |L2.124| "
|
||||
- "L2.125[405,465] 1.05us 99mb |L2.125| "
|
||||
- "L2.126[466,499] 1.05us 58mb |L2.126| "
|
||||
- "L2.127[500,569] 1.05us 101mb |L2.127| "
|
||||
- "L2.128[570,638] 1.05us 100mb |L2.128| "
|
||||
- "L2.130[639,702] 1.05us 101mb |L2.130| "
|
||||
- "L2.131[703,765] 1.05us 100mb |L2.131| "
|
||||
- "L2.132[766,799] 1.05us 56mb |L2.132| "
|
||||
- "L2.133[800,865] 1.05us 101mb |L2.133| "
|
||||
- "L2.134[866,899] 1.05us 53mb |L2.134| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
@ -3020,63 +3008,66 @@ async fn actual_case_from_catalog_1() {
|
|||
- "WARNING: file L0.161[327,333] 336ns 183mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L0.162[330,338] 340ns 231mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L0.163[331,338] 341ns 232mb exceeds soft limit 100mb by more than 50%"
|
||||
- "**** Final Output Files (17.64gb written)"
|
||||
- "**** Final Output Files (15.47gb written)"
|
||||
- "L2 "
|
||||
- "L2.578[134,149] 342ns 202mb |L2.578| "
|
||||
- "L2.579[150,165] 342ns 218mb |L2.579| "
|
||||
- "L2.580[166,176] 342ns 186mb |L2.580| "
|
||||
- "L2.581[177,182] 342ns 150mb |L2.581| "
|
||||
- "L2.582[183,197] 342ns 267mb |L2.582| "
|
||||
- "L2.583[198,207] 342ns 157mb |L2.583| "
|
||||
- "L2.584[208,220] 342ns 147mb |L2.584| "
|
||||
- "L2.585[221,232] 342ns 270mb |L2.585| "
|
||||
- "L2.588[233,253] 342ns 286mb |L2.588| "
|
||||
- "L2.589[254,270] 342ns 289mb |L2.589| "
|
||||
- "L2.590[271,281] 342ns 225mb |L2.590| "
|
||||
- "L2.591[282,296] 342ns 234mb |L2.591| "
|
||||
- "L2.592[297,302] 342ns 232mb |L2.592| "
|
||||
- "L2.593[303,308] 342ns 244mb |L2.593| "
|
||||
- "L2.594[309,314] 342ns 282mb |L2.594|"
|
||||
- "L2.595[315,317] 342ns 214mb |L2.595|"
|
||||
- "L2.596[318,320] 342ns 222mb |L2.596|"
|
||||
- "L2.597[321,323] 342ns 146mb |L2.597|"
|
||||
- "L2.598[324,326] 342ns 254mb |L2.598|"
|
||||
- "L2.599[327,329] 342ns 197mb |L2.599|"
|
||||
- "L2.600[330,332] 342ns 228mb |L2.600|"
|
||||
- "L2.601[333,335] 342ns 199mb |L2.601|"
|
||||
- "L2.602[336,338] 342ns 280mb |L2.602|"
|
||||
- "L2.850[1,26] 342ns 101mb |L2.850| "
|
||||
- "L2.853[69,85] 342ns 104mb |L2.853| "
|
||||
- "L2.854[86,98] 342ns 107mb |L2.854| "
|
||||
- "L2.861[27,48] 342ns 103mb |L2.861| "
|
||||
- "L2.862[49,68] 342ns 98mb |L2.862| "
|
||||
- "L2.863[99,108] 342ns 102mb |L2.863| "
|
||||
- "L2.864[109,117] 342ns 91mb |L2.864| "
|
||||
- "L2.865[118,124] 342ns 91mb |L2.865| "
|
||||
- "L2.866[125,130] 342ns 107mb |L2.866| "
|
||||
- "L2.867[131,133] 342ns 64mb |L2.867| "
|
||||
- "L2.868[339,339] 342ns 25mb |L2.868|"
|
||||
- "WARNING: file L2.578[134,149] 342ns 202mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.579[150,165] 342ns 218mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.580[166,176] 342ns 186mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.581[177,182] 342ns 150mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.582[183,197] 342ns 267mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.583[198,207] 342ns 157mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.585[221,232] 342ns 270mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.588[233,253] 342ns 286mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.589[254,270] 342ns 289mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.590[271,281] 342ns 225mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.591[282,296] 342ns 234mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.592[297,302] 342ns 232mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.593[303,308] 342ns 244mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.594[309,314] 342ns 282mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.595[315,317] 342ns 214mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.596[318,320] 342ns 222mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.598[324,326] 342ns 254mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.599[327,329] 342ns 197mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.600[330,332] 342ns 228mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.601[333,335] 342ns 199mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.602[336,338] 342ns 280mb exceeds soft limit 100mb by more than 50%"
|
||||
- "L2.594[150,165] 342ns 218mb |L2.594| "
|
||||
- "L2.595[166,171] 342ns 118mb |L2.595| "
|
||||
- "L2.598[183,197] 342ns 267mb |L2.598| "
|
||||
- "L2.599[198,207] 342ns 157mb |L2.599| "
|
||||
- "L2.600[208,220] 342ns 147mb |L2.600| "
|
||||
- "L2.601[221,232] 342ns 270mb |L2.601| "
|
||||
- "L2.602[233,244] 342ns 147mb |L2.602| "
|
||||
- "L2.603[245,253] 342ns 139mb |L2.603| "
|
||||
- "L2.604[271,276] 342ns 117mb |L2.604| "
|
||||
- "L2.605[277,281] 342ns 109mb |L2.605| "
|
||||
- "L2.612[254,261] 342ns 105mb |L2.612| "
|
||||
- "L2.613[262,270] 342ns 184mb |L2.613| "
|
||||
- "L2.616[309,311] 342ns 101mb |L2.616|"
|
||||
- "L2.617[312,314] 342ns 181mb |L2.617|"
|
||||
- "L2.618[315,317] 342ns 214mb |L2.618|"
|
||||
- "L2.619[318,320] 342ns 222mb |L2.619|"
|
||||
- "L2.620[321,323] 342ns 146mb |L2.620|"
|
||||
- "L2.621[324,326] 342ns 254mb |L2.621|"
|
||||
- "L2.622[327,329] 342ns 197mb |L2.622|"
|
||||
- "L2.623[330,332] 342ns 228mb |L2.623|"
|
||||
- "L2.624[333,335] 342ns 199mb |L2.624|"
|
||||
- "L2.625[336,337] 342ns 156mb |L2.625|"
|
||||
- "L2.626[338,338] 342ns 124mb |L2.626|"
|
||||
- "L2.628[1,36] 342ns 103mb |L2.628-| "
|
||||
- "L2.629[37,71] 342ns 103mb |L2.629-| "
|
||||
- "L2.630[72,83] 342ns 103mb |L2.630| "
|
||||
- "L2.638[172,177] 342ns 109mb |L2.638| "
|
||||
- "L2.639[178,182] 342ns 109mb |L2.639| "
|
||||
- "L2.640[282,288] 342ns 100mb |L2.640| "
|
||||
- "L2.643[300,303] 342ns 110mb |L2.643| "
|
||||
- "L2.646[84,94] 342ns 107mb |L2.646| "
|
||||
- "L2.647[95,104] 342ns 97mb |L2.647| "
|
||||
- "L2.648[105,111] 342ns 86mb |L2.648| "
|
||||
- "L2.649[112,119] 342ns 114mb |L2.649| "
|
||||
- "L2.650[120,126] 342ns 98mb |L2.650| "
|
||||
- "L2.651[127,130] 342ns 82mb |L2.651| "
|
||||
- "L2.652[131,138] 342ns 108mb |L2.652| "
|
||||
- "L2.653[139,145] 342ns 93mb |L2.653| "
|
||||
- "L2.654[146,149] 342ns 77mb |L2.654| "
|
||||
- "L2.655[289,293] 342ns 110mb |L2.655| "
|
||||
- "L2.656[294,297] 342ns 82mb |L2.656| "
|
||||
- "L2.657[298,299] 342ns 82mb |L2.657| "
|
||||
- "L2.658[304,306] 342ns 113mb |L2.658| "
|
||||
- "L2.659[307,308] 342ns 113mb |L2.659| "
|
||||
- "L2.660[339,339] 342ns 25mb |L2.660|"
|
||||
- "WARNING: file L2.594[150,165] 342ns 218mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.598[183,197] 342ns 267mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.599[198,207] 342ns 157mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.601[221,232] 342ns 270mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.613[262,270] 342ns 184mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.617[312,314] 342ns 181mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.618[315,317] 342ns 214mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.619[318,320] 342ns 222mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.621[324,326] 342ns 254mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.622[327,329] 342ns 197mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.623[330,332] 342ns 228mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.624[333,335] 342ns 199mb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L2.625[336,337] 342ns 156mb exceeds soft limit 100mb by more than 50%"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
|
|
@ -4670,17 +4670,17 @@ async fn l0s_almost_needing_vertical_split() {
|
|||
- "L0.998[24,100] 1.02us |-----------------------------------------L0.998-----------------------------------------|"
|
||||
- "L0.999[24,100] 1.02us |-----------------------------------------L0.999-----------------------------------------|"
|
||||
- "L0.1000[24,100] 1.02us |----------------------------------------L0.1000-----------------------------------------|"
|
||||
- "**** Final Output Files (6.5gb written)"
|
||||
- "**** Final Output Files (5.23gb written)"
|
||||
- "L2 "
|
||||
- "L2.3141[24,37] 1.02us 108mb|---L2.3141---| "
|
||||
- "L2.3150[38,49] 1.02us 102mb |--L2.3150--| "
|
||||
- "L2.3151[50,60] 1.02us 93mb |-L2.3151-| "
|
||||
- "L2.3152[61,63] 1.02us 37mb |L2.3152| "
|
||||
- "L2.3153[64,73] 1.02us 101mb |L2.3153-| "
|
||||
- "L2.3154[74,82] 1.02us 90mb |L2.3154| "
|
||||
- "L2.3155[83,90] 1.02us 101mb |L2.3155| "
|
||||
- "L2.3156[91,98] 1.02us 93mb |L2.3156| "
|
||||
- "L2.3157[99,100] 1.02us 26mb |L2.3157|"
|
||||
- "L2.3086[24,35] 1.02us 102mb|--L2.3086--| "
|
||||
- "L2.3095[36,47] 1.02us 105mb |--L2.3095--| "
|
||||
- "L2.3096[48,58] 1.02us 95mb |-L2.3096-| "
|
||||
- "L2.3097[59,65] 1.02us 76mb |L2.3097| "
|
||||
- "L2.3098[66,76] 1.02us 106mb |-L2.3098-| "
|
||||
- "L2.3099[77,86] 1.02us 96mb |L2.3099-| "
|
||||
- "L2.3100[87,90] 1.02us 53mb |L2.3100| "
|
||||
- "L2.3101[91,98] 1.02us 90mb |L2.3101| "
|
||||
- "L2.3102[99,100] 1.02us 26mb |L2.3102|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
|
@ -78,14 +78,12 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use test_helpers::tracing::TracingCapture;
|
||||
|
||||
use super::*;
|
||||
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
||||
use iox_tests::ParquetFileBuilder;
|
||||
use assert_matches::assert_matches;
|
||||
use iox_tests::{partition_identifier, ParquetFileBuilder};
|
||||
use std::sync::Arc;
|
||||
use test_helpers::tracing::TracingCapture;
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -111,14 +109,21 @@ mod tests {
|
|||
.with_row_count(105)
|
||||
.build();
|
||||
|
||||
let created_1 = ParquetFileBuilder::new(1000).with_partition(1).build();
|
||||
let created_2 = ParquetFileBuilder::new(1001).with_partition(1).build();
|
||||
let partition_id_1 = PartitionId::new(1);
|
||||
let transition_partition_id_1 = partition_identifier(1);
|
||||
|
||||
let created_1 = ParquetFileBuilder::new(1000)
|
||||
.with_partition(transition_partition_id_1.clone())
|
||||
.build();
|
||||
let created_2 = ParquetFileBuilder::new(1001)
|
||||
.with_partition(transition_partition_id_1)
|
||||
.build();
|
||||
|
||||
let capture = TracingCapture::new();
|
||||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(1),
|
||||
partition_id_1,
|
||||
&[existing_1.clone()],
|
||||
&[],
|
||||
&[created_1.clone().into(), created_2.clone().into()],
|
||||
|
@ -130,9 +135,11 @@ mod tests {
|
|||
Ok(res) if res == vec![ParquetFileId::new(1000), ParquetFileId::new(1001)]
|
||||
);
|
||||
|
||||
let partition_id_2 = PartitionId::new(2);
|
||||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(2),
|
||||
partition_id_2,
|
||||
&[existing_2.clone(), existing_3.clone()],
|
||||
&[existing_1.clone()],
|
||||
&[],
|
||||
|
@ -151,14 +158,14 @@ level = INFO; message = committed parquet file change; target_level = Final; par
|
|||
inner.history(),
|
||||
vec![
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(1),
|
||||
partition_id: partition_id_1,
|
||||
delete: vec![existing_1.clone()],
|
||||
upgrade: vec![],
|
||||
created: vec![created_1, created_2],
|
||||
target_level: CompactionLevel::Final,
|
||||
},
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(2),
|
||||
partition_id: partition_id_2,
|
||||
delete: vec![existing_2, existing_3],
|
||||
upgrade: vec![existing_1],
|
||||
created: vec![],
|
||||
|
|
|
@ -303,15 +303,12 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use metric::{assert_histogram, Attributes};
|
||||
|
||||
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
||||
use iox_tests::ParquetFileBuilder;
|
||||
|
||||
use super::*;
|
||||
use crate::commit::mock::{CommitHistoryEntry, MockCommit};
|
||||
use assert_matches::assert_matches;
|
||||
use iox_tests::{partition_identifier, ParquetFileBuilder};
|
||||
use metric::{assert_histogram, Attributes};
|
||||
use std::sync::Arc;
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -326,6 +323,9 @@ mod tests {
|
|||
let inner = Arc::new(MockCommit::new());
|
||||
let commit = MetricsCommitWrapper::new(Arc::clone(&inner), ®istry);
|
||||
|
||||
let partition_id_1 = PartitionId::new(1);
|
||||
let transition_partition_id_1 = partition_identifier(1);
|
||||
|
||||
let existing_1 = ParquetFileBuilder::new(1)
|
||||
.with_file_size_bytes(10_001)
|
||||
.with_row_count(1_001)
|
||||
|
@ -350,7 +350,7 @@ mod tests {
|
|||
let created = ParquetFileBuilder::new(1000)
|
||||
.with_file_size_bytes(10_016)
|
||||
.with_row_count(1_016)
|
||||
.with_partition(1)
|
||||
.with_partition(transition_partition_id_1)
|
||||
.with_compaction_level(CompactionLevel::Initial)
|
||||
.build();
|
||||
|
||||
|
@ -392,7 +392,7 @@ mod tests {
|
|||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(1),
|
||||
partition_id_1,
|
||||
&[existing_1.clone()],
|
||||
&[existing_2a.clone()],
|
||||
&[created.clone().into()],
|
||||
|
@ -401,9 +401,11 @@ mod tests {
|
|||
.await;
|
||||
assert_matches!(ids, Ok(res) if res == vec![ParquetFileId::new(1000)]);
|
||||
|
||||
let partition_id_2 = PartitionId::new(2);
|
||||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(2),
|
||||
partition_id_2,
|
||||
&[existing_2b.clone(), existing_3.clone()],
|
||||
&[existing_4.clone()],
|
||||
&[],
|
||||
|
@ -449,14 +451,14 @@ mod tests {
|
|||
inner.history(),
|
||||
vec![
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(1),
|
||||
partition_id: partition_id_1,
|
||||
delete: vec![existing_1],
|
||||
upgrade: vec![existing_2a.clone()],
|
||||
created: vec![created],
|
||||
target_level: CompactionLevel::FileNonOverlapped,
|
||||
},
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(2),
|
||||
partition_id: partition_id_2,
|
||||
delete: vec![existing_2b, existing_3],
|
||||
upgrade: vec![existing_4],
|
||||
created: vec![],
|
||||
|
|
|
@ -78,10 +78,9 @@ impl Commit for MockCommit {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use assert_matches::assert_matches;
|
||||
use iox_tests::ParquetFileBuilder;
|
||||
|
||||
use super::*;
|
||||
use assert_matches::assert_matches;
|
||||
use iox_tests::{partition_identifier, ParquetFileBuilder};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -92,6 +91,11 @@ mod tests {
|
|||
async fn test_commit() {
|
||||
let commit = MockCommit::new();
|
||||
|
||||
let partition_id_1 = PartitionId::new(1);
|
||||
let transition_partition_id_1 = partition_identifier(1);
|
||||
let partition_id_2 = PartitionId::new(2);
|
||||
let transition_partition_id_2 = partition_identifier(2);
|
||||
|
||||
let existing_1 = ParquetFileBuilder::new(1).build();
|
||||
let existing_2 = ParquetFileBuilder::new(2).build();
|
||||
let existing_3 = ParquetFileBuilder::new(3).build();
|
||||
|
@ -101,14 +105,22 @@ mod tests {
|
|||
let existing_7 = ParquetFileBuilder::new(7).build();
|
||||
let existing_8 = ParquetFileBuilder::new(8).build();
|
||||
|
||||
let created_1_1 = ParquetFileBuilder::new(1000).with_partition(1).build();
|
||||
let created_1_2 = ParquetFileBuilder::new(1001).with_partition(1).build();
|
||||
let created_1_3 = ParquetFileBuilder::new(1003).with_partition(1).build();
|
||||
let created_2_1 = ParquetFileBuilder::new(1002).with_partition(2).build();
|
||||
let created_1_1 = ParquetFileBuilder::new(1000)
|
||||
.with_partition(transition_partition_id_1.clone())
|
||||
.build();
|
||||
let created_1_2 = ParquetFileBuilder::new(1001)
|
||||
.with_partition(transition_partition_id_1.clone())
|
||||
.build();
|
||||
let created_1_3 = ParquetFileBuilder::new(1003)
|
||||
.with_partition(transition_partition_id_1)
|
||||
.build();
|
||||
let created_2_1 = ParquetFileBuilder::new(1002)
|
||||
.with_partition(transition_partition_id_2)
|
||||
.build();
|
||||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(1),
|
||||
partition_id_1,
|
||||
&[existing_1.clone(), existing_2.clone()],
|
||||
&[existing_3.clone(), existing_4.clone()],
|
||||
&[created_1_1.clone().into(), created_1_2.clone().into()],
|
||||
|
@ -122,7 +134,7 @@ mod tests {
|
|||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(2),
|
||||
partition_id_2,
|
||||
&[existing_3.clone()],
|
||||
&[],
|
||||
&[created_2_1.clone().into()],
|
||||
|
@ -136,7 +148,7 @@ mod tests {
|
|||
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(1),
|
||||
partition_id_1,
|
||||
&[existing_5.clone(), existing_6.clone(), existing_7.clone()],
|
||||
&[],
|
||||
&[created_1_3.clone().into()],
|
||||
|
@ -151,7 +163,7 @@ mod tests {
|
|||
// simulate full implosion of the file (this may happen w/ delete predicates)
|
||||
let ids = commit
|
||||
.commit(
|
||||
PartitionId::new(1),
|
||||
partition_id_1,
|
||||
&[existing_8.clone()],
|
||||
&[],
|
||||
&[],
|
||||
|
@ -167,28 +179,28 @@ mod tests {
|
|||
commit.history(),
|
||||
vec![
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(1),
|
||||
partition_id: partition_id_1,
|
||||
delete: vec![existing_1, existing_2],
|
||||
upgrade: vec![existing_3.clone(), existing_4.clone()],
|
||||
created: vec![created_1_1, created_1_2],
|
||||
target_level: CompactionLevel::FileNonOverlapped,
|
||||
},
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(2),
|
||||
partition_id: partition_id_2,
|
||||
delete: vec![existing_3],
|
||||
upgrade: vec![],
|
||||
created: vec![created_2_1],
|
||||
target_level: CompactionLevel::Final,
|
||||
},
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(1),
|
||||
partition_id: partition_id_1,
|
||||
delete: vec![existing_5, existing_6, existing_7,],
|
||||
upgrade: vec![],
|
||||
created: vec![created_1_3],
|
||||
target_level: CompactionLevel::FileNonOverlapped,
|
||||
},
|
||||
CommitHistoryEntry {
|
||||
partition_id: PartitionId::new(1),
|
||||
partition_id: partition_id_1,
|
||||
delete: vec![existing_8],
|
||||
upgrade: vec![],
|
||||
created: vec![],
|
||||
|
|
|
@ -4,7 +4,7 @@ use assert_matches::assert_matches;
|
|||
use compactor_scheduler::{
|
||||
create_scheduler, CompactionJob, LocalSchedulerConfig, Scheduler, SchedulerConfig,
|
||||
};
|
||||
use data_types::{ColumnType, ParquetFile, ParquetFileParams, PartitionId};
|
||||
use data_types::{ColumnType, ParquetFile, ParquetFileParams, PartitionId, TransitionPartitionId};
|
||||
use iox_tests::{ParquetFileBuilder, TestCatalog, TestParquetFileBuilder, TestPartition};
|
||||
|
||||
mod end_job;
|
||||
|
@ -65,7 +65,7 @@ impl TestLocalScheduler {
|
|||
|
||||
pub async fn create_params_for_new_parquet_file(&self) -> ParquetFileParams {
|
||||
ParquetFileBuilder::new(42)
|
||||
.with_partition(self.get_partition_id().get())
|
||||
.with_partition(self.get_transition_partition_id())
|
||||
.build()
|
||||
.into()
|
||||
}
|
||||
|
@ -81,4 +81,8 @@ impl TestLocalScheduler {
|
|||
pub fn get_partition_id(&self) -> PartitionId {
|
||||
self.test_partition.partition.id
|
||||
}
|
||||
|
||||
pub fn get_transition_partition_id(&self) -> TransitionPartitionId {
|
||||
self.test_partition.partition.transition_partition_id()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -202,8 +202,7 @@ impl SimulatedFile {
|
|||
ParquetFileParams {
|
||||
namespace_id: partition_info.namespace_id,
|
||||
table_id: partition_info.table.id,
|
||||
partition_id: partition_info.partition_id,
|
||||
partition_hash_id: partition_info.partition_hash_id.clone(),
|
||||
partition_id: partition_info.transition_partition_id(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
min_time,
|
||||
max_time,
|
||||
|
|
|
@ -527,10 +527,9 @@ pub struct ParquetFile {
|
|||
pub namespace_id: NamespaceId,
|
||||
/// the table
|
||||
pub table_id: TableId,
|
||||
/// the partition
|
||||
pub partition_id: PartitionId,
|
||||
/// the partition hash ID, if generated
|
||||
pub partition_hash_id: Option<PartitionHashId>,
|
||||
/// the partition identifier
|
||||
#[sqlx(flatten)]
|
||||
pub partition_id: TransitionPartitionId,
|
||||
/// the uuid used in the object store path for this file
|
||||
pub object_store_id: Uuid,
|
||||
/// the min timestamp of data in this file
|
||||
|
@ -588,7 +587,6 @@ impl ParquetFile {
|
|||
namespace_id: params.namespace_id,
|
||||
table_id: params.table_id,
|
||||
partition_id: params.partition_id,
|
||||
partition_hash_id: params.partition_hash_id,
|
||||
object_store_id: params.object_store_id,
|
||||
min_time: params.min_time,
|
||||
max_time: params.max_time,
|
||||
|
@ -602,21 +600,9 @@ impl ParquetFile {
|
|||
}
|
||||
}
|
||||
|
||||
/// If this parquet file params will be storing a `PartitionHashId` in the catalog, use that.
|
||||
/// Otherwise, use the database-assigned `PartitionId`.
|
||||
pub fn transition_partition_id(&self) -> TransitionPartitionId {
|
||||
TransitionPartitionId::from((self.partition_id, self.partition_hash_id.as_ref()))
|
||||
}
|
||||
|
||||
/// Estimate the memory consumption of this object and its contents
|
||||
pub fn size(&self) -> usize {
|
||||
std::mem::size_of_val(self)
|
||||
+ self
|
||||
.partition_hash_id
|
||||
.as_ref()
|
||||
.map(|id| id.size() - std::mem::size_of_val(id))
|
||||
.unwrap_or_default()
|
||||
+ self.column_set.size()
|
||||
std::mem::size_of_val(self) + self.partition_id.size() + self.column_set.size()
|
||||
- std::mem::size_of_val(&self.column_set)
|
||||
}
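
The rewritten `size()` keeps the usual deep-size accounting: start from the shallow `std::mem::size_of_val(self)`, add each heap-owning member's own `size()`, and subtract the member's inline bytes that `size_of_val(self)` already counted. A minimal, self-contained sketch of that pattern follows; `FileMeta` and the simplified `ColumnSet` are illustrative stand-ins, not the real `data_types` definitions.

// Sketch only: simplified stand-ins for the real ParquetFile/ColumnSet types.
struct ColumnSet(Vec<String>);

impl ColumnSet {
    /// Inline Vec header plus everything it owns on the heap.
    fn size(&self) -> usize {
        std::mem::size_of_val(self)
            + self
                .0
                .iter()
                .map(|s| std::mem::size_of_val(s) + s.capacity())
                .sum::<usize>()
    }
}

struct FileMeta {
    id: u64,
    columns: ColumnSet,
}

impl FileMeta {
    fn size(&self) -> usize {
        // size_of_val(self) already counts the inline bytes of `columns`, and
        // columns.size() counts them again, so subtract one copy.
        std::mem::size_of_val(self) + self.columns.size()
            - std::mem::size_of_val(&self.columns)
    }
}

fn main() {
    let f = FileMeta {
        id: 1,
        columns: ColumnSet(vec!["time".to_string(), "tag".to_string()]),
    };
    println!("file {} estimated at {} bytes", f.id, f.size());
}
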
|
||||
|
||||
|
@ -638,10 +624,8 @@ pub struct ParquetFileParams {
|
|||
pub namespace_id: NamespaceId,
|
||||
/// the table
|
||||
pub table_id: TableId,
|
||||
/// the partition
|
||||
pub partition_id: PartitionId,
|
||||
/// the partition hash ID, if generated
|
||||
pub partition_hash_id: Option<PartitionHashId>,
|
||||
/// the partition identifier
|
||||
pub partition_id: TransitionPartitionId,
|
||||
/// the uuid used in the object store path for this file
|
||||
pub object_store_id: Uuid,
|
||||
/// the min timestamp of data in this file
|
||||
|
@ -662,21 +646,12 @@ pub struct ParquetFileParams {
|
|||
pub max_l0_created_at: Timestamp,
|
||||
}
|
||||
|
||||
impl ParquetFileParams {
|
||||
/// If this parquet file params will be storing a `PartitionHashId` in the catalog, use that.
|
||||
/// Otherwise, use the database-assigned `PartitionId`.
|
||||
pub fn transition_partition_id(&self) -> TransitionPartitionId {
|
||||
TransitionPartitionId::from((self.partition_id, self.partition_hash_id.as_ref()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParquetFile> for ParquetFileParams {
|
||||
fn from(value: ParquetFile) -> Self {
|
||||
Self {
|
||||
namespace_id: value.namespace_id,
|
||||
table_id: value.table_id,
|
||||
partition_id: value.partition_id,
|
||||
partition_hash_id: value.partition_hash_id,
|
||||
object_store_id: value.object_store_id,
|
||||
min_time: value.min_time,
|
||||
max_time: value.max_time,
|
||||
|
|
|
@ -31,6 +31,34 @@ impl TransitionPartitionId {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, R> sqlx::FromRow<'a, R> for TransitionPartitionId
|
||||
where
|
||||
R: sqlx::Row,
|
||||
&'static str: sqlx::ColumnIndex<R>,
|
||||
PartitionId: sqlx::decode::Decode<'a, R::Database>,
|
||||
PartitionId: sqlx::types::Type<R::Database>,
|
||||
Option<PartitionHashId>: sqlx::decode::Decode<'a, R::Database>,
|
||||
Option<PartitionHashId>: sqlx::types::Type<R::Database>,
|
||||
{
|
||||
fn from_row(row: &'a R) -> sqlx::Result<Self> {
|
||||
let partition_id: Option<PartitionId> = row.try_get("partition_id")?;
|
||||
let partition_hash_id: Option<PartitionHashId> = row.try_get("partition_hash_id")?;
|
||||
|
||||
let transition_partition_id = match (partition_id, partition_hash_id) {
|
||||
(_, Some(hash_id)) => TransitionPartitionId::Deterministic(hash_id),
|
||||
(Some(id), _) => TransitionPartitionId::Deprecated(id),
|
||||
(None, None) => {
|
||||
return Err(sqlx::Error::ColumnDecode {
|
||||
index: "partition_id".into(),
|
||||
source: "Both partition_id and partition_hash_id were NULL".into(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
Ok(transition_partition_id)
|
||||
}
|
||||
}
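
The `FromRow` impl above prefers the deterministic `partition_hash_id` whenever the row carries one, falls back to the legacy catalog-assigned `partition_id`, and treats both columns being NULL as a decode error. A small stand-alone sketch of that decision table, using hypothetical stand-in types instead of the real sqlx row and `data_types` items:

// Sketch only: i64 / Vec<u8> stand in for PartitionId / PartitionHashId.
#[derive(Debug, PartialEq)]
enum TransitionPartitionId {
    Deterministic(Vec<u8>), // deterministic hash identifier
    Deprecated(i64),        // legacy catalog-assigned row id
}

fn decode(
    partition_id: Option<i64>,
    partition_hash_id: Option<Vec<u8>>,
) -> Result<TransitionPartitionId, String> {
    match (partition_id, partition_hash_id) {
        // Prefer the hash identifier whenever it is present.
        (_, Some(hash_id)) => Ok(TransitionPartitionId::Deterministic(hash_id)),
        // Otherwise fall back to the old catalog id.
        (Some(id), None) => Ok(TransitionPartitionId::Deprecated(id)),
        // Both NULL mirrors the sqlx::Error::ColumnDecode case above.
        (None, None) => Err("Both partition_id and partition_hash_id were NULL".into()),
    }
}

fn main() {
    assert_eq!(decode(Some(7), None), Ok(TransitionPartitionId::Deprecated(7)));
    assert_eq!(
        decode(Some(7), Some(vec![0xAB])),
        Ok(TransitionPartitionId::Deterministic(vec![0xAB]))
    );
    assert!(decode(None, None).is_err());
}
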
|
||||
|
||||
impl From<(PartitionId, Option<&PartitionHashId>)> for TransitionPartitionId {
|
||||
fn from((partition_id, partition_hash_id): (PartitionId, Option<&PartitionHashId>)) -> Self {
|
||||
partition_hash_id
|
||||
|
|
|
@ -267,8 +267,7 @@ mod tests {
|
|||
let parquet_file_params = ParquetFileParams {
|
||||
namespace_id: namespace.id,
|
||||
table_id: partition.table_id,
|
||||
partition_id: partition.id,
|
||||
partition_hash_id: partition.hash_id().cloned(),
|
||||
partition_id: partition.transition_partition_id(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
min_time: Timestamp::new(1),
|
||||
max_time: Timestamp::new(10),
|
||||
|
@ -298,7 +297,7 @@ mod tests {
|
|||
let location = ParquetFilePath::new(
|
||||
file_in_catalog.namespace_id,
|
||||
file_in_catalog.table_id,
|
||||
&file_in_catalog.transition_partition_id(),
|
||||
&file_in_catalog.partition_id.clone(),
|
||||
file_in_catalog.object_store_id,
|
||||
)
|
||||
.object_store_path();
|
||||
|
@ -376,7 +375,7 @@ mod tests {
|
|||
let location = ParquetFilePath::new(
|
||||
file_in_catalog.namespace_id,
|
||||
file_in_catalog.table_id,
|
||||
&file_in_catalog.transition_partition_id(),
|
||||
&file_in_catalog.partition_id.clone(),
|
||||
file_in_catalog.object_store_id,
|
||||
)
|
||||
.object_store_path();
|
||||
|
@ -469,7 +468,7 @@ mod tests {
|
|||
let loc = ParquetFilePath::new(
|
||||
file_in_catalog.namespace_id,
|
||||
file_in_catalog.table_id,
|
||||
&file_in_catalog.transition_partition_id(),
|
||||
&file_in_catalog.partition_id.clone(),
|
||||
file_in_catalog.object_store_id,
|
||||
)
|
||||
.object_store_path();
|
||||
|
|
|
@ -52,6 +52,7 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
|
|||
let proto_files = vec![
|
||||
authz_path.join("authz.proto"),
|
||||
catalog_path.join("parquet_file.proto"),
|
||||
catalog_path.join("partition_identifier.proto"),
|
||||
catalog_path.join("service.proto"),
|
||||
compactor_path.join("service.proto"),
|
||||
delete_path.join("service.proto"),
|
||||
|
|
|
@ -2,6 +2,8 @@ syntax = "proto3";
|
|||
package influxdata.iox.catalog.v1;
|
||||
option go_package = "github.com/influxdata/iox/catalog/v1";
|
||||
|
||||
import "influxdata/iox/catalog/v1/partition_identifier.proto";
|
||||
|
||||
message ParquetFile {
|
||||
reserved 7;
|
||||
reserved "min_sequence_number";
|
||||
|
@ -11,6 +13,8 @@ message ParquetFile {
|
|||
reserved "shard_id";
|
||||
reserved 8;
|
||||
reserved "max_sequence_number";
|
||||
reserved 5;
|
||||
reserved "partition_id";
|
||||
|
||||
// the id of the file in the catalog
|
||||
int64 id = 1;
|
||||
|
@ -18,8 +22,9 @@ message ParquetFile {
|
|||
int64 namespace_id = 3;
|
||||
// the table id
|
||||
int64 table_id = 4;
|
||||
// the partition id
|
||||
int64 partition_id = 5;
|
||||
|
||||
PartitionIdentifier partition_identifier = 19;
|
||||
|
||||
// the object store uuid
|
||||
string object_store_id = 6;
|
||||
// the min timestamp of data in this file
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
syntax = "proto3";
|
||||
package influxdata.iox.catalog.v1;
|
||||
option go_package = "github.com/influxdata/iox/catalog/v1";
|
||||
|
||||
message PartitionIdentifier {
|
||||
// Either the catalog-assigned partition ID or the deterministic identifier created from the
|
||||
// table ID and partition key.
|
||||
oneof id {
|
||||
int64 catalog_id = 1;
|
||||
bytes hash_id = 2;
|
||||
}
|
||||
}
|
|
@ -3,6 +3,7 @@ package influxdata.iox.catalog.v1;
|
|||
option go_package = "github.com/influxdata/iox/catalog/v1";
|
||||
|
||||
import "influxdata/iox/catalog/v1/parquet_file.proto";
|
||||
import "influxdata/iox/catalog/v1/partition_identifier.proto";
|
||||
|
||||
service CatalogService {
|
||||
// Get the parquet_file catalog records in the given partition
|
||||
|
@ -19,8 +20,11 @@ service CatalogService {
|
|||
}
|
||||
|
||||
message GetParquetFilesByPartitionIdRequest {
|
||||
// the partition id
|
||||
int64 partition_id = 1;
|
||||
// Was the catalog-assigned partition ID.
|
||||
reserved 1;
|
||||
reserved "partition_id";
|
||||
|
||||
PartitionIdentifier partition_identifier = 2;
|
||||
}
|
||||
|
||||
message GetParquetFilesByPartitionIdResponse {
|
||||
|
@ -35,15 +39,17 @@ message Partition {
|
|||
reserved "sequencer_id";
|
||||
reserved 7;
|
||||
reserved "shard_id";
|
||||
reserved 1;
|
||||
reserved "id";
|
||||
|
||||
// the partition id
|
||||
int64 id = 1;
|
||||
// the table id the partition is in
|
||||
int64 table_id = 3;
|
||||
// the partition key
|
||||
string key = 4;
|
||||
// the sort key for data in parquet files in the partition
|
||||
repeated string array_sort_key = 6;
|
||||
|
||||
PartitionIdentifier identifier = 8;
|
||||
}
|
||||
|
||||
message GetPartitionsByTableIdRequest {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use async_trait::async_trait;
|
||||
use tracing::warn;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
// Re-export the bytes type to ensure upstream users of this crate are
|
||||
// interacting with the same type.
|
||||
|
@ -32,5 +32,7 @@ pub struct NopDispatcher;
|
|||
|
||||
#[async_trait::async_trait]
|
||||
impl Dispatcher for NopDispatcher {
|
||||
async fn dispatch(&self, _payload: crate::Bytes) {}
|
||||
async fn dispatch(&self, _payload: crate::Bytes) {
|
||||
debug!("received no-op message payload");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
use data_types::{PartitionHashId, PartitionId, TransitionPartitionId};
|
||||
use futures_util::TryStreamExt;
|
||||
use influxdb_iox_client::{
|
||||
catalog::{self, generated_types::ParquetFile},
|
||||
catalog::{
|
||||
self,
|
||||
generated_types::{partition_identifier, ParquetFile, PartitionIdentifier},
|
||||
},
|
||||
connection::Connection,
|
||||
store,
|
||||
};
|
||||
use observability_deps::tracing::{debug, info};
|
||||
use std::path::{Path, PathBuf};
|
||||
use thiserror::Error;
|
||||
use tokio::{
|
||||
|
@ -35,10 +38,6 @@ type Result<T, E = ExportError> = std::result::Result<T, E>;
|
|||
pub struct RemoteExporter {
|
||||
catalog_client: catalog::Client,
|
||||
store_client: store::Client,
|
||||
|
||||
/// Optional partition filter. If `Some(partition_id)`, only these
|
||||
/// files with that `partition_id` are downloaded.
|
||||
partition_filter: Option<i64>,
|
||||
}
|
||||
|
||||
impl RemoteExporter {
|
||||
|
@ -46,19 +45,9 @@ impl RemoteExporter {
|
|||
Self {
|
||||
catalog_client: catalog::Client::new(connection.clone()),
|
||||
store_client: store::Client::new(connection),
|
||||
partition_filter: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Specify that only files and metadata for the specific
|
||||
/// partition id should be exported.
|
||||
pub fn with_partition_filter(mut self, partition_id: i64) -> Self {
|
||||
info!(partition_id, "Filtering by partition");
|
||||
|
||||
self.partition_filter = Some(partition_id);
|
||||
self
|
||||
}
|
||||
|
||||
/// Exports all data and metadata for `table_name` in
|
||||
/// `namespace` to local files.
|
||||
///
|
||||
|
@ -95,39 +84,14 @@ impl RemoteExporter {
|
|||
let indexed_parquet_file_metadata = parquet_files.into_iter().enumerate();
|
||||
|
||||
for (index, parquet_file) in indexed_parquet_file_metadata {
|
||||
if self.should_export(parquet_file.partition_id) {
|
||||
self.export_parquet_file(
|
||||
&output_directory,
|
||||
index,
|
||||
num_parquet_files,
|
||||
&parquet_file,
|
||||
)
|
||||
self.export_parquet_file(&output_directory, index, num_parquet_files, &parquet_file)
|
||||
.await?;
|
||||
} else {
|
||||
debug!(
|
||||
"skipping file {} of {num_parquet_files} ({} does not match request)",
|
||||
index + 1,
|
||||
parquet_file.partition_id
|
||||
);
|
||||
}
|
||||
}
|
||||
println!("Done.");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return true if this partition should be exported
|
||||
fn should_export(&self, partition_id: i64) -> bool {
|
||||
self.partition_filter
|
||||
.map(|partition_filter| {
|
||||
// if a partition filter was specified, only export
|
||||
// the file if the partition matches
|
||||
partition_filter == partition_id
|
||||
})
|
||||
// export files if there is no partition
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
/// Exports table and partition information for the specified
|
||||
/// table. Overwrites existing files, if any, to ensure it has the
|
||||
/// latest catalog information.
|
||||
|
@ -158,13 +122,11 @@ impl RemoteExporter {
|
|||
.await?;
|
||||
|
||||
for partition in partitions {
|
||||
let partition_id = partition.id;
|
||||
if self.should_export(partition_id) {
|
||||
let partition_json = serde_json::to_string_pretty(&partition)?;
|
||||
let filename = format!("partition.{partition_id}.json");
|
||||
let file_path = output_directory.join(&filename);
|
||||
write_string_to_file(&partition_json, &file_path).await?;
|
||||
}
|
||||
let partition_id = to_partition_id(partition.identifier.as_ref());
|
||||
let partition_json = serde_json::to_string_pretty(&partition)?;
|
||||
let filename = format!("partition.{partition_id}.json");
|
||||
let file_path = output_directory.join(&filename);
|
||||
write_string_to_file(&partition_json, &file_path).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -183,9 +145,10 @@ impl RemoteExporter {
|
|||
parquet_file: &ParquetFile,
|
||||
) -> Result<()> {
|
||||
let uuid = &parquet_file.object_store_id;
|
||||
let partition_id = parquet_file.partition_id;
|
||||
let file_size_bytes = parquet_file.file_size_bytes as u64;
|
||||
|
||||
let partition_id = to_partition_id(parquet_file.partition_identifier.as_ref());
|
||||
|
||||
// copy out the metadata as pbjson encoded data always (to
|
||||
// ensure we have the most up to date version)
|
||||
{
|
||||
|
@ -230,6 +193,21 @@ impl RemoteExporter {
|
|||
}
|
||||
}
|
||||
|
||||
fn to_partition_id(partition_identifier: Option<&PartitionIdentifier>) -> TransitionPartitionId {
|
||||
match partition_identifier
|
||||
.and_then(|pi| pi.id.as_ref())
|
||||
.expect("Catalog service should send the partition identifier")
|
||||
{
|
||||
partition_identifier::Id::HashId(bytes) => TransitionPartitionId::Deterministic(
|
||||
PartitionHashId::try_from(&bytes[..])
|
||||
.expect("Catalog service should send valid hash_id bytes"),
|
||||
),
|
||||
partition_identifier::Id::CatalogId(id) => {
|
||||
TransitionPartitionId::Deprecated(PartitionId::new(*id))
|
||||
}
|
||||
}
|
||||
}
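
Both directions of this mapping appear in the diff: `to_partition_id` decodes the `PartitionIdentifier` oneof received from the catalog service, and the `influxdb_iox_client` change later in this diff builds the same oneof for requests. A hedged round-trip sketch with hand-written mirrors of the generated types (the real prost-generated module differs in detail):

// Sketch only: simplified mirrors of the generated oneof and the domain enum.
#[derive(Debug, Clone, PartialEq)]
enum ProtoPartitionId {
    CatalogId(i64),
    HashId(Vec<u8>),
}

#[derive(Debug, Clone, PartialEq)]
enum TransitionPartitionId {
    Deprecated(i64),
    Deterministic(Vec<u8>),
}

/// Decode the identifier from the wire (shape of `to_partition_id` above).
fn decode(id: Option<&ProtoPartitionId>) -> TransitionPartitionId {
    match id.expect("catalog service should send the partition identifier") {
        ProtoPartitionId::HashId(bytes) => TransitionPartitionId::Deterministic(bytes.clone()),
        ProtoPartitionId::CatalogId(id) => TransitionPartitionId::Deprecated(*id),
    }
}

/// Encode the identifier for a request (shape of the client-side change).
fn encode(id: &TransitionPartitionId) -> ProtoPartitionId {
    match id {
        TransitionPartitionId::Deterministic(bytes) => ProtoPartitionId::HashId(bytes.clone()),
        TransitionPartitionId::Deprecated(id) => ProtoPartitionId::CatalogId(*id),
    }
}

fn main() {
    let wire = ProtoPartitionId::CatalogId(42);
    let domain = decode(Some(&wire));
    assert_eq!(encode(&domain), wire);
}
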
|
||||
|
||||
/// writes the contents of a string to a file, overwriting the previous contents, if any
|
||||
async fn write_string_to_file(contents: &str, path: &Path) -> Result<()> {
|
||||
let mut file = OpenOptions::new()
|
||||
|
|
|
@ -7,7 +7,7 @@ use data_types::{
|
|||
NamespacePartitionTemplateOverride, TablePartitionTemplateOverride, PARTITION_BY_DAY_PROTO,
|
||||
},
|
||||
ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceName, NamespaceNameError,
|
||||
ParquetFileParams, Partition, PartitionHashId, Statistics, Table, TableId, Timestamp,
|
||||
ParquetFileParams, Partition, Statistics, Table, TableId, Timestamp,
|
||||
};
|
||||
use generated_types::influxdata::iox::catalog::v1 as proto;
|
||||
// ParquetFile as ProtoParquetFile, Partition as ProtoPartition,
|
||||
|
@ -567,9 +567,6 @@ impl RemoteImporter {
|
|||
// need to make columns in the target catalog
|
||||
let column_set = insert_columns(table.id, decoded_iox_parquet_metadata, repos).await?;
|
||||
|
||||
// Create the the partition_hash_id
|
||||
let partition_hash_id = Some(PartitionHashId::new(table.id, &partition.partition_key));
|
||||
|
||||
let params = if let Some(proto_parquet_file) = &parquet_metadata {
|
||||
let compaction_level = proto_parquet_file
|
||||
.compaction_level
|
||||
|
@ -579,8 +576,7 @@ impl RemoteImporter {
|
|||
ParquetFileParams {
|
||||
namespace_id: namespace.id,
|
||||
table_id: table.id,
|
||||
partition_hash_id,
|
||||
partition_id: partition.id,
|
||||
partition_id: partition.transition_partition_id(),
|
||||
object_store_id,
|
||||
min_time: Timestamp::new(proto_parquet_file.min_time),
|
||||
max_time: Timestamp::new(proto_parquet_file.max_time),
|
||||
|
@ -599,8 +595,7 @@ impl RemoteImporter {
|
|||
ParquetFileParams {
|
||||
namespace_id: namespace.id,
|
||||
table_id: table.id,
|
||||
partition_hash_id,
|
||||
partition_id: partition.id,
|
||||
partition_id: partition.transition_partition_id(),
|
||||
object_store_id,
|
||||
min_time,
|
||||
max_time,
|
||||
|
|
|
@ -67,7 +67,7 @@ libc = { version = "0.2" }
|
|||
num_cpus = "1.16.0"
|
||||
once_cell = { version = "1.18", features = ["parking_lot"] }
|
||||
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
|
||||
serde = "1.0.177"
|
||||
serde = "1.0.179"
|
||||
serde_json = "1.0.104"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.7.0"
|
||||
|
|
|
@ -55,10 +55,6 @@ struct GetTable {
|
|||
#[clap(action)]
|
||||
table: String,
|
||||
|
||||
/// If specified, only files from the specified partitions are downloaded
|
||||
#[clap(action, short, long)]
|
||||
partition_id: Option<i64>,
|
||||
|
||||
/// The output directory to use. If not specified, files will be placed in a directory named
|
||||
/// after the table in the current working directory.
|
||||
#[clap(action, short)]
|
||||
|
@ -91,13 +87,9 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
|||
Command::GetTable(GetTable {
|
||||
namespace,
|
||||
table,
|
||||
partition_id,
|
||||
output_directory,
|
||||
}) => {
|
||||
let mut exporter = RemoteExporter::new(connection);
|
||||
if let Some(partition_id) = partition_id {
|
||||
exporter = exporter.with_partition_filter(partition_id);
|
||||
}
|
||||
Ok(exporter
|
||||
.export_table(output_directory, namespace, table)
|
||||
.await?)
|
||||
|
|
|
@ -7,6 +7,7 @@ use clap_blocks::{
|
|||
catalog_dsn::CatalogDsnConfig,
|
||||
compactor::CompactorConfig,
|
||||
compactor_scheduler::CompactorSchedulerConfig,
|
||||
gossip::GossipConfig,
|
||||
ingester::IngesterConfig,
|
||||
ingester_address::IngesterAddress,
|
||||
object_store::{make_object_store, ObjectStoreConfig},
|
||||
|
@ -476,6 +477,7 @@ impl Config {
|
|||
persist_queue_depth,
|
||||
persist_hot_partition_cost,
|
||||
rpc_write_max_incoming_bytes: 1024 * 1024 * 1024, // 1GiB
|
||||
gossip_config: GossipConfig::disabled(),
|
||||
};
|
||||
|
||||
let router_config = RouterConfig {
|
||||
|
@ -489,6 +491,7 @@ impl Config {
|
|||
rpc_write_replicas: 1.try_into().unwrap(),
|
||||
rpc_write_max_outgoing_bytes: ingester_config.rpc_write_max_incoming_bytes,
|
||||
rpc_write_health_error_window_seconds: Duration::from_secs(5),
|
||||
gossip_config: GossipConfig::disabled(),
|
||||
};
|
||||
|
||||
// create a CompactorConfig for the all in one server based on
|
||||
|
@ -637,6 +640,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
Arc::clone(&catalog),
|
||||
Arc::clone(&object_store),
|
||||
&router_config,
|
||||
&GossipConfig::disabled(),
|
||||
router_run_config
|
||||
.tracing_config()
|
||||
.traces_jaeger_trace_context_header_name
|
||||
|
|
|
@ -98,6 +98,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
catalog,
|
||||
object_store,
|
||||
&config.router_config,
|
||||
&config.router_config.gossip_config,
|
||||
config
|
||||
.run_config
|
||||
.tracing_config()
|
||||
|
|
|
@ -157,10 +157,12 @@ async fn sharded_compactor_0_always_compacts_partition_1() {
|
|||
.assert()
|
||||
.success()
|
||||
.stdout(
|
||||
// Important parts are the expected partition ID
|
||||
predicate::str::contains(r#""partitionId": "1","#)
|
||||
// and compaction level
|
||||
.and(predicate::str::contains(r#""compactionLevel": 1"#)),
|
||||
// Important parts are the expected partition identifier
|
||||
predicate::str::contains(
|
||||
r#""hashId": "uGKn6bMp7mpBjN4ZEZjq6xUSdT8ZuHqB3vKubD0O0jc=""#,
|
||||
)
|
||||
// and compaction level
|
||||
.and(predicate::str::contains(r#""compactionLevel": 1"#)),
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
|
@ -240,10 +242,12 @@ async fn sharded_compactor_1_never_compacts_partition_1() {
|
|||
.assert()
|
||||
.success()
|
||||
.stdout(
|
||||
// Important parts are the expected partition ID
|
||||
predicate::str::contains(r#""partitionId": "1","#)
|
||||
// and compaction level is 0 so it's not returned
|
||||
.and(predicate::str::contains("compactionLevel").not()),
|
||||
// Important parts are the expected partition identifier
|
||||
predicate::str::contains(
|
||||
r#""hashId": "uGKn6bMp7mpBjN4ZEZjq6xUSdT8ZuHqB3vKubD0O0jc=""#,
|
||||
)
|
||||
// and compaction level is 0 so it's not returned
|
||||
.and(predicate::str::contains("compactionLevel").not()),
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
|
|
|
@ -280,10 +280,9 @@ async fn remote_partition_and_get_from_store_and_pull() {
|
|||
.arg("1")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(
|
||||
predicate::str::contains(r#""id": "1""#)
|
||||
.and(predicate::str::contains(r#""partitionId": "1","#)),
|
||||
)
|
||||
.stdout(predicate::str::contains(
|
||||
r#""hashId": "uGKn6bMp7mpBjN4ZEZjq6xUSdT8ZuHqB3vKubD0O0jc=""#,
|
||||
))
|
||||
.get_output()
|
||||
.stdout
|
||||
.clone();
|
||||
|
|
|
@ -29,9 +29,15 @@ impl Client {
|
|||
&mut self,
|
||||
partition_id: i64,
|
||||
) -> Result<Vec<ParquetFile>, Error> {
|
||||
let partition_identifier = PartitionIdentifier {
|
||||
id: Some(partition_identifier::Id::CatalogId(partition_id)),
|
||||
};
|
||||
|
||||
let response = self
|
||||
.inner
|
||||
.get_parquet_files_by_partition_id(GetParquetFilesByPartitionIdRequest { partition_id })
|
||||
.get_parquet_files_by_partition_id(GetParquetFilesByPartitionIdRequest {
|
||||
partition_identifier: Some(partition_identifier),
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(response.into_inner().parquet_files)
|
||||
|
|
|
@ -48,6 +48,7 @@ trace = { version = "0.1.0", path = "../trace" }
|
|||
uuid = "1.4.1"
|
||||
wal = { version = "0.1.0", path = "../wal" }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
gossip = { version = "0.1.0", path = "../gossip" }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.5.0"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
//! Partition level data buffer structures.
|
||||
|
||||
use std::{collections::VecDeque, sync::Arc};
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::{
|
||||
sequence_number_set::SequenceNumberSet, NamespaceId, PartitionHashId, PartitionId,
|
||||
|
@ -8,11 +8,12 @@ use data_types::{
|
|||
};
|
||||
use mutable_batch::MutableBatch;
|
||||
use observability_deps::tracing::*;
|
||||
use schema::sort::SortKey;
|
||||
use schema::{merge::SchemaMerger, sort::SortKey, Schema};
|
||||
|
||||
use self::{
|
||||
buffer::{traits::Queryable, BufferState, DataBuffer, Persisting},
|
||||
buffer::{traits::Queryable, DataBuffer},
|
||||
persisting::{BatchIdent, PersistingData},
|
||||
persisting_list::PersistingList,
|
||||
};
|
||||
use super::{namespace::NamespaceName, table::TableMetadata};
|
||||
use crate::{
|
||||
|
@ -21,6 +22,7 @@ use crate::{
|
|||
|
||||
mod buffer;
|
||||
pub(crate) mod persisting;
|
||||
mod persisting_list;
|
||||
pub(crate) mod resolver;
|
||||
|
||||
/// The load state of the [`SortKey`] for a given partition.
|
||||
|
@ -89,7 +91,7 @@ pub struct PartitionData {
|
|||
///
|
||||
/// The [`BatchIdent`] is a generational counter that is used to tag each
|
||||
/// persisting batch with a unique, opaque identifier.
|
||||
persisting: VecDeque<(BatchIdent, BufferState<Persisting>)>,
|
||||
persisting: PersistingList,
|
||||
|
||||
/// The number of persist operations started over the lifetime of this
|
||||
/// [`PartitionData`].
|
||||
|
@ -123,7 +125,7 @@ impl PartitionData {
|
|||
table_id,
|
||||
table,
|
||||
buffer: DataBuffer::default(),
|
||||
persisting: VecDeque::with_capacity(1),
|
||||
persisting: PersistingList::default(),
|
||||
started_persistence_count: BatchIdent::default(),
|
||||
completed_persistence_count: 0,
|
||||
}
|
||||
|
@ -169,7 +171,7 @@ impl PartitionData {
|
|||
/// persisting batches, plus 1 for the "hot" buffer. Reading the row count
|
||||
/// of each batch is `O(1)`. This method is expected to be fast.
|
||||
pub(crate) fn rows(&self) -> usize {
|
||||
self.persisting.iter().map(|(_, v)| v.rows()).sum::<usize>() + self.buffer.rows()
|
||||
self.persisting.rows() + self.buffer.rows()
|
||||
}
|
||||
|
||||
/// Return the timestamp min/max values for the data contained within this
|
||||
|
@ -188,11 +190,8 @@ impl PartitionData {
|
|||
/// statistics for each batch is `O(1)`. This method is expected to be fast.
|
||||
pub(crate) fn timestamp_stats(&self) -> Option<TimestampMinMax> {
|
||||
self.persisting
|
||||
.iter()
|
||||
.map(|(_, v)| {
|
||||
v.timestamp_stats()
|
||||
.expect("persisting batches must be non-empty")
|
||||
})
|
||||
.timestamp_stats()
|
||||
.into_iter()
|
||||
.chain(self.buffer.timestamp_stats())
|
||||
.reduce(|acc, v| TimestampMinMax {
|
||||
min: acc.min.min(v.min),
|
||||
|
@ -200,6 +199,30 @@ impl PartitionData {
|
|||
})
|
||||
}
|
||||
|
||||
/// Return the schema of the data currently buffered within this
|
||||
/// [`PartitionData`].
|
||||
///
|
||||
/// This schema is not additive - it is the union of the individual schema
|
||||
/// batches currently buffered and as such columns are removed as the
|
||||
/// individual batches containing those columns are persisted and dropped.
|
||||
pub(crate) fn schema(&self) -> Option<Schema> {
|
||||
if self.persisting.is_empty() && self.buffer.rows() == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(
|
||||
self.persisting
|
||||
.schema()
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.chain(self.buffer.schema())
|
||||
.fold(SchemaMerger::new(), |acc, v| {
|
||||
acc.merge(&v).expect("schemas are incompatible")
|
||||
})
|
||||
.build(),
|
||||
)
|
||||
}
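
As the doc comment says, the reported schema is just the union of whatever is currently buffered, so columns vanish once the only batches containing them are persisted and dropped. A toy illustration of that behaviour, using a set of column names as a stand-in for the real `Schema`/`SchemaMerger` machinery:

// Sketch only: BTreeSet<String> stands in for Schema, union for SchemaMerger.
use std::collections::BTreeSet;

fn merged_schema(batches: &[BTreeSet<String>]) -> Option<BTreeSet<String>> {
    if batches.is_empty() {
        return None;
    }
    Some(batches.iter().flatten().cloned().collect())
}

fn main() {
    let cols = |names: &[&str]| names.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>();

    let persisting = cols(&["time", "tag"]);
    let hot = cols(&["time", "value"]);

    // Union across everything currently buffered.
    let schema = merged_schema(&[persisting, hot.clone()]).unwrap();
    assert!(schema.contains("tag") && schema.contains("value"));

    // Once the persisting batch is dropped, its exclusive column disappears.
    let schema = merged_schema(&[hot]).unwrap();
    assert!(!schema.contains("tag"));
}
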
|
||||
|
||||
/// Return all data for this partition, ordered by the calls to
|
||||
/// [`PartitionData::buffer_write()`].
|
||||
pub(crate) fn get_query_data(&mut self, projection: &OwnedProjection) -> Option<QueryAdaptor> {
|
||||
|
@ -213,8 +236,7 @@ impl PartitionData {
|
|||
// existing rows materialise to the correct output.
|
||||
let data = self
|
||||
.persisting
|
||||
.iter()
|
||||
.flat_map(|(_, b)| b.get_query_data(projection))
|
||||
.get_query_data(projection)
|
||||
.chain(buffered_data)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
@ -287,7 +309,7 @@ impl PartitionData {
|
|||
// Increment the "started persist" counter.
|
||||
//
|
||||
// This is used to cheaply identify batches given to the
|
||||
// mark_persisted() call.
|
||||
// mark_persisted() call and ensure monotonicity.
|
||||
let batch_ident = self.started_persistence_count.next();
|
||||
|
||||
debug!(
|
||||
|
@ -310,10 +332,9 @@ impl PartitionData {
|
|||
batch_ident,
|
||||
);
|
||||
|
||||
// Push the new buffer to the back of the persisting queue, so that
|
||||
// iterating from back to front during queries iterates over writes from
|
||||
// oldest to newest.
|
||||
self.persisting.push_back((batch_ident, fsm));
|
||||
// Push the buffer into the persisting list (which maintains batch
|
||||
// order).
|
||||
self.persisting.push(batch_ident, fsm);
|
||||
|
||||
Some(data)
|
||||
}
|
||||
|
@ -328,22 +349,11 @@ impl PartitionData {
|
|||
/// This method panics if [`Self`] is not marked as undergoing a persist
|
||||
/// operation, or `batch` is not currently being persisted.
|
||||
pub(crate) fn mark_persisted(&mut self, batch: PersistingData) -> SequenceNumberSet {
|
||||
// Find the batch in the persisting queue.
|
||||
let idx = self
|
||||
.persisting
|
||||
.iter()
|
||||
.position(|(old, _)| *old == batch.batch_ident())
|
||||
.expect("no currently persisting batch");
|
||||
|
||||
// Remove the batch from the queue, preserving the order of the queue
|
||||
// for batch iteration during queries.
|
||||
let (old_ident, fsm) = self.persisting.remove(idx).unwrap();
|
||||
assert_eq!(old_ident, batch.batch_ident());
|
||||
let fsm = self.persisting.remove(batch.batch_ident());
|
||||
|
||||
self.completed_persistence_count += 1;
|
||||
|
||||
debug!(
|
||||
batch_ident = %old_ident,
|
||||
persistence_count = %self.completed_persistence_count,
|
||||
namespace_id = %self.namespace_id,
|
||||
table_id = %self.table_id,
|
||||
|
|
|
@ -7,7 +7,7 @@ use schema::Projection;
|
|||
///
|
||||
/// A [`Buffer`] can contain no writes.
|
||||
///
|
||||
/// [`BufferState`]: super::super::BufferState
|
||||
/// [`BufferState`]: super::BufferState
|
||||
#[derive(Debug, Default)]
|
||||
pub(super) struct Buffer {
|
||||
buffer: Option<MutableBatch>,
|
||||
|
|
|
@ -77,7 +77,7 @@ pub(crate) struct BufferState<T> {
|
|||
|
||||
impl BufferState<Buffering> {
|
||||
/// Initialise a new buffer state machine.
|
||||
pub(super) fn new() -> Self {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
state: Buffering::default(),
|
||||
sequence_numbers: SequenceNumberSet::default(),
|
||||
|
|
|
@ -2,14 +2,18 @@ use std::fmt::Display;
|
|||
|
||||
use crate::query_adaptor::QueryAdaptor;
|
||||
|
||||
/// An opaque generational identifier of a buffer in a [`PartitionData`].
|
||||
/// An opaque, monotonic generational identifier of a buffer in a
|
||||
/// [`PartitionData`].
|
||||
///
|
||||
/// A [`BatchIdent`] is strictly greater than all those that were obtained
|
||||
/// before it.
|
||||
///
|
||||
/// [`PartitionData`]: super::PartitionData
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
||||
pub(super) struct BatchIdent(u64);
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd)]
|
||||
pub(crate) struct BatchIdent(u64);
|
||||
|
||||
impl BatchIdent {
|
||||
/// Return the next unique value.
|
||||
/// Return the next unique monotonic value.
|
||||
pub(super) fn next(&mut self) -> Self {
|
||||
self.0 += 1;
|
||||
Self(self.0)
|
||||
|
|
|
@ -0,0 +1,467 @@
|
|||
use std::collections::VecDeque;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::TimestampMinMax;
|
||||
use schema::{merge::SchemaMerger, Schema};
|
||||
|
||||
use crate::query::projection::OwnedProjection;
|
||||
|
||||
use super::{
|
||||
buffer::{traits::Queryable, BufferState, Persisting},
|
||||
persisting::BatchIdent,
|
||||
};
|
||||
|
||||
/// An ordered list of buffered, persisting data as [`BufferState<Persisting>`]
|
||||
/// FSM instances.
|
||||
///
|
||||
/// This type maintains a cache of row count & timestamp min/max statistics
|
||||
/// across all persisting batches, and performs incremental computation at
|
||||
/// persist time, moving it out of the query execution path.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct PersistingList {
|
||||
/// The currently persisting [`DataBuffer`] instances, if any.
|
||||
///
|
||||
/// This queue is ordered from newest at the head, to oldest at the tail -
|
||||
/// forward iteration order matches write order.
|
||||
///
|
||||
/// The [`BatchIdent`] is a generational counter that is used to tag each
|
||||
/// persisting batch with a unique, opaque, monotonic identifier.
|
||||
///
|
||||
/// [`DataBuffer`]: super::buffer::DataBuffer
|
||||
persisting: VecDeque<(BatchIdent, BufferState<Persisting>)>,
|
||||
|
||||
cached: Option<CachedStats>,
|
||||
}
|
||||
|
||||
impl Default for PersistingList {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
persisting: VecDeque::with_capacity(1),
|
||||
cached: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PersistingList {
|
||||
/// Add this `buffer` which was assigned `ident` when marked as persisting
|
||||
/// to the list.
|
||||
///
|
||||
/// This call incrementally recomputes the cached data statistics.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if a batch with a later `ident` has already been added to this
|
||||
/// list - calls MUST push ordered buffers/idents to maintain correct
|
||||
/// ordering of row updates across batches.
|
||||
///
|
||||
/// The provided buffer MUST be non-empty (containing a timestamp column,
|
||||
/// and a schema)
|
||||
pub(crate) fn push(&mut self, ident: BatchIdent, buffer: BufferState<Persisting>) {
|
||||
// Recompute the statistics.
|
||||
match &mut self.cached {
|
||||
Some(v) => v.push(&buffer),
|
||||
None => {
|
||||
// Set the cached stats, as there's no other stats to merge
|
||||
// with, so skip merging schemas.
|
||||
self.cached = Some(CachedStats {
|
||||
rows: buffer.rows(),
|
||||
timestamps: buffer
|
||||
.timestamp_stats()
|
||||
.expect("persisting batch must contain timestamps"),
|
||||
schema: buffer.schema().expect("persisting batch must have schema"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Invariant: the batch being added MUST be ordered strictly after
|
||||
// existing batches.
|
||||
//
|
||||
// The BatchIdent provides this ordering assurance, as it is a monotonic
|
||||
// (opaque) identifier.
|
||||
assert!(self
|
||||
.persisting
|
||||
.back()
|
||||
.map(|(last, _)| ident > *last)
|
||||
.unwrap_or(true));
|
||||
|
||||
self.persisting.push_back((ident, buffer));
|
||||
}
|
||||
|
||||
/// Remove the buffer identified by `ident` from the list.
|
||||
///
|
||||
/// There is no ordering requirement for this call, but is more efficient
|
||||
/// when removals match the order of calls to [`PersistingList::push()`].
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This method panics if there is currently no batch identified by `ident`
|
||||
/// in the list.
|
||||
pub(crate) fn remove(&mut self, ident: BatchIdent) -> BufferState<Persisting> {
|
||||
let idx = self
|
||||
.persisting
|
||||
.iter()
|
||||
.position(|(old, _)| *old == ident)
|
||||
.expect("no currently persisting batch");
|
||||
|
||||
let (old_ident, fsm) = self.persisting.remove(idx).unwrap();
|
||||
assert_eq!(old_ident, ident);
|
||||
|
||||
// Recompute the cache of all remaining persisting batch stats (if any)
|
||||
self.cached = CachedStats::new(self.persisting.iter().map(|(_, v)| v));
|
||||
|
||||
fsm
|
||||
}
|
||||
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
self.persisting.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the row count sum across all batches in this list.
|
||||
///
|
||||
/// This is an `O(1)` operation.
|
||||
pub(crate) fn rows(&self) -> usize {
|
||||
self.cached.as_ref().map(|v| v.rows).unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Returns the timestamp min/max values across all batches in this list.
|
||||
///
|
||||
/// This is an `O(1)` operation.
|
||||
pub(crate) fn timestamp_stats(&self) -> Option<TimestampMinMax> {
|
||||
self.cached.as_ref().map(|v| v.timestamps)
|
||||
}
|
||||
|
||||
/// Returns the merged schema of all batches in this list.
|
||||
///
|
||||
/// This is an `O(1)` operation.
|
||||
pub(crate) fn schema(&self) -> Option<&Schema> {
|
||||
self.cached.as_ref().map(|v| &v.schema)
|
||||
}
|
||||
|
||||
/// Returns the [`RecordBatch`] in this list, optionally applying the given
|
||||
/// projection.
|
||||
///
|
||||
/// This is an `O(n)` operation.
|
||||
pub(crate) fn get_query_data<'a, 'b: 'a>(
|
||||
&'a self,
|
||||
projection: &'b OwnedProjection,
|
||||
) -> impl Iterator<Item = RecordBatch> + 'a {
|
||||
self.persisting
|
||||
.iter()
|
||||
.flat_map(move |(_, b)| b.get_query_data(projection))
|
||||
}
|
||||
}
|
||||
|
||||
/// The set of cached statistics describing the batches of data within the
|
||||
/// [`PersistingList`].
|
||||
#[derive(Debug)]
|
||||
struct CachedStats {
|
||||
rows: usize,
|
||||
timestamps: TimestampMinMax,
|
||||
|
||||
/// The merged schema of all the persisting batches.
|
||||
schema: Schema,
|
||||
}
|
||||
|
||||
impl CachedStats {
|
||||
/// Generate a new [`CachedStats`] from an iterator of batches, if any.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If any batches are empty (containing no schema or timestamp column), or
|
||||
/// the batches do not contain compatible schemas, this call panics.
|
||||
fn new<'a, T>(mut iter: T) -> Option<Self>
|
||||
where
|
||||
T: Iterator<Item = &'a BufferState<Persisting>> + 'a,
|
||||
{
|
||||
let v = iter.next()?;
|
||||
|
||||
let mut schema = SchemaMerger::new();
|
||||
schema = schema
|
||||
.merge(&v.schema().expect("persisting batch must be non-empty"))
|
||||
.unwrap();
|
||||
|
||||
let mut rows = v.rows();
|
||||
debug_assert!(rows > 0);
|
||||
|
||||
let mut timestamps = v
|
||||
.timestamp_stats()
|
||||
.expect("unprojected batch should have timestamp");
|
||||
|
||||
for buf in iter {
|
||||
rows += buf.rows();
|
||||
if let Some(v) = buf.schema() {
|
||||
debug_assert!(buf.rows() > 0);
|
||||
|
||||
schema = schema
|
||||
.merge(&v)
|
||||
.expect("persit list contains incompatible schemas");
|
||||
|
||||
let ts = buf
|
||||
.timestamp_stats()
|
||||
.expect("no timestamp for bach containing rows");
|
||||
|
||||
timestamps.min = timestamps.min.min(ts.min);
|
||||
timestamps.max = timestamps.max.max(ts.max);
|
||||
}
|
||||
}
|
||||
|
||||
Some(Self {
|
||||
rows,
|
||||
timestamps,
|
||||
schema: schema.build(),
|
||||
})
|
||||
}
|
||||
|
||||
// Incrementally recompute the cached stats by adding `buffer` to the
|
||||
// statistics.
|
||||
fn push(&mut self, buffer: &BufferState<Persisting>) {
|
||||
// This re-computation below MUST complete - no early exit is allowed or
|
||||
// the stats will be left in an inconsistent state.
|
||||
|
||||
self.rows += buffer.rows();
|
||||
|
||||
let ts = buffer
|
||||
.timestamp_stats()
|
||||
.expect("persisting batch must contain timestamps");
|
||||
|
||||
self.timestamps.min = self.timestamps.min.min(ts.min);
|
||||
self.timestamps.max = self.timestamps.max.max(ts.max);
|
||||
|
||||
let mut schema = SchemaMerger::new();
|
||||
schema = schema.merge(&self.schema).unwrap();
|
||||
schema = schema
|
||||
.merge(&buffer.schema().expect("persisting batch must have schema"))
|
||||
.expect("incompatible schema");
|
||||
self.schema = schema.build()
|
||||
}
|
||||
}
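
The design split is worth noting: `CachedStats::push` folds a new batch into the running row count, timestamp min/max, and merged schema in O(1), while removals (in `PersistingList::remove`) rebuild the cache from the surviving batches, since a min/max cannot be "subtracted" from. A minimal sketch of that push-incrementally / rebuild-on-remove strategy with simplified stand-in types:

// Sketch only: simplified stand-ins for the persisting FSM batches and stats.
#[derive(Debug, Clone, Copy)]
struct Batch {
    rows: usize,
    min_ts: i64,
    max_ts: i64,
}

#[derive(Debug, Clone, Copy)]
struct Stats {
    rows: usize,
    min_ts: i64,
    max_ts: i64,
}

impl Stats {
    /// Fold one batch into an (optional) running aggregate.
    fn fold(acc: Option<Stats>, b: &Batch) -> Option<Stats> {
        let s = acc.unwrap_or(Stats { rows: 0, min_ts: i64::MAX, max_ts: i64::MIN });
        Some(Stats {
            rows: s.rows + b.rows,
            min_ts: s.min_ts.min(b.min_ts),
            max_ts: s.max_ts.max(b.max_ts),
        })
    }
}

#[derive(Debug, Default)]
struct List {
    batches: Vec<(u64, Batch)>, // (monotonic ident, batch)
    cached: Option<Stats>,
}

impl List {
    /// O(1): fold the new batch into the cached aggregate.
    fn push(&mut self, ident: u64, b: Batch) {
        // Idents must be pushed in increasing order, as in PersistingList.
        assert!(self.batches.last().map(|(last, _)| ident > *last).unwrap_or(true));
        self.cached = Stats::fold(self.cached, &b);
        self.batches.push((ident, b));
    }

    /// O(n): min/max cannot be un-merged, so rebuild from the remaining batches.
    fn remove(&mut self, ident: u64) -> Batch {
        let idx = self.batches.iter().position(|(i, _)| *i == ident).expect("unknown batch");
        let (_, b) = self.batches.remove(idx);
        self.cached = self.batches.iter().fold(None, |acc, (_, b)| Stats::fold(acc, b));
        b
    }
}

fn main() {
    let mut list = List::default();
    list.push(1, Batch { rows: 1, min_ts: 42, max_ts: 42 });
    list.push(2, Batch { rows: 2, min_ts: 40, max_ts: 100 });
    assert_eq!(list.cached.unwrap().rows, 3);

    list.remove(1);
    let s = list.cached.unwrap();
    assert_eq!((s.rows, s.min_ts, s.max_ts), (2, 40, 100));
}
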
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use arrow_util::assert_batches_eq;
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::SequenceNumber;
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
|
||||
use crate::buffer_tree::partition::buffer::Transition;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Ensure the ordering of yielded batches matches that of the calls to
|
||||
/// push(), preserving batch ordering, and in turn, causal row ordering.
|
||||
#[test]
|
||||
fn test_batch_ordering() {
|
||||
let mut list = PersistingList::default();
|
||||
let mut ident_oracle = BatchIdent::default();
|
||||
|
||||
assert!(list.is_empty());
|
||||
|
||||
// Generate a buffer with a single row.
|
||||
let buffer = buffer_with_lp(r#"bananas,tag=platanos great="yes" 42"#);
|
||||
|
||||
// Add it to the list.
|
||||
list.push(ident_oracle.next(), buffer);
|
||||
|
||||
// The statistics must now match the expected values.
|
||||
assert!(!list.is_empty());
|
||||
assert_eq!(list.rows(), 1);
|
||||
assert_matches!(
|
||||
list.timestamp_stats(),
|
||||
Some(TimestampMinMax { min: 42, max: 42 })
|
||||
);
|
||||
assert_schema_matches(list.schema().unwrap(), &["time", "great", "tag"]);
|
||||
|
||||
// Assert the row content
|
||||
let data = list
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.collect::<Vec<_>>();
|
||||
let expected = vec![
|
||||
"+-------+----------+--------------------------------+",
|
||||
"| great | tag | time |",
|
||||
"+-------+----------+--------------------------------+",
|
||||
"| yes | platanos | 1970-01-01T00:00:00.000000042Z |",
|
||||
"+-------+----------+--------------------------------+",
|
||||
];
|
||||
assert_eq!(data.len(), 1);
|
||||
assert_batches_eq!(&expected, &data);
|
||||
|
||||
// Push a new buffer updating the last row to check yielded row ordering.
|
||||
let buffer = buffer_with_lp(r#"bananas,tag=platanos great="definitely" 42"#);
|
||||
list.push(ident_oracle.next(), buffer);
|
||||
|
||||
// The statistics must now match the expected values.
|
||||
assert!(!list.is_empty());
|
||||
assert_eq!(list.rows(), 2);
|
||||
assert_matches!(
|
||||
list.timestamp_stats(),
|
||||
Some(TimestampMinMax { min: 42, max: 42 })
|
||||
);
|
||||
assert_schema_matches(list.schema().unwrap(), &["time", "great", "tag"]);
|
||||
|
||||
// Assert the row content
|
||||
let data = list
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.collect::<Vec<_>>();
|
||||
let expected = vec![
|
||||
"+------------+----------+--------------------------------+",
|
||||
"| great | tag | time |",
|
||||
"+------------+----------+--------------------------------+",
|
||||
"| yes | platanos | 1970-01-01T00:00:00.000000042Z |",
|
||||
"| definitely | platanos | 1970-01-01T00:00:00.000000042Z |",
|
||||
"+------------+----------+--------------------------------+",
|
||||
];
|
||||
assert_eq!(data.len(), 2);
|
||||
assert_batches_eq!(&expected, &data);
|
||||
}
|
||||
|
||||
/// Assert projection across batches works, and does not panic when given a
|
||||
/// missing column.
|
||||
#[test]
|
||||
fn test_projection() {
|
||||
let mut list = PersistingList::default();
|
||||
let mut ident_oracle = BatchIdent::default();
|
||||
|
||||
assert!(list.is_empty());
|
||||
|
||||
// Populate the list.
|
||||
list.push(
|
||||
ident_oracle.next(),
|
||||
buffer_with_lp(
|
||||
"\
|
||||
bananas,tag=platanos v=1 42\n\
|
||||
bananas,tag=platanos v=2,bananas=100 4242\n\
|
||||
",
|
||||
),
|
||||
);
|
||||
|
||||
list.push(
|
||||
ident_oracle.next(),
|
||||
buffer_with_lp(
|
||||
"\
|
||||
bananas,tag=platanos v=3 424242\n\
|
||||
bananas v=4,bananas=200 42424242\n\
|
||||
",
|
||||
),
|
||||
);
|
||||
|
||||
// Assert the row content
|
||||
let data = list
|
||||
.get_query_data(&OwnedProjection::from(vec!["time", "tag", "missing"]))
|
||||
.collect::<Vec<_>>();
|
||||
let expected = vec![
|
||||
"+--------------------------------+----------+",
|
||||
"| time | tag |",
|
||||
"+--------------------------------+----------+",
|
||||
"| 1970-01-01T00:00:00.000000042Z | platanos |",
|
||||
"| 1970-01-01T00:00:00.000004242Z | platanos |",
|
||||
"| 1970-01-01T00:00:00.000424242Z | platanos |",
|
||||
"| 1970-01-01T00:00:00.042424242Z | |",
|
||||
"+--------------------------------+----------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &data);
|
||||
}
|
||||
|
||||
/// Validate the cached statistics as batches are added and removed.
|
||||
#[test]
|
||||
fn test_cached_statistics() {
|
||||
let mut list = PersistingList::default();
|
||||
let mut ident_oracle = BatchIdent::default();
|
||||
|
||||
assert!(list.is_empty());
|
||||
|
||||
// Generate a buffer with a single row.
|
||||
let first_batch = ident_oracle.next();
|
||||
list.push(
|
||||
first_batch,
|
||||
buffer_with_lp(r#"bananas,tag=platanos great="yes" 42"#),
|
||||
);
|
||||
|
||||
// The statistics must now match the expected values.
|
||||
assert!(!list.is_empty());
|
||||
assert_eq!(list.rows(), 1);
|
||||
assert_matches!(
|
||||
list.timestamp_stats(),
|
||||
Some(TimestampMinMax { min: 42, max: 42 })
|
||||
);
|
||||
assert_schema_matches(list.schema().unwrap(), &["time", "great", "tag"]);
|
||||
|
||||
// Push another row.
|
||||
let second_batch = ident_oracle.next();
|
||||
list.push(
|
||||
second_batch,
|
||||
buffer_with_lp(r#"bananas,another=yes great="definitely",incremental=true 4242"#),
|
||||
);
|
||||
|
||||
// The statistics must now match the expected values.
|
||||
assert!(!list.is_empty());
|
||||
assert_eq!(list.rows(), 2);
|
||||
assert_matches!(
|
||||
list.timestamp_stats(),
|
||||
Some(TimestampMinMax { min: 42, max: 4242 })
|
||||
);
|
||||
assert_schema_matches(
|
||||
list.schema().unwrap(),
|
||||
&["time", "great", "tag", "another", "incremental"],
|
||||
);
|
||||
|
||||
// Remove the first batch.
|
||||
list.remove(first_batch);
|
||||
|
||||
// The statistics must now match the second batch values.
|
||||
assert!(!list.is_empty());
|
||||
assert_eq!(list.rows(), 1);
|
||||
assert_matches!(
|
||||
list.timestamp_stats(),
|
||||
Some(TimestampMinMax {
|
||||
min: 4242,
|
||||
max: 4242
|
||||
})
|
||||
);
|
||||
assert_schema_matches(
|
||||
list.schema().unwrap(),
|
||||
&["time", "great", "another", "incremental"],
|
||||
);
|
||||
|
||||
// Remove the second/final batch.
|
||||
list.remove(second_batch);
|
||||
|
||||
assert!(list.is_empty());
|
||||
assert_eq!(list.rows(), 0);
|
||||
assert_matches!(list.timestamp_stats(), None);
|
||||
assert_matches!(list.schema(), None);
|
||||
}
|
||||
|
||||
/// Assert the schema columns match the given names.
|
||||
fn assert_schema_matches(schema: &Schema, cols: &[&str]) {
|
||||
let schema = schema.as_arrow();
|
||||
let got = schema
|
||||
.all_fields()
|
||||
.into_iter()
|
||||
.map(|v| v.name().to_owned())
|
||||
.collect::<BTreeSet<_>>();
|
||||
|
||||
let want = cols
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<BTreeSet<_>>();
|
||||
|
||||
assert_eq!(got, want);
|
||||
}
|
||||
|
||||
/// Return a persisting buffer containing the given LP content.
|
||||
fn buffer_with_lp(lp: &str) -> BufferState<Persisting> {
|
||||
let mut buffer = BufferState::new();
|
||||
// Write some data to a buffer.
|
||||
buffer
|
||||
.write(lp_to_mutable_batch(lp).1, SequenceNumber::new(0))
|
||||
.expect("write to empty buffer should succeed");
|
||||
|
||||
// Convert the buffer into a persisting snapshot.
|
||||
match buffer.snapshot() {
|
||||
Transition::Ok(v) => v.into_persisting(),
|
||||
Transition::Unchanged(_) => panic!("did not transition to snapshot state"),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,3 +1,5 @@
use gossip::{GossipHandle, NopDispatcher};

/// This needs to be pub for the benchmarks but should not be used outside the crate.
#[cfg(feature = "benches")]
pub use wal_replay::*;

@ -5,7 +7,7 @@ pub use wal_replay::*;
mod graceful_shutdown;
mod wal_replay;

use std::{path::PathBuf, sync::Arc, time::Duration};
use std::{net::SocketAddr, path::PathBuf, sync::Arc, time::Duration};

use arrow_flight::flight_service_server::FlightService;
use backoff::BackoffConfig;

@ -109,6 +111,9 @@ pub struct IngesterGuard<T> {
    /// The task handle executing the graceful shutdown once triggered.
    graceful_shutdown_handler: tokio::task::JoinHandle<()>,
    shutdown_complete: Shared<oneshot::Receiver<()>>,

    /// An optional handle to the gossip sub-system, if running.
    gossip_handle: Option<GossipHandle>,
}

impl<T> IngesterGuard<T>

@ -137,6 +142,27 @@ impl<T> Drop for IngesterGuard<T> {
    }
}

/// Configuration parameters for the optional gossip sub-system.
#[derive(Debug, Default)]
pub enum GossipConfig {
    /// Disable the gossip sub-system.
    #[default]
    Disabled,

    /// Enable the gossip sub-system, listening on the specified `bind_addr` and
    /// using `peers` as the initial peer seed list.
    Enabled {
        /// UDP socket address to use for gossip communication.
        bind_addr: SocketAddr,
        /// Initial peer seed list in the form of either:
        ///
        /// - "dns.address.example:port"
        /// - "10.0.0.1:port"
        ///
        peers: Vec<String>,
    },
}

/// Errors that occur during initialisation of an `ingester` instance.
#[derive(Debug, Error)]
pub enum InitError {

@ -152,6 +178,10 @@ pub enum InitError {
    /// An error replaying the entries in the WAL.
    #[error(transparent)]
    WalReplay(Box<dyn std::error::Error>),

    /// An error binding the UDP socket for gossip communication.
    #[error("failed to bind udp gossip socket: {0}")]
    GossipBind(std::io::Error),
}

/// Initialise a new `ingester` instance, returning the gRPC service handler

@ -238,6 +268,7 @@ pub async fn new<F>(
    persist_queue_depth: usize,
    persist_hot_partition_cost: usize,
    object_store: ParquetStorage,
    gossip: GossipConfig,
    shutdown: F,
) -> Result<IngesterGuard<impl IngesterRpcInterface>, InitError>
where

@ -351,11 +382,9 @@ where

    // Initialize disk metrics to emit disk capacity / free statistics for the
    // WAL directory.
    let disk_metric_task = tokio::task::spawn(
        DiskSpaceMetrics::new(wal_directory, &metrics)
            .expect("failed to resolve WAL directory to disk")
            .run(),
    );
    let (disk_metric_task, _snapshot_rx) = DiskSpaceMetrics::new(wal_directory, &metrics)
        .expect("failed to resolve WAL directory to disk");
    let disk_metric_task = tokio::task::spawn(disk_metric_task.run());

    // Replay the WAL log files, if any.
    let max_sequence_number =

@ -422,6 +451,23 @@ where
        wal_reference_handle,
    ));

    // Optionally start the gossip subsystem
    let gossip_handle = match gossip {
        GossipConfig::Disabled => {
            info!("gossip disabled");
            None
        }
        GossipConfig::Enabled { bind_addr, peers } => {
            // Start the gossip sub-system, which logs during init.
            let handle =
                gossip::Builder::new(peers, NopDispatcher::default(), Arc::clone(&metrics))
                    .bind(bind_addr)
                    .await
                    .map_err(InitError::GossipBind)?;
            Some(handle)
        }
    };

    Ok(IngesterGuard {
        rpc: GrpcDelegate::new(
            Arc::new(write_path),

@ -438,5 +484,6 @@ where
        disk_metric_task,
        graceful_shutdown_handler: shutdown_task,
        shutdown_complete: shutdown_rx.shared(),
        gossip_handle,
    })
}
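For orientation, a minimal sketch (not part of this change set) of how a caller might construct the two `GossipConfig` variants introduced above. It assumes the enum is re-exported from the `ingester` crate root, as the later `use ingester::{GossipConfig, ...}` imports in this diff suggest; the `gossip_config_from_opts` helper and its parameters are purely illustrative.

use std::net::SocketAddr;

use ingester::GossipConfig;

/// Illustrative helper: map an optional bind address and a seed list onto the
/// gossip config enum. `bind` and `seeds` are assumed inputs, not real flags.
fn gossip_config_from_opts(bind: Option<&str>, seeds: &[&str]) -> GossipConfig {
    match bind {
        // No bind address supplied: leave gossip disabled (the enum default).
        None => GossipConfig::Disabled,
        // Bind address supplied: enable gossip on that UDP address with the
        // given initial peer seed list.
        Some(addr) => GossipConfig::Enabled {
            bind_addr: addr.parse::<SocketAddr>().expect("invalid gossip bind address"),
            peers: seeds.iter().map(|s| s.to_string()).collect(),
        },
    }
}

The resulting value would be passed to `ingester::new(...)` as the new `gossip` argument, mirroring the `GossipConfig::default()` call shown in the test context builder later in this diff.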
@ -200,6 +200,7 @@
|
|||
unused_crate_dependencies,
|
||||
missing_docs
|
||||
)]
|
||||
#![allow(clippy::default_constructed_unit_structs)]
|
||||
|
||||
// Workaround for "unused crate" lint false positives.
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -2,7 +2,8 @@ use std::{fmt::Debug, sync::Arc, time::Duration};
|
|||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{
|
||||
sequence_number_set::SequenceNumberSet, NamespaceId, ParquetFileParams, PartitionId, TableId,
|
||||
sequence_number_set::SequenceNumberSet, NamespaceId, ParquetFileParams, TableId,
|
||||
TransitionPartitionId,
|
||||
};
|
||||
|
||||
use crate::wal::reference_tracker::WalReferenceHandle;
|
||||
|
@ -54,9 +55,9 @@ impl CompletedPersist {
|
|||
self.meta.table_id
|
||||
}
|
||||
|
||||
/// Returns the [`PartitionId`] of the persisted data.
|
||||
pub(crate) fn partition_id(&self) -> PartitionId {
|
||||
self.meta.partition_id
|
||||
/// Returns the [`TransitionPartitionId`] of the persisted data.
|
||||
pub(crate) fn partition_id(&self) -> &TransitionPartitionId {
|
||||
&self.meta.partition_id
|
||||
}
|
||||
|
||||
/// Returns the [`SequenceNumberSet`] of the persisted data.
|
||||
|
@ -166,15 +167,16 @@ pub(crate) mod mock {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::test_util::{ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_TABLE_ID};
|
||||
use crate::test_util::{
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, ARBITRARY_TRANSITION_PARTITION_ID,
|
||||
};
|
||||
use data_types::{ColumnId, ColumnSet, SequenceNumber, Timestamp};
|
||||
|
||||
fn arbitrary_file_meta() -> ParquetFileParams {
|
||||
ParquetFileParams {
|
||||
namespace_id: ARBITRARY_NAMESPACE_ID,
|
||||
table_id: ARBITRARY_TABLE_ID,
|
||||
partition_id: ARBITRARY_PARTITION_ID,
|
||||
partition_hash_id: None,
|
||||
partition_id: ARBITRARY_TRANSITION_PARTITION_ID.clone(),
|
||||
object_store_id: Default::default(),
|
||||
min_time: Timestamp::new(42),
|
||||
max_time: Timestamp::new(42),
|
||||
|
@ -226,7 +228,7 @@ mod tests {
|
|||
|
||||
assert_eq!(note.namespace_id(), meta.namespace_id);
|
||||
assert_eq!(note.table_id(), meta.table_id);
|
||||
assert_eq!(note.partition_id(), meta.partition_id);
|
||||
assert_eq!(note.partition_id(), &meta.partition_id);
|
||||
|
||||
assert_eq!(note.column_count(), meta.column_set.len());
|
||||
assert_eq!(note.row_count(), meta.row_count as usize);
|
||||
|
|
|
@ -151,7 +151,9 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::{
|
||||
persist::completion_observer::mock::MockCompletionObserver,
|
||||
test_util::{ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_TABLE_ID},
|
||||
test_util::{
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, ARBITRARY_TRANSITION_PARTITION_ID,
|
||||
},
|
||||
};
|
||||
use data_types::{
|
||||
sequence_number_set::SequenceNumberSet, ColumnId, ColumnSet, ParquetFileParams, Timestamp,
|
||||
|
@ -169,8 +171,7 @@ mod tests {
|
|||
let meta = ParquetFileParams {
|
||||
namespace_id: ARBITRARY_NAMESPACE_ID,
|
||||
table_id: ARBITRARY_TABLE_ID,
|
||||
partition_id: ARBITRARY_PARTITION_ID,
|
||||
partition_hash_id: None,
|
||||
partition_id: ARBITRARY_TRANSITION_PARTITION_ID.clone(),
|
||||
object_store_id: Default::default(),
|
||||
min_time: Timestamp::new(Duration::from_secs(1_000).as_nanos() as _),
|
||||
max_time: Timestamp::new(Duration::from_secs(1_042).as_nanos() as _), // 42 seconds later
|
||||
|
|
|
@ -16,7 +16,7 @@ mod tests {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{CompactionLevel, ParquetFile, TransitionPartitionId};
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use futures::TryStreamExt;
|
||||
use iox_catalog::{
|
||||
interface::{get_schema_by_id, Catalog, SoftDeletedRows},
|
||||
|
@ -190,7 +190,7 @@ mod tests {
|
|||
// Generate a partition with data
|
||||
let partition = partition_with_write(Arc::clone(&catalog)).await;
|
||||
let table_id = partition.lock().table_id();
|
||||
let partition_id = partition.lock().partition_id();
|
||||
let partition_id = partition.lock().transition_partition_id();
|
||||
let namespace_id = partition.lock().namespace_id();
|
||||
assert_matches!(partition.lock().sort_key(), SortKeyState::Provided(None));
|
||||
|
||||
|
@ -221,7 +221,7 @@ mod tests {
|
|||
assert_matches!(&completion_observer.calls().as_slice(), &[n] => {
|
||||
assert_eq!(n.namespace_id(), namespace_id);
|
||||
assert_eq!(n.table_id(), table_id);
|
||||
assert_eq!(n.partition_id(), partition_id);
|
||||
assert_eq!(n.partition_id(), &partition_id);
|
||||
assert_eq!(n.sequence_numbers().len(), 1);
|
||||
});
|
||||
|
||||
|
@ -243,12 +243,12 @@ mod tests {
|
|||
.repositories()
|
||||
.await
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
||||
.list_by_partition_not_to_delete(&partition_id)
|
||||
.await
|
||||
.expect("query for parquet files failed");
|
||||
|
||||
// Validate a single file was inserted with the expected properties.
|
||||
let (object_store_id, file_size_bytes) = assert_matches!(&*files, &[ParquetFile {
|
||||
let (object_store_id, file_size_bytes) = assert_matches!(&*files, [ParquetFile {
|
||||
namespace_id: got_namespace_id,
|
||||
table_id: got_table_id,
|
||||
partition_id: got_partition_id,
|
||||
|
@ -263,12 +263,12 @@ mod tests {
|
|||
{
|
||||
assert_eq!(created_at.get(), max_l0_created_at.get());
|
||||
|
||||
assert_eq!(got_namespace_id, namespace_id);
|
||||
assert_eq!(got_table_id, table_id);
|
||||
assert_eq!(got_partition_id, partition_id);
|
||||
assert_eq!(got_namespace_id, &namespace_id);
|
||||
assert_eq!(got_table_id, &table_id);
|
||||
assert_eq!(got_partition_id, &partition_id);
|
||||
|
||||
assert_eq!(row_count, 1);
|
||||
assert_eq!(compaction_level, CompactionLevel::Initial);
|
||||
assert_eq!(*row_count, 1);
|
||||
assert_eq!(compaction_level, &CompactionLevel::Initial);
|
||||
|
||||
(object_store_id, file_size_bytes)
|
||||
}
|
||||
|
@ -292,7 +292,7 @@ mod tests {
|
|||
}] => {
|
||||
let want_path = format!("{object_store_id}.parquet");
|
||||
assert!(location.as_ref().ends_with(&want_path));
|
||||
assert_eq!(size, file_size_bytes as usize);
|
||||
assert_eq!(size, *file_size_bytes as usize);
|
||||
}
|
||||
)
|
||||
}
|
||||
|
@ -326,8 +326,7 @@ mod tests {
|
|||
// Generate a partition with data
|
||||
let partition = partition_with_write(Arc::clone(&catalog)).await;
|
||||
let table_id = partition.lock().table_id();
|
||||
let partition_id = partition.lock().partition_id();
|
||||
let transition_partition_id = partition.lock().transition_partition_id();
|
||||
let partition_id = partition.lock().transition_partition_id();
|
||||
let namespace_id = partition.lock().namespace_id();
|
||||
assert_matches!(partition.lock().sort_key(), SortKeyState::Provided(None));
|
||||
|
||||
|
@ -344,7 +343,7 @@ mod tests {
|
|||
.await
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
&transition_partition_id,
|
||||
&partition_id,
|
||||
None,
|
||||
&["bananas", "are", "good", "for", "you"],
|
||||
)
|
||||
|
@ -367,7 +366,7 @@ mod tests {
|
|||
assert_matches!(&completion_observer.calls().as_slice(), &[n] => {
|
||||
assert_eq!(n.namespace_id(), namespace_id);
|
||||
assert_eq!(n.table_id(), table_id);
|
||||
assert_eq!(n.partition_id(), partition_id);
|
||||
assert_eq!(n.partition_id(), &partition_id);
|
||||
assert_eq!(n.sequence_numbers().len(), 1);
|
||||
});
|
||||
|
||||
|
@ -392,12 +391,12 @@ mod tests {
|
|||
.repositories()
|
||||
.await
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
||||
.list_by_partition_not_to_delete(&partition_id)
|
||||
.await
|
||||
.expect("query for parquet files failed");
|
||||
|
||||
// Validate a single file was inserted with the expected properties.
|
||||
let (object_store_id, file_size_bytes) = assert_matches!(&*files, &[ParquetFile {
|
||||
let (object_store_id, file_size_bytes) = assert_matches!(&*files, [ParquetFile {
|
||||
namespace_id: got_namespace_id,
|
||||
table_id: got_table_id,
|
||||
partition_id: got_partition_id,
|
||||
|
@ -412,12 +411,12 @@ mod tests {
|
|||
{
|
||||
assert_eq!(created_at.get(), max_l0_created_at.get());
|
||||
|
||||
assert_eq!(got_namespace_id, namespace_id);
|
||||
assert_eq!(got_table_id, table_id);
|
||||
assert_eq!(got_partition_id, partition_id);
|
||||
assert_eq!(got_namespace_id, &namespace_id);
|
||||
assert_eq!(got_table_id, &table_id);
|
||||
assert_eq!(got_partition_id, &partition_id);
|
||||
|
||||
assert_eq!(row_count, 1);
|
||||
assert_eq!(compaction_level, CompactionLevel::Initial);
|
||||
assert_eq!(*row_count, 1);
|
||||
assert_eq!(compaction_level, &CompactionLevel::Initial);
|
||||
|
||||
(object_store_id, file_size_bytes)
|
||||
}
|
||||
|
@ -438,18 +437,14 @@ mod tests {
|
|||
assert_eq!(files.len(), 2, "expected two uploaded files");
|
||||
|
||||
// Ensure the catalog record points at a valid file in object storage.
|
||||
let want_path = ParquetFilePath::new(
|
||||
namespace_id,
|
||||
table_id,
|
||||
&transition_partition_id,
|
||||
object_store_id,
|
||||
)
|
||||
.object_store_path();
|
||||
let want_path =
|
||||
ParquetFilePath::new(namespace_id, table_id, &partition_id, *object_store_id)
|
||||
.object_store_path();
|
||||
let file = files
|
||||
.into_iter()
|
||||
.find(|f| f.location == want_path)
|
||||
.expect("did not find final file in object storage");
|
||||
|
||||
assert_eq!(file.size, file_size_bytes as usize);
|
||||
assert_eq!(file.size, *file_size_bytes as usize);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,8 @@ pub(crate) mod mock {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use data_types::{
|
||||
ColumnId, ColumnSet, NamespaceId, ParquetFileParams, PartitionId, TableId, Timestamp,
|
||||
ColumnId, ColumnSet, NamespaceId, ParquetFileParams, PartitionHashId, PartitionKey,
|
||||
TableId, Timestamp, TransitionPartitionId,
|
||||
};
|
||||
use test_helpers::timeout::FutureTimeout;
|
||||
use tokio::task::JoinHandle;
|
||||
|
@ -155,13 +156,16 @@ pub(crate) mod mock {
|
|||
let wait_ms: u64 = rand::random::<u64>() % 100;
|
||||
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
|
||||
let sequence_numbers = partition.lock().mark_persisted(data);
|
||||
let table_id = TableId::new(2);
|
||||
let partition_hash_id =
|
||||
PartitionHashId::new(table_id, &PartitionKey::from("arbitrary"));
|
||||
let partition_id = TransitionPartitionId::Deterministic(partition_hash_id);
|
||||
completion_observer
|
||||
.persist_complete(Arc::new(CompletedPersist::new(
|
||||
ParquetFileParams {
|
||||
namespace_id: NamespaceId::new(1),
|
||||
table_id: TableId::new(2),
|
||||
partition_id: PartitionId::new(3),
|
||||
partition_hash_id: None,
|
||||
table_id,
|
||||
partition_id,
|
||||
object_store_id: Default::default(),
|
||||
min_time: Timestamp::new(42),
|
||||
max_time: Timestamp::new(42),
|
||||
|
|
|
@ -394,8 +394,7 @@ where
|
|||
ParquetFileParams {
|
||||
namespace_id: NamespaceId::new(1),
|
||||
table_id: TableId::new(2),
|
||||
partition_id: PartitionId::new(3),
|
||||
partition_hash_id: None,
|
||||
partition_id: ARBITRARY_TRANSITION_PARTITION_ID.clone(),
|
||||
object_store_id: Default::default(),
|
||||
min_time: Timestamp::new(42),
|
||||
max_time: Timestamp::new(42),
|
||||
|
|
|
@ -30,7 +30,7 @@ use futures::{stream::FuturesUnordered, FutureExt, StreamExt, TryStreamExt};
|
|||
use generated_types::influxdata::iox::ingester::v1::{
|
||||
write_service_server::WriteService, WriteRequest,
|
||||
};
|
||||
use ingester::{IngesterGuard, IngesterRpcInterface};
|
||||
use ingester::{GossipConfig, IngesterGuard, IngesterRpcInterface};
|
||||
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
|
||||
use iox_catalog::{
|
||||
interface::{Catalog, SoftDeletedRows},
|
||||
|
@ -168,6 +168,7 @@ impl TestContextBuilder {
|
|||
max_persist_queue_depth,
|
||||
persist_hot_partition_cost,
|
||||
storage.clone(),
|
||||
GossipConfig::default(),
|
||||
shutdown_rx.map(|v| v.expect("shutdown sender dropped without calling shutdown")),
|
||||
)
|
||||
.await
|
||||
|
|
|
@ -0,0 +1,24 @@
DROP TRIGGER IF EXISTS update_partition ON parquet_file;

ALTER TABLE parquet_file
    ALTER COLUMN partition_id
        DROP NOT NULL;

CREATE OR REPLACE FUNCTION update_partition_on_new_file_at()
    RETURNS TRIGGER
    LANGUAGE PLPGSQL
AS $$
BEGIN
    UPDATE partition
    SET new_file_at = NEW.created_at
    WHERE (NEW.partition_id IS NULL OR id = NEW.partition_id)
      AND (NEW.partition_hash_id IS NULL OR hash_id = NEW.partition_hash_id);

    RETURN NEW;
END;
$$;

CREATE TRIGGER update_partition
    AFTER INSERT ON parquet_file
    FOR EACH ROW
EXECUTE PROCEDURE update_partition_on_new_file_at();
@ -0,0 +1,98 @@
CREATE TABLE parquet_file_temp
AS SELECT * FROM parquet_file;

DROP TABLE parquet_file;

CREATE TABLE parquet_file
(
    id INTEGER
        constraint parquet_file_pkey
            primary key autoincrement,
    shard_id numeric not null
        constraint parquet_file_sequencer_id_fkey
            references shard,
    table_id numeric not null
        references table_name,
    partition_id numeric
        references partition,
    partition_hash_id bytea
        references partition (hash_id),

    object_store_id uuid not null
        constraint parquet_location_unique
            unique,
    max_sequence_number numeric,
    min_time numeric,
    max_time numeric,
    to_delete numeric,
    row_count numeric default 0 not null,
    file_size_bytes numeric default 0 not null,
    compaction_level smallint default 0 not null,
    created_at numeric,
    namespace_id numeric not null
        references namespace
            on delete cascade,
    column_set numeric[] not null,
    max_l0_created_at numeric default 0 not null
);

create index if not exists parquet_file_deleted_at_idx
    on parquet_file (to_delete);

create index if not exists parquet_file_partition_idx
    on parquet_file (partition_id);

create index if not exists parquet_file_table_idx
    on parquet_file (table_id);

create index if not exists parquet_file_shard_compaction_delete_idx
    on parquet_file (shard_id, compaction_level, to_delete);

create index if not exists parquet_file_shard_compaction_delete_created_idx
    on parquet_file (shard_id, compaction_level, to_delete, created_at);

create index if not exists parquet_file_partition_created_idx
    on parquet_file (partition_id, created_at);

CREATE INDEX IF NOT EXISTS parquet_file_partition_hash_id_idx
    ON parquet_file (partition_hash_id)
    WHERE partition_hash_id IS NOT NULL;

create trigger if not exists update_partition
    after insert
    on parquet_file
    for each row
begin
    UPDATE partition
    SET new_file_at = NEW.created_at
    WHERE (NEW.partition_id IS NULL OR id = NEW.partition_id)
      AND (NEW.partition_hash_id IS NULL OR hash_id = NEW.partition_hash_id);
end;

create trigger if not exists update_billing
    after insert
    on parquet_file
    for each row
begin
    INSERT INTO billing_summary (namespace_id, total_file_size_bytes)
    VALUES (NEW.namespace_id, NEW.file_size_bytes)
    ON CONFLICT (namespace_id) DO UPDATE
    SET total_file_size_bytes = billing_summary.total_file_size_bytes + NEW.file_size_bytes
    WHERE billing_summary.namespace_id = NEW.namespace_id;
end;

create trigger if not exists decrement_summary
    after update
    on parquet_file
    for each row
    when OLD.to_delete IS NULL AND NEW.to_delete IS NOT NULL
begin
    UPDATE billing_summary
    SET total_file_size_bytes = billing_summary.total_file_size_bytes - OLD.file_size_bytes
    WHERE billing_summary.namespace_id = OLD.namespace_id;
end;

INSERT INTO parquet_file
SELECT * FROM parquet_file_temp;

DROP TABLE parquet_file_temp;
@ -1865,7 +1865,7 @@ pub(crate) mod test_helpers {
|
|||
|
||||
let other_params = ParquetFileParams {
|
||||
table_id: other_partition.table_id,
|
||||
partition_id: other_partition.id,
|
||||
partition_id: other_partition.transition_partition_id(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
min_time: Timestamp::new(50),
|
||||
max_time: Timestamp::new(60),
|
||||
|
@ -1978,7 +1978,7 @@ pub(crate) mod test_helpers {
|
|||
|
||||
let f1_params = ParquetFileParams {
|
||||
table_id: partition2.table_id,
|
||||
partition_id: partition2.id,
|
||||
partition_id: partition2.transition_partition_id(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
min_time: Timestamp::new(1),
|
||||
max_time: Timestamp::new(10),
|
||||
|
@ -2449,7 +2449,7 @@ pub(crate) mod test_helpers {
|
|||
let l0_five_hour_ago_file_params = ParquetFileParams {
|
||||
object_store_id: Uuid::new_v4(),
|
||||
created_at: time_five_hour_ago,
|
||||
partition_id: partition2.id,
|
||||
partition_id: partition2.transition_partition_id(),
|
||||
..parquet_file_params.clone()
|
||||
};
|
||||
repos
|
||||
|
@ -2492,7 +2492,7 @@ pub(crate) mod test_helpers {
|
|||
let l1_file_params = ParquetFileParams {
|
||||
object_store_id: Uuid::new_v4(),
|
||||
created_at: time_now,
|
||||
partition_id: partition2.id,
|
||||
partition_id: partition2.transition_partition_id(),
|
||||
compaction_level: CompactionLevel::FileNonOverlapped,
|
||||
..parquet_file_params.clone()
|
||||
};
|
||||
|
@ -2578,7 +2578,7 @@ pub(crate) mod test_helpers {
|
|||
let l2_file_params = ParquetFileParams {
|
||||
object_store_id: Uuid::new_v4(),
|
||||
created_at: time_now,
|
||||
partition_id: partition3.id,
|
||||
partition_id: partition3.transition_partition_id(),
|
||||
compaction_level: CompactionLevel::Final,
|
||||
..parquet_file_params.clone()
|
||||
};
|
||||
|
@ -2619,7 +2619,7 @@ pub(crate) mod test_helpers {
|
|||
let l0_one_hour_ago_file_params = ParquetFileParams {
|
||||
object_store_id: Uuid::new_v4(),
|
||||
created_at: time_one_hour_ago,
|
||||
partition_id: partition3.id,
|
||||
partition_id: partition3.transition_partition_id(),
|
||||
..parquet_file_params.clone()
|
||||
};
|
||||
repos
|
||||
|
@ -2720,8 +2720,7 @@ pub(crate) mod test_helpers {
|
|||
level1_file.compaction_level = CompactionLevel::FileNonOverlapped;
|
||||
|
||||
let other_partition_params = ParquetFileParams {
|
||||
partition_id: partition2.id,
|
||||
partition_hash_id: partition2.hash_id().cloned(),
|
||||
partition_id: partition2.transition_partition_id(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
..parquet_file_params.clone()
|
||||
};
|
||||
|
@ -2744,12 +2743,20 @@ pub(crate) mod test_helpers {
|
|||
expected_ids.sort();
|
||||
assert_eq!(file_ids, expected_ids);
|
||||
|
||||
// remove namespace to avoid it from affecting later tests
|
||||
repos
|
||||
.namespaces()
|
||||
.soft_delete("namespace_parquet_file_test_list_by_partiton_not_to_delete")
|
||||
// Using the catalog partition ID should return the same files, even if the Parquet file
|
||||
// records don't have the partition ID on them (which is the default now)
|
||||
let files = repos
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition.id))
|
||||
.await
|
||||
.expect("delete namespace should succeed");
|
||||
.unwrap();
|
||||
assert_eq!(files.len(), 2);
|
||||
|
||||
let mut file_ids: Vec<_> = files.into_iter().map(|f| f.id).collect();
|
||||
file_ids.sort();
|
||||
let mut expected_ids = vec![parquet_file.id, level1_file.id];
|
||||
expected_ids.sort();
|
||||
assert_eq!(file_ids, expected_ids);
|
||||
}
|
||||
|
||||
async fn test_update_to_compaction_level_1(catalog: Arc<dyn Catalog>) {
|
||||
|
|
|
@ -396,8 +396,7 @@ pub mod test_helpers {
|
|||
ParquetFileParams {
|
||||
namespace_id: namespace.id,
|
||||
table_id: table.id,
|
||||
partition_id: partition.id,
|
||||
partition_hash_id: partition.hash_id().cloned(),
|
||||
partition_id: partition.transition_partition_id(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
min_time: Timestamp::new(1),
|
||||
max_time: Timestamp::new(10),
|
||||
|
|
|
@ -887,14 +887,28 @@ impl ParquetFileRepo for MemTxn {
|
|||
) -> Result<Vec<ParquetFile>> {
|
||||
let stage = self.stage();
|
||||
|
||||
let partition = stage
|
||||
.partitions
|
||||
.iter()
|
||||
.find(|p| match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => p
|
||||
.hash_id()
|
||||
.map(|p_hash_id| p_hash_id == hash_id)
|
||||
.unwrap_or(false),
|
||||
TransitionPartitionId::Deprecated(id) => id == &p.id,
|
||||
})
|
||||
.unwrap()
|
||||
.clone();
|
||||
|
||||
Ok(stage
|
||||
.parquet_files
|
||||
.iter()
|
||||
.filter(|f| match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => {
|
||||
f.partition_hash_id.as_ref().map_or(false, |h| h == hash_id)
|
||||
}
|
||||
TransitionPartitionId::Deprecated(id) => f.partition_id == *id,
|
||||
.filter(|f| match &f.partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => partition
|
||||
.hash_id()
|
||||
.map(|p_hash_id| p_hash_id == hash_id)
|
||||
.unwrap_or(false),
|
||||
TransitionPartitionId::Deprecated(id) => id == &partition.id,
|
||||
})
|
||||
.filter(|f| f.to_delete.is_none())
|
||||
.cloned()
|
||||
|
@ -996,17 +1010,15 @@ async fn create_parquet_file(
|
|||
ParquetFileId::new(stage.parquet_files.len() as i64 + 1),
|
||||
);
|
||||
let created_at = parquet_file.created_at;
|
||||
let partition_id = parquet_file.partition_id;
|
||||
let partition_id = parquet_file.partition_id.clone();
|
||||
stage.parquet_files.push(parquet_file);
|
||||
|
||||
// Update the new_file_at field its partition to the time of created_at
|
||||
let partition = stage
|
||||
.partitions
|
||||
.iter_mut()
|
||||
.find(|p| p.id == partition_id)
|
||||
.ok_or(Error::PartitionNotFound {
|
||||
id: TransitionPartitionId::Deprecated(partition_id),
|
||||
})?;
|
||||
.find(|p| p.transition_partition_id() == partition_id)
|
||||
.ok_or(Error::PartitionNotFound { id: partition_id })?;
|
||||
partition.new_file_at = Some(created_at);
|
||||
|
||||
Ok(stage.parquet_files.last().unwrap().clone())
|
||||
|
|
|
@ -1627,22 +1627,26 @@ RETURNING id;
|
|||
let query = match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFile>(
|
||||
r#"
|
||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
||||
max_l0_created_at
|
||||
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||
compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
WHERE parquet_file.partition_hash_id = $1
|
||||
INNER JOIN partition
|
||||
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||
WHERE partition.hash_id = $1
|
||||
AND parquet_file.to_delete IS NULL;
|
||||
"#,
|
||||
)
|
||||
.bind(hash_id), // $1
|
||||
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFile>(
|
||||
r#"
|
||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
||||
max_l0_created_at
|
||||
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||
compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
WHERE parquet_file.partition_id = $1
|
||||
INNER JOIN partition
|
||||
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||
WHERE partition.id = $1
|
||||
AND parquet_file.to_delete IS NULL;
|
||||
"#,
|
||||
)
|
||||
|
@ -1754,7 +1758,6 @@ where
|
|||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
partition_hash_id,
|
||||
object_store_id,
|
||||
min_time,
|
||||
max_time,
|
||||
|
@ -1766,6 +1769,11 @@ where
|
|||
max_l0_created_at,
|
||||
} = parquet_file_params;
|
||||
|
||||
let (partition_id, partition_hash_id) = match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => (None, Some(hash_id)),
|
||||
TransitionPartitionId::Deprecated(id) => (Some(id), None),
|
||||
};
|
||||
|
||||
let partition_hash_id_ref = &partition_hash_id.as_ref();
|
||||
let query = sqlx::query_scalar::<_, ParquetFileId>(
|
||||
r#"
|
||||
|
@ -2203,7 +2211,10 @@ RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
|||
.create(parquet_file_params)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(parquet_file.partition_hash_id.is_none());
|
||||
assert_matches!(
|
||||
parquet_file.partition_id,
|
||||
TransitionPartitionId::Deprecated(_)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -1221,8 +1221,8 @@ struct ParquetFilePod {
|
|||
id: ParquetFileId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
partition_hash_id: Option<PartitionHashId>,
|
||||
#[sqlx(flatten)]
|
||||
partition_id: TransitionPartitionId,
|
||||
object_store_id: Uuid,
|
||||
min_time: Timestamp,
|
||||
max_time: Timestamp,
|
||||
|
@ -1242,7 +1242,6 @@ impl From<ParquetFilePod> for ParquetFile {
|
|||
namespace_id: value.namespace_id,
|
||||
table_id: value.table_id,
|
||||
partition_id: value.partition_id,
|
||||
partition_hash_id: value.partition_hash_id,
|
||||
object_store_id: value.object_store_id,
|
||||
min_time: value.min_time,
|
||||
max_time: value.max_time,
|
||||
|
@ -1395,22 +1394,26 @@ RETURNING id;
|
|||
let query = match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
||||
max_l0_created_at
|
||||
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||
compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
WHERE parquet_file.partition_hash_id = $1
|
||||
INNER JOIN partition
|
||||
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||
WHERE partition.hash_id = $1
|
||||
AND parquet_file.to_delete IS NULL;
|
||||
"#,
|
||||
)
|
||||
.bind(hash_id), // $1
|
||||
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
||||
max_l0_created_at
|
||||
SELECT parquet_file.id, namespace_id, parquet_file.table_id, partition_id, partition_hash_id,
|
||||
object_store_id, min_time, max_time, parquet_file.to_delete, file_size_bytes, row_count,
|
||||
compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
WHERE parquet_file.partition_id = $1
|
||||
INNER JOIN partition
|
||||
ON partition.id = parquet_file.partition_id OR partition.hash_id = parquet_file.partition_hash_id
|
||||
WHERE partition.id = $1
|
||||
AND parquet_file.to_delete IS NULL;
|
||||
"#,
|
||||
)
|
||||
|
@ -1533,7 +1536,6 @@ where
|
|||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
partition_hash_id,
|
||||
object_store_id,
|
||||
min_time,
|
||||
max_time,
|
||||
|
@ -1545,7 +1547,10 @@ where
|
|||
max_l0_created_at,
|
||||
} = parquet_file_params;
|
||||
|
||||
let partition_hash_id_ref = &partition_hash_id.as_ref();
|
||||
let (partition_id, partition_hash_id) = match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => (None, Some(hash_id)),
|
||||
TransitionPartitionId::Deprecated(id) => (Some(id), None),
|
||||
};
|
||||
let res = sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
INSERT INTO parquet_file (
|
||||
|
@ -1562,7 +1567,7 @@ RETURNING
|
|||
.bind(TRANSITION_SHARD_ID) // $1
|
||||
.bind(table_id) // $2
|
||||
.bind(partition_id) // $3
|
||||
.bind(partition_hash_id_ref) // $4
|
||||
.bind(partition_hash_id.as_ref()) // $4
|
||||
.bind(object_store_id) // $5
|
||||
.bind(min_time) // $6
|
||||
.bind(max_time) // $7
|
||||
|
@ -1811,7 +1816,10 @@ RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
|||
.create(parquet_file_params)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(parquet_file.partition_hash_id.is_none());
|
||||
assert_matches!(
|
||||
parquet_file.partition_id,
|
||||
TransitionPartitionId::Deprecated(_)
|
||||
);
|
||||
}
|
||||
|
||||
macro_rules! test_column_create_or_get_many_unchecked {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use data_types::{
|
||||
ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition,
|
||||
PartitionHashId, PartitionId, PartitionKey, SkippedCompaction, Table, TableId, Timestamp,
|
||||
TransitionPartitionId,
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -20,8 +21,7 @@ impl ParquetFileBuilder {
|
|||
id: ParquetFileId::new(id),
|
||||
namespace_id: NamespaceId::new(0),
|
||||
table_id,
|
||||
partition_id: PartitionId::new(0),
|
||||
partition_hash_id: Some(PartitionHashId::new(
|
||||
partition_id: TransitionPartitionId::Deterministic(PartitionHashId::new(
|
||||
table_id,
|
||||
&PartitionKey::from("arbitrary"),
|
||||
)),
|
||||
|
@ -39,11 +39,11 @@ impl ParquetFileBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
/// Set the partition id
|
||||
pub fn with_partition(self, id: i64) -> Self {
|
||||
/// Set the partition identifier
|
||||
pub fn with_partition(self, partition_id: TransitionPartitionId) -> Self {
|
||||
Self {
|
||||
file: ParquetFile {
|
||||
partition_id: PartitionId::new(id),
|
||||
partition_id,
|
||||
..self.file
|
||||
},
|
||||
}
|
||||
|
|
|
@ -602,8 +602,7 @@ impl TestPartition {
|
|||
let parquet_file_params = ParquetFileParams {
|
||||
namespace_id: self.namespace.namespace.id,
|
||||
table_id: self.table.table.id,
|
||||
partition_id: self.partition.id,
|
||||
partition_hash_id: self.partition.hash_id().cloned(),
|
||||
partition_id: self.partition.transition_partition_id(),
|
||||
object_store_id: object_store_id.unwrap_or_else(Uuid::new_v4),
|
||||
min_time: Timestamp::new(min_time),
|
||||
max_time: Timestamp::new(max_time),
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
// Workaround for "unused crate" lint false positives.
|
||||
use workspace_hack as _;
|
||||
|
||||
use data_types::{PartitionHashId, PartitionKey, TableId, TransitionPartitionId};
|
||||
|
||||
mod catalog;
|
||||
pub use catalog::{
|
||||
TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestTable,
|
||||
|
@ -24,3 +26,14 @@ pub use catalog::{
|
|||
|
||||
mod builders;
|
||||
pub use builders::{ParquetFileBuilder, PartitionBuilder, SkippedCompactionBuilder, TableBuilder};
|
||||
|
||||
/// Create a partition identifier from an int (which gets used as the table ID) and a partition key
|
||||
/// with the string "arbitrary". Most useful in cases where there isn't any actual catalog
|
||||
/// interaction (that is, in mocks) and when the important property of the partition identifiers is
|
||||
/// that they're either the same or different than other partition identifiers.
|
||||
pub fn partition_identifier(table_id: i64) -> TransitionPartitionId {
|
||||
TransitionPartitionId::Deterministic(PartitionHashId::new(
|
||||
TableId::new(table_id),
|
||||
&PartitionKey::from("arbitrary"),
|
||||
))
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ use generated_types::influxdata::iox::{
|
|||
},
|
||||
};
|
||||
use hyper::{Body, Request, Response};
|
||||
use ingester::{IngesterGuard, IngesterRpcInterface};
|
||||
use ingester::{GossipConfig, IngesterGuard, IngesterRpcInterface};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::exec::Executor;
|
||||
use ioxd_common::{
|
||||
|
@ -210,6 +210,14 @@ pub async fn create_ingester_server_type(
|
|||
) -> Result<Arc<dyn ServerType>> {
|
||||
let (shutdown_tx, shutdown_rx) = oneshot::channel();
|
||||
|
||||
let gossip = match ingester_config.gossip_config.gossip_bind_address {
|
||||
None => GossipConfig::Disabled,
|
||||
Some(v) => GossipConfig::Enabled {
|
||||
bind_addr: v.into(),
|
||||
peers: ingester_config.gossip_config.seed_list.clone(),
|
||||
},
|
||||
};
|
||||
|
||||
let grpc = ingester::new(
|
||||
catalog,
|
||||
Arc::clone(&metrics),
|
||||
|
@ -221,6 +229,7 @@ pub async fn create_ingester_server_type(
|
|||
ingester_config.persist_queue_depth,
|
||||
ingester_config.persist_hot_partition_cost,
|
||||
object_store,
|
||||
gossip,
|
||||
shutdown_rx.map(|v| v.expect("shutdown sender dropped without calling shutdown")),
|
||||
)
|
||||
.await?;
|
||||
|
|
|
@ -10,6 +10,7 @@ async-trait = "0.1"
|
|||
authz = { path = "../authz" }
|
||||
clap_blocks = { path = "../clap_blocks" }
|
||||
data_types = { path = "../data_types" }
|
||||
gossip = { version = "0.1.0", path = "../gossip" }
|
||||
hashbrown = { workspace = true }
|
||||
hyper = "0.14"
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
|
|
|
@ -10,7 +10,9 @@
|
|||
missing_debug_implementations,
|
||||
unused_crate_dependencies
|
||||
)]
|
||||
#![allow(clippy::default_constructed_unit_structs)]
|
||||
|
||||
use gossip::NopDispatcher;
|
||||
// Workaround for "unused crate" lint false positives.
|
||||
use workspace_hack as _;
|
||||
|
||||
|
@ -21,7 +23,7 @@ use std::{
|
|||
|
||||
use async_trait::async_trait;
|
||||
use authz::{Authorizer, AuthorizerInstrumentation, IoxAuthorizer};
|
||||
use clap_blocks::router::RouterConfig;
|
||||
use clap_blocks::{gossip::GossipConfig, router::RouterConfig};
|
||||
use data_types::NamespaceName;
|
||||
use hashbrown::HashMap;
|
||||
use hyper::{Body, Request, Response};
|
||||
|
@ -86,6 +88,10 @@ pub enum Error {
|
|||
source: Box<dyn std::error::Error>,
|
||||
addr: String,
|
||||
},
|
||||
|
||||
/// An error binding the UDP socket for gossip communication.
|
||||
#[error("failed to bind udp gossip socket: {0}")]
|
||||
GossipBind(std::io::Error),
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -218,6 +224,7 @@ pub async fn create_router_server_type(
|
|||
catalog: Arc<dyn Catalog>,
|
||||
object_store: Arc<DynObjectStore>,
|
||||
router_config: &RouterConfig,
|
||||
gossip_config: &GossipConfig,
|
||||
trace_context_header_name: String,
|
||||
) -> Result<Arc<dyn ServerType>> {
|
||||
let ingester_connections = router_config.ingester_addresses.iter().map(|addr| {
|
||||
|
@ -333,6 +340,28 @@ pub async fn create_router_server_type(
|
|||
// Record the overall request handling latency
|
||||
let handler_stack = InstrumentationDecorator::new("request", &metrics, handler_stack);
|
||||
|
||||
// Optionally initialise the gossip subsystem.
//
// NOTE: the handle is completely unused, but needs to live as long as the
// server does to do anything useful (RAII), so it is placed in the
// RpcWriteRouterServer, which doesn't need it at all.
|
||||
//
|
||||
// TODO: remove handle from RpcWriteRouterServer when using handle
|
||||
let gossip_handle = match gossip_config.gossip_bind_address {
|
||||
Some(bind_addr) => {
|
||||
let handle = gossip::Builder::new(
|
||||
gossip_config.seed_list.clone(),
|
||||
NopDispatcher::default(),
|
||||
Arc::clone(&metrics),
|
||||
)
|
||||
.bind(*bind_addr)
|
||||
.await
|
||||
.map_err(Error::GossipBind)?;
|
||||
Some(handle)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
// Initialize the HTTP API delegate
|
||||
let write_request_unifier: Result<Box<dyn WriteRequestUnifier>> = match (
|
||||
router_config.single_tenant_deployment,
|
||||
|
@ -379,8 +408,13 @@ pub async fn create_router_server_type(
|
|||
// `RpcWriteRouterServerType`.
|
||||
let grpc = RpcWriteGrpcDelegate::new(catalog, object_store);
|
||||
|
||||
let router_server =
|
||||
RpcWriteRouterServer::new(http, grpc, metrics, common_state.trace_collector());
|
||||
let router_server = RpcWriteRouterServer::new(
|
||||
http,
|
||||
grpc,
|
||||
metrics,
|
||||
common_state.trace_collector(),
|
||||
gossip_handle,
|
||||
);
|
||||
let server_type = Arc::new(RpcWriteRouterServerType::new(router_server, common_state));
|
||||
Ok(server_type)
|
||||
}
|
||||
|
|
|
@ -108,7 +108,7 @@ impl From<&ParquetFile> for ParquetFilePath {
|
|||
Self {
|
||||
namespace_id: f.namespace_id,
|
||||
table_id: f.table_id,
|
||||
partition_id: f.transition_partition_id(),
|
||||
partition_id: f.partition_id.clone(),
|
||||
object_store_id: f.object_store_id,
|
||||
}
|
||||
}
|
||||
|
@ -119,7 +119,7 @@ impl From<&ParquetFileParams> for ParquetFilePath {
|
|||
Self {
|
||||
namespace_id: f.namespace_id,
|
||||
table_id: f.table_id,
|
||||
partition_id: f.transition_partition_id(),
|
||||
partition_id: f.partition_id.clone(),
|
||||
object_store_id: f.object_store_id,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -91,7 +91,7 @@ use bytes::Bytes;
|
|||
use data_types::{
|
||||
ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId,
|
||||
ParquetFileParams, PartitionHashId, PartitionId, PartitionKey, StatValues, Statistics, TableId,
|
||||
Timestamp,
|
||||
Timestamp, TransitionPartitionId,
|
||||
};
|
||||
use generated_types::influxdata::iox::ingester::v1 as proto;
|
||||
use iox_time::Time;
|
||||
|
@ -443,6 +443,7 @@ impl IoxMetadata {
|
|||
where
|
||||
F: for<'a> Fn(&'a str) -> ColumnId,
|
||||
{
|
||||
let partition_id = TransitionPartitionId::from((partition_id, partition_hash_id.as_ref()));
|
||||
let decoded = metadata.decode().expect("invalid IOx metadata");
|
||||
trace!(
|
||||
?partition_id,
|
||||
|
@ -487,7 +488,6 @@ impl IoxMetadata {
|
|||
namespace_id: self.namespace_id,
|
||||
table_id: self.table_id,
|
||||
partition_id,
|
||||
partition_hash_id,
|
||||
object_store_id: self.object_store_id,
|
||||
min_time,
|
||||
max_time,
|
||||
|
|
|
@ -113,11 +113,13 @@ impl CatalogCache {
|
|||
"ram_metadata",
|
||||
RamSize(ram_pool_metadata_bytes),
|
||||
Arc::clone(&metric_registry),
|
||||
&Handle::current(),
|
||||
));
|
||||
let ram_pool_data = Arc::new(ResourcePool::new(
|
||||
"ram_data",
|
||||
RamSize(ram_pool_data_bytes),
|
||||
Arc::clone(&metric_registry),
|
||||
&Handle::current(),
|
||||
));
|
||||
|
||||
let partition_cache = PartitionCache::new(
|
||||
|
|
|
@ -361,8 +361,8 @@ mod tests {
|
|||
partition.create_parquet_file(builder).await;
|
||||
let table_id = table.table.id;
|
||||
|
||||
let single_file_size = 240;
|
||||
let two_file_size = 448;
|
||||
let single_file_size = 256;
|
||||
let two_file_size = 480;
|
||||
assert!(single_file_size < two_file_size);
|
||||
|
||||
let cache = make_cache(&catalog);
|
||||
|
|
|
@ -17,7 +17,7 @@ use cache_system::{
|
|||
};
|
||||
use data_types::{
|
||||
partition_template::{build_column_values, ColumnValue},
|
||||
ColumnId, Partition, PartitionId, TransitionPartitionId,
|
||||
ColumnId, Partition, TransitionPartitionId,
|
||||
};
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use iox_catalog::{interface::Catalog, partition_lookup_batch};
|
||||
|
@ -38,7 +38,7 @@ const CACHE_ID: &str = "partition";
|
|||
|
||||
type CacheT = Box<
|
||||
dyn Cache<
|
||||
K = PartitionId,
|
||||
K = TransitionPartitionId,
|
||||
V = Option<CachedPartition>,
|
||||
GetExtra = (Arc<CachedTable>, Option<Span>),
|
||||
PeekExtra = ((), Option<Span>),
|
||||
|
@ -49,7 +49,7 @@ type CacheT = Box<
|
|||
#[derive(Debug)]
|
||||
pub struct PartitionCache {
|
||||
cache: CacheT,
|
||||
remove_if_handle: RemoveIfHandle<PartitionId, Option<CachedPartition>>,
|
||||
remove_if_handle: RemoveIfHandle<TransitionPartitionId, Option<CachedPartition>>,
|
||||
flusher: Arc<dyn BatchLoaderFlusher>,
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,8 @@ impl PartitionCache {
|
|||
testing: bool,
|
||||
) -> Self {
|
||||
let loader = FunctionLoader::new(
|
||||
move |partition_ids: Vec<PartitionId>, cached_tables: Vec<Arc<CachedTable>>| {
|
||||
move |partition_ids: Vec<TransitionPartitionId>,
|
||||
cached_tables: Vec<Arc<CachedTable>>| {
|
||||
// sanity checks
|
||||
assert_eq!(partition_ids.len(), cached_tables.len());
|
||||
|
||||
|
@ -75,23 +76,20 @@ impl PartitionCache {
|
|||
// prepare output buffer
|
||||
let mut out = (0..partition_ids.len()).map(|_| None).collect::<Vec<_>>();
|
||||
let mut out_map =
|
||||
HashMap::<PartitionId, usize>::with_capacity(partition_ids.len());
|
||||
HashMap::<TransitionPartitionId, usize>::with_capacity(partition_ids.len());
|
||||
for (idx, id) in partition_ids.iter().enumerate() {
|
||||
match out_map.entry(*id) {
|
||||
Entry::Occupied(_) => unreachable!("cache system requested same partition from loader concurrently, this should have been prevented by the CacheDriver"),
|
||||
match out_map.entry(id.clone()) {
|
||||
Entry::Occupied(_) => unreachable!(
|
||||
"cache system requested same partition from loader concurrently, \
|
||||
this should have been prevented by the CacheDriver"
|
||||
),
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build `&[&TransitionPartitionId]` for batch catalog request
|
||||
let ids = partition_ids
|
||||
.iter()
|
||||
.copied()
|
||||
.map(TransitionPartitionId::Deprecated)
|
||||
.collect::<Vec<_>>();
|
||||
let ids = ids.iter().collect::<Vec<_>>();
|
||||
let ids: Vec<&TransitionPartitionId> = partition_ids.iter().collect();
|
||||
|
||||
// fetch catalog data
|
||||
let partitions = Backoff::new(&backoff_config)
|
||||
|
@ -104,7 +102,7 @@ impl PartitionCache {
|
|||
|
||||
// build output
|
||||
for p in partitions {
|
||||
let idx = out_map[&p.id];
|
||||
let idx = out_map[&p.transition_partition_id()];
|
||||
let cached_table = &cached_tables[idx];
|
||||
let p = CachedPartition::new(p, cached_table);
|
||||
out[idx] = Some(p);
|
||||
|
@ -180,7 +178,7 @@ impl PartitionCache {
|
|||
|
||||
self.remove_if_handle.remove_if_and_get(
|
||||
&self.cache,
|
||||
partition_id,
|
||||
partition_id.clone(),
|
||||
move |cached_partition| {
|
||||
let invalidates = if let Some(sort_key) =
|
||||
&cached_partition.and_then(|p| p.sort_key)
|
||||
|
@ -195,7 +193,7 @@ impl PartitionCache {
|
|||
|
||||
if invalidates {
|
||||
debug!(
|
||||
partition_id = partition_id.get(),
|
||||
partition_id = %partition_id,
|
||||
"invalidate partition cache",
|
||||
);
|
||||
}
|
||||
|
@ -217,13 +215,13 @@ impl PartitionCache {
|
|||
/// Request for [`PartitionCache::get`].
|
||||
#[derive(Debug)]
|
||||
pub struct PartitionRequest {
|
||||
pub partition_id: PartitionId,
|
||||
pub partition_id: TransitionPartitionId,
|
||||
pub sort_key_should_cover: Vec<ColumnId>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct CachedPartition {
|
||||
pub id: PartitionId,
|
||||
pub id: TransitionPartitionId,
|
||||
pub sort_key: Option<Arc<PartitionSortKey>>,
|
||||
pub column_ranges: ColumnRanges,
|
||||
}
|
||||
|
@ -299,7 +297,7 @@ impl CachedPartition {
|
|||
column_ranges.shrink_to_fit();
|
||||
|
||||
Self {
|
||||
id: partition.id,
|
||||
id: partition.transition_partition_id(),
|
||||
sort_key,
|
||||
column_ranges: Arc::new(column_ranges),
|
||||
}
|
||||
|
@ -368,7 +366,10 @@ mod tests {
|
|||
ram::test_util::test_ram_pool, test_util::assert_catalog_access_metric_count,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use data_types::{partition_template::TablePartitionTemplateOverride, ColumnType};
|
||||
use data_types::{
|
||||
partition_template::TablePartitionTemplateOverride, ColumnType, PartitionHashId,
|
||||
PartitionId, PartitionKey, TableId,
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use generated_types::influxdata::iox::partition_template::v1::{
|
||||
template_part::Part, PartitionTemplate, TemplatePart,
|
||||
|
@ -419,8 +420,11 @@ mod tests {
|
|||
true,
|
||||
);
|
||||
|
||||
let p1_id = p1.transition_partition_id();
|
||||
let p2_id = p2.transition_partition_id();
|
||||
|
||||
let sort_key1a = cache
|
||||
.get_one(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
|
||||
.get_one(Arc::clone(&cached_table), &p1_id, &Vec::new(), None)
.await
.unwrap()
.sort_key;

@ -434,24 +438,24 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);

let sort_key2 = cache
.get_one(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
.get_one(Arc::clone(&cached_table), &p2_id, &Vec::new(), None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key2, None);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
2,
);

let sort_key1b = cache
.get_one(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
.get_one(Arc::clone(&cached_table), &p1_id, &Vec::new(), None)
.await
.unwrap()
.sort_key;

@ -461,16 +465,37 @@ mod tests {
));
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
2,
);

// non-existing partition
for _ in 0..2 {
// Non-existing partition identified by partition hash ID
let res = cache
.get_one(
Arc::clone(&cached_table),
PartitionId::new(i64::MAX),
&TransitionPartitionId::Deterministic(PartitionHashId::new(
TableId::new(i64::MAX),
&PartitionKey::from("bananas_not_found"),
)),
&[],
None,
)
.await;
assert_eq!(res, None);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_hash_id_batch",
3,
);

// Non-existing partition identified by deprecated catalog IDs; this part can be
// removed when partition identification is fully transitioned to partition hash IDs
let res = cache
.get_one(
Arc::clone(&cached_table),
&TransitionPartitionId::Deprecated(PartitionId::new(i64::MAX)),
&Vec::new(),
None,
)

@ -479,7 +504,7 @@ mod tests {
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
3,
1,
);
}
}

@ -548,8 +573,14 @@ mod tests {
true,
);

let p1_id = p1.transition_partition_id();
let p2_id = p2.transition_partition_id();
let p3_id = p3.transition_partition_id();
let p4_id = p4.transition_partition_id();
let p5_id = p5.transition_partition_id();

let ranges1a = cache
.get_one(Arc::clone(&cached_table), p1.id, &[], None)
.get_one(Arc::clone(&cached_table), &p1_id, &[], None)
.await
.unwrap()
.column_ranges;

@ -578,12 +609,12 @@ mod tests {
));
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);

let ranges2 = cache
.get_one(Arc::clone(&cached_table), p2.id, &[], None)
.get_one(Arc::clone(&cached_table), &p2_id, &[], None)
.await
.unwrap()
.column_ranges;

@ -599,12 +630,12 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
2,
);

let ranges3 = cache
.get_one(Arc::clone(&cached_table), p3.id, &[], None)
.get_one(Arc::clone(&cached_table), &p3_id, &[], None)
.await
.unwrap()
.column_ranges;

@ -629,12 +660,12 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
3,
);

let ranges4 = cache
.get_one(Arc::clone(&cached_table), p4.id, &[], None)
.get_one(Arc::clone(&cached_table), &p4_id, &[], None)
.await
.unwrap()
.column_ranges;

@ -659,12 +690,12 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
4,
);

let ranges5 = cache
.get_one(Arc::clone(&cached_table), p5.id, &[], None)
.get_one(Arc::clone(&cached_table), &p5_id, &[], None)
.await
.unwrap()
.column_ranges;

@ -680,28 +711,48 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
5,
);

let ranges1b = cache
.get_one(Arc::clone(&cached_table), p1.id, &[], None)
.get_one(Arc::clone(&cached_table), &p1_id, &[], None)
.await
.unwrap()
.column_ranges;
assert!(Arc::ptr_eq(&ranges1a, &ranges1b));
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
5,
);

// non-existing partition
for _ in 0..2 {
// Non-existing partition identified by partition hash ID
let res = cache
.get_one(
Arc::clone(&cached_table),
PartitionId::new(i64::MAX),
&TransitionPartitionId::Deterministic(PartitionHashId::new(
TableId::new(i64::MAX),
&PartitionKey::from("bananas_not_found"),
)),
&[],
None,
)
.await;
assert_eq!(res, None);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_hash_id_batch",
6,
);

// Non-existing partition identified by deprecated catalog IDs; this part can be
// removed when partition identification is fully transitioned to partition hash IDs
let res = cache
.get_one(
Arc::clone(&cached_table),
&TransitionPartitionId::Deprecated(PartitionId::new(i64::MAX)),
&[],
None,
)

@ -710,7 +761,7 @@ mod tests {
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
6,
1,
);
}
}

@ -724,7 +775,7 @@ mod tests {
let c1 = t.create_column("foo", ColumnType::Tag).await;
let c2 = t.create_column("time", ColumnType::Time).await;
let p = t.create_partition("k1").await;
let p_id = p.partition.id;
let p_id = p.partition.transition_partition_id();
let p_sort_key = p.partition.sort_key();
let cached_table = Arc::new(CachedTable {
id: t.table.id,

@ -751,41 +802,41 @@ mod tests {
);

let sort_key = cache
.get_one(Arc::clone(&cached_table), p_id, &[], None)
.get_one(Arc::clone(&cached_table), &p_id, &[], None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key, None,);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);

// requesting another will not expire
assert!(p_sort_key.is_none());
let sort_key = cache
.get_one(Arc::clone(&cached_table), p_id, &[], None)
.get_one(Arc::clone(&cached_table), &p_id, &[], None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key, None,);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);

// but requesting something will expire
let sort_key = cache
.get_one(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
.get_one(Arc::clone(&cached_table), &p_id, &[c1.column.id], None)
.await
.unwrap()
.sort_key;
assert_eq!(sort_key, None,);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
2,
);

@ -801,7 +852,7 @@ mod tests {
// expire & fetch
let p_sort_key = p.partition.sort_key();
let sort_key = cache
.get_one(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
.get_one(Arc::clone(&cached_table), &p_id, &[c1.column.id], None)
.await
.unwrap()
.sort_key;

@ -815,7 +866,7 @@ mod tests {
);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
3,
);

@ -827,7 +878,7 @@ mod tests {
vec![c1.column.id, c2.column.id],
] {
let sort_key_2 = cache
.get_one(Arc::clone(&cached_table), p_id, &should_cover, None)
.get_one(Arc::clone(&cached_table), &p_id, &should_cover, None)
.await
.unwrap()
.sort_key;

@ -837,7 +888,7 @@ mod tests {
));
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
3,
);
}

@ -847,7 +898,7 @@ mod tests {
let sort_key_2 = cache
.get_one(
Arc::clone(&cached_table),
p_id,
&p_id,
&[c1.column.id, c3.column.id],
None,
)

@ -861,7 +912,7 @@ mod tests {
assert_eq!(sort_key, sort_key_2);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
4,
);
}

@ -892,34 +943,45 @@ mod tests {
true,
);

let p1_id = p1.transition_partition_id();
let p2_id = p2.transition_partition_id();

let mut res = cache
.get(
Arc::clone(&cached_table),
vec![
PartitionRequest {
partition_id: p1.id,
partition_id: p1_id.clone(),
sort_key_should_cover: vec![],
},
PartitionRequest {
partition_id: p2.id,
partition_id: p2_id.clone(),
sort_key_should_cover: vec![],
},
PartitionRequest {
partition_id: p1.id,
partition_id: p1_id.clone(),
sort_key_should_cover: vec![],
},
// requesting non-existing partitions is fine, they just don't appear in
// the output
PartitionRequest {
partition_id: TransitionPartitionId::Deprecated(PartitionId::new(i64::MAX)),
sort_key_should_cover: vec![],
},
PartitionRequest {
// requesting non-existing partitions is fine, they just don't appear in the output
partition_id: PartitionId::new(i64::MAX),
partition_id: TransitionPartitionId::Deterministic(PartitionHashId::new(
TableId::new(i64::MAX),
&PartitionKey::from("bananas_not_found"),
)),
sort_key_should_cover: vec![],
},
],
None,
)
.await;
res.sort_by_key(|p| p.id);
let ids = res.iter().map(|p| p.id).collect::<Vec<_>>();
assert_eq!(ids, vec![p1.id, p1.id, p2.id]);
res.sort_by(|a, b| a.id.cmp(&b.id));
let ids = res.into_iter().map(|p| p.id).collect::<Vec<_>>();
assert_eq!(ids, vec![p1_id.clone(), p1_id, p2_id]);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",

@ -1008,7 +1070,7 @@ mod tests {
c_id: ColumnId,

/// Partitions within that table.
partitions: Vec<PartitionId>,
partitions: Vec<TransitionPartitionId>,
}

impl ConcurrencyTestState {

@ -1032,7 +1094,7 @@ mod tests {
t.create_partition_with_sort_key(&format!("p{i}"), &["time"])
.await
.partition
.id
.transition_partition_id()
}
})
.collect::<Vec<_>>()

@ -1046,7 +1108,8 @@ mod tests {
}
}

/// Perform the actual [`PartitionCache::get`] call and run some basic sanity checks on the result.
/// Perform the actual [`PartitionCache::get`] call and run some basic sanity checks on the
/// result.
async fn run(self, cache: Arc<PartitionCache>) {
let Self {
cached_table,

@ -1060,15 +1123,15 @@ mod tests {
partitions
.iter()
.map(|p| PartitionRequest {
partition_id: *p,
partition_id: p.clone(),
sort_key_should_cover: vec![],
})
.collect(),
None,
)
.await;
results.sort_by_key(|p| p.id);
let partitions_res = results.iter().map(|p| p.id).collect::<Vec<_>>();
results.sort_by(|a, b| a.id.cmp(&b.id));
let partitions_res = results.iter().map(|p| p.id.clone()).collect::<Vec<_>>();
assert_eq!(partitions, partitions_res);
assert!(results
.iter()

@ -1086,7 +1149,7 @@ mod tests {
async fn get_one(
&self,
cached_table: Arc<CachedTable>,
partition_id: PartitionId,
partition_id: &TransitionPartitionId,
sort_key_should_cover: &[ColumnId],
span: Option<Span>,
) -> Option<CachedPartition>;

@ -1097,14 +1160,14 @@ mod tests {
async fn get_one(
&self,
cached_table: Arc<CachedTable>,
partition_id: PartitionId,
partition_id: &TransitionPartitionId,
sort_key_should_cover: &[ColumnId],
span: Option<Span>,
) -> Option<CachedPartition> {
self.get(
cached_table,
vec![PartitionRequest {
partition_id,
partition_id: partition_id.clone(),
sort_key_should_cover: sort_key_should_cover.to_vec(),
}],
span,

@ -43,12 +43,14 @@ pub mod test_util {
use std::sync::Arc;

use cache_system::backend::policy::lru::ResourcePool;
use tokio::runtime::Handle;

pub fn test_ram_pool() -> Arc<ResourcePool<RamSize>> {
Arc::new(ResourcePool::new(
"pool",
RamSize(usize::MAX),
Arc::new(metric::Registry::new()),
&Handle::current(),
))
}
}

@ -859,10 +859,6 @@ impl IngesterPartition {
}
}

pub(crate) fn partition_id(&self) -> PartitionId {
self.partition_id
}

pub(crate) fn transition_partition_id(&self) -> TransitionPartitionId {
TransitionPartitionId::from((self.partition_id, self.partition_hash_id.as_ref()))
}

@ -1,6 +1,6 @@
use std::{collections::HashMap, sync::Arc};

use data_types::{ChunkId, ChunkOrder, ColumnId, ParquetFile, PartitionId, TransitionPartitionId};
use data_types::{ChunkId, ChunkOrder, ColumnId, ParquetFile, TransitionPartitionId};
use futures::StreamExt;
use hashbrown::HashSet;
use iox_catalog::interface::Catalog;

@ -56,7 +56,7 @@ impl ChunkAdapter {
&self,
cached_table: Arc<CachedTable>,
files: Arc<[Arc<ParquetFile>]>,
cached_partitions: &HashMap<PartitionId, CachedPartition>,
cached_partitions: &HashMap<TransitionPartitionId, CachedPartition>,
span: Option<Span>,
) -> Vec<QuerierParquetChunk> {
let span_recorder = SpanRecorder::new(span);

@ -170,18 +170,13 @@ impl ChunkAdapter {

let order = ChunkOrder::new(parquet_file.file.max_l0_created_at.get());

let partition_id = parquet_file.file.partition_id;
let transition_partition_id = TransitionPartitionId::from((
partition_id,
parquet_file.file.partition_hash_id.as_ref(),
));
let partition_id = parquet_file.file.partition_id.clone();

let meta = Arc::new(QuerierParquetChunkMeta {
chunk_id,
order,
sort_key: Some(sort_key),
partition_id,
transition_partition_id,
});

let parquet_chunk = Arc::new(ParquetChunk::new(

@ -1,6 +1,6 @@
//! Querier Chunks

use data_types::{ChunkId, ChunkOrder, PartitionId, TransitionPartitionId};
use data_types::{ChunkId, ChunkOrder, TransitionPartitionId};
use datafusion::physical_plan::Statistics;
use iox_query::chunk_statistics::{create_chunk_statistics, ColumnRanges};
use parquet_file::chunk::ParquetChunk;

@ -25,10 +25,7 @@ pub struct QuerierParquetChunkMeta {
sort_key: Option<SortKey>,

/// Partition ID.
partition_id: PartitionId,

/// Transition partition ID.
transition_partition_id: TransitionPartitionId,
partition_id: TransitionPartitionId,
}

impl QuerierParquetChunkMeta {

@ -43,13 +40,8 @@ impl QuerierParquetChunkMeta {
}

/// Partition ID.
pub fn partition_id(&self) -> PartitionId {
self.partition_id
}

/// Partition ID.
pub fn transition_partition_id(&self) -> &TransitionPartitionId {
&self.transition_partition_id
pub fn partition_id(&self) -> &TransitionPartitionId {
&self.partition_id
}
}

@ -251,7 +243,7 @@ pub mod tests {
.get(
Arc::clone(&self.cached_table),
vec![PartitionRequest {
partition_id: self.parquet_file.partition_id,
partition_id: self.parquet_file.partition_id.clone(),
sort_key_should_cover: vec![],
}],
None,

@ -261,7 +253,7 @@ pub mod tests {
.next()
.unwrap();
let cached_partitions =
HashMap::from([(self.parquet_file.partition_id, cached_partition)]);
HashMap::from([(self.parquet_file.partition_id.clone(), cached_partition)]);
self.adapter
.new_chunks(
Arc::clone(&self.cached_table),

@ -15,11 +15,11 @@ impl QueryChunk for QuerierParquetChunk {
}

fn partition_id(&self) -> PartitionId {
self.meta().partition_id()
unimplemented!()
}

fn transition_partition_id(&self) -> &TransitionPartitionId {
self.meta().transition_partition_id()
self.meta().partition_id()
}

fn sort_key(&self) -> Option<&SortKey> {

@ -8,7 +8,7 @@ use crate::{
parquet::ChunkAdapter,
IngesterConnection,
};
use data_types::{ColumnId, NamespaceId, ParquetFile, PartitionId, TableId};
use data_types::{ColumnId, NamespaceId, ParquetFile, TableId, TransitionPartitionId};
use datafusion::error::DataFusionError;
use futures::join;
use iox_query::{provider, provider::ChunkPruner, QueryChunk};

@ -282,7 +282,7 @@ impl QuerierTable {
let chunks = partitions
.into_iter()
.filter_map(|mut c| {
let cached_partition = cached_partitions.get(&c.partition_id())?;
let cached_partition = cached_partitions.get(&c.transition_partition_id())?;
c.set_partition_column_ranges(&cached_partition.column_ranges);
Some(c)
})

@ -322,16 +322,16 @@ impl QuerierTable {
ingester_partitions: &[IngesterPartition],
parquet_files: &[Arc<ParquetFile>],
span: Option<Span>,
) -> HashMap<PartitionId, CachedPartition> {
) -> HashMap<TransitionPartitionId, CachedPartition> {
let span_recorder = SpanRecorder::new(span);

let mut should_cover: HashMap<PartitionId, HashSet<ColumnId>> =
let mut should_cover: HashMap<TransitionPartitionId, HashSet<ColumnId>> =
HashMap::with_capacity(ingester_partitions.len());

// For ingester partitions we only need the column ranges -- which are static -- not the sort key. So it is
// sufficient to collect the partition IDs.
for p in ingester_partitions {
should_cover.entry(p.partition_id()).or_default();
should_cover.entry(p.transition_partition_id()).or_default();
}

// For parquet files we must ensure that the -- potentially evolving -- sort key covers the primary key.

@ -342,7 +342,7 @@ impl QuerierTable {
.collect::<HashSet<_>>();
for f in parquet_files {
should_cover
.entry(f.partition_id)
.entry(f.partition_id.clone())
.or_default()
.extend(f.column_set.iter().copied().filter(|id| pk.contains(id)));
}

@ -366,7 +366,7 @@ impl QuerierTable {
)
.await;

partitions.into_iter().map(|p| (p.id, p)).collect()
partitions.into_iter().map(|p| (p.id.clone(), p)).collect()
}

/// Get a chunk pruner that can be used to prune chunks retrieved via [`chunks`](Self::chunks)

@ -889,7 +889,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 2);

@ -899,7 +899,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 4);

@ -912,7 +912,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);

@ -922,7 +922,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
1,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 6);

@ -936,7 +936,7 @@ mod tests {
assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
assert_catalog_access_metric_count(
&catalog.metric_registry,
"partition_get_by_id_batch",
"partition_get_by_hash_id_batch",
2,
);
assert_cache_access_metric_count(&catalog.metric_registry, "partition", 8);

@ -15,6 +15,7 @@ dml = { path = "../dml" }
flate2 = "1.0"
futures = "0.3.28"
generated_types = { path = "../generated_types" }
gossip = { version = "0.1.0", path = "../gossip" }
hashbrown = { workspace = true }
hyper = "0.14"
iox_catalog = { path = "../iox_catalog" }

@ -16,6 +16,9 @@ pub struct RpcWriteRouterServer<D, N> {

http: HttpDelegate<D, N>,
grpc: RpcWriteGrpcDelegate,

// TODO: this shouldn't be here but it is here while it's unused elsewhere
_gossip_handle: Option<gossip::GossipHandle>,
}

impl<D, N> RpcWriteRouterServer<D, N> {

@ -26,12 +29,14 @@ impl<D, N> RpcWriteRouterServer<D, N> {
grpc: RpcWriteGrpcDelegate,
metrics: Arc<metric::Registry>,
trace_collector: Option<Arc<dyn TraceCollector>>,
gossip_handle: Option<gossip::GossipHandle>,
) -> Self {
Self {
metrics,
trace_collector,
http,
grpc,
_gossip_handle: gossip_handle,
}
}

@ -18,7 +18,7 @@
// Workaround for "unused crate" lint false positives.
use workspace_hack as _;

use data_types::{PartitionId, TableId, TransitionPartitionId};
use data_types::{PartitionHashId, PartitionId, TableId, TransitionPartitionId};
use generated_types::influxdata::iox::catalog::v1::*;
use iox_catalog::interface::{Catalog, SoftDeletedRows};
use observability_deps::tracing::*;

@ -47,14 +47,14 @@ impl catalog_service_server::CatalogService for CatalogService {
) -> Result<Response<GetParquetFilesByPartitionIdResponse>, Status> {
let mut repos = self.catalog.repositories().await;
let req = request.into_inner();
let partition_id = TransitionPartitionId::Deprecated(PartitionId::new(req.partition_id));
let partition_id = to_partition_id(req.partition_identifier)?;

let parquet_files = repos
.parquet_files()
.list_by_partition_not_to_delete(&partition_id)
.await
.map_err(|e| {
warn!(error=%e, %req.partition_id, "failed to get parquet_files for partition");
warn!(error=%e, %partition_id, "failed to get parquet_files for partition");
Status::not_found(e.to_string())
})?;

@ -169,13 +169,52 @@ impl catalog_service_server::CatalogService for CatalogService {
}
}

fn to_partition_identifier(partition_id: &TransitionPartitionId) -> PartitionIdentifier {
match partition_id {
TransitionPartitionId::Deterministic(hash_id) => PartitionIdentifier {
id: Some(partition_identifier::Id::HashId(
hash_id.as_bytes().to_owned(),
)),
},
TransitionPartitionId::Deprecated(id) => PartitionIdentifier {
id: Some(partition_identifier::Id::CatalogId(id.get())),
},
}
}

fn to_partition_id(
partition_identifier: Option<PartitionIdentifier>,
) -> Result<TransitionPartitionId, Status> {
let partition_id =
match partition_identifier
.and_then(|pi| pi.id)
.ok_or(Status::invalid_argument(
"No partition identifier specified",
))? {
partition_identifier::Id::HashId(bytes) => TransitionPartitionId::Deterministic(
PartitionHashId::try_from(&bytes[..]).map_err(|e| {
Status::invalid_argument(format!(
"Could not parse bytes as a `PartitionHashId`: {e}"
))
})?,
),
partition_identifier::Id::CatalogId(id) => {
TransitionPartitionId::Deprecated(PartitionId::new(id))
}
};

Ok(partition_id)
}

// converts the catalog ParquetFile to protobuf
fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile {
let partition_identifier = to_partition_identifier(&p.partition_id);

ParquetFile {
id: p.id.get(),
namespace_id: p.namespace_id.get(),
table_id: p.table_id.get(),
partition_id: p.partition_id.get(),
partition_identifier: Some(partition_identifier),
object_store_id: p.object_store_id.to_string(),
min_time: p.min_time.get(),
max_time: p.max_time.get(),

@ -191,8 +230,10 @@ fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile {

// converts the catalog Partition to protobuf
fn to_partition(p: data_types::Partition) -> Partition {
let identifier = to_partition_identifier(&p.transition_partition_id());

Partition {
id: p.id.get(),
identifier: Some(identifier),
key: p.partition_key.to_string(),
table_id: p.table_id.get(),
array_sort_key: p.sort_key,

@ -230,8 +271,7 @@ mod tests {
let p1params = ParquetFileParams {
namespace_id: namespace.id,
table_id: table.id,
partition_id: partition.id,
partition_hash_id: partition.hash_id().cloned(),
partition_id: partition.transition_partition_id(),
object_store_id: Uuid::new_v4(),
min_time: Timestamp::new(1),
max_time: Timestamp::new(5),

@ -248,13 +288,15 @@ mod tests {
};
p1 = repos.parquet_files().create(p1params).await.unwrap();
p2 = repos.parquet_files().create(p2params).await.unwrap();
partition_id = partition.id;
partition_id = partition.transition_partition_id();
Arc::clone(&catalog)
};

let partition_identifier = to_partition_identifier(&partition_id);

let grpc = super::CatalogService::new(catalog);
let request = GetParquetFilesByPartitionIdRequest {
partition_id: partition_id.get(),
partition_identifier: Some(partition_identifier),
};

let tonic_response = grpc

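Aside (not part of the change above): a minimal sketch of how the two new conversion helpers round-trip a partition identifier, assuming the generated `PartitionIdentifier` type from `generated_types::influxdata::iox::catalog::v1` is in scope; the `example_round_trip` function is hypothetical and for illustration only.

// Illustration only: round-trip a deprecated catalog ID through the protobuf
// `PartitionIdentifier` representation and back.
fn example_round_trip() -> Result<(), tonic::Status> {
    let original = TransitionPartitionId::Deprecated(PartitionId::new(42));
    // Serialize to the identifier sent over the wire.
    let identifier = to_partition_identifier(&original);
    // Parse it back into a `TransitionPartitionId` on the receiving side.
    let parsed = to_partition_id(Some(identifier))?;
    assert_eq!(parsed, original);
    Ok(())
}
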
@ -75,7 +75,7 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService {
let path = ParquetFilePath::new(
parquet_file.namespace_id,
parquet_file.table_id,
&parquet_file.transition_partition_id(),
&parquet_file.partition_id.clone(),
parquet_file.object_store_id,
);
let path = path.object_store_path();

@ -128,8 +128,7 @@ mod tests {
let p1params = ParquetFileParams {
namespace_id: namespace.id,
table_id: table.id,
partition_id: partition.id,
partition_hash_id: partition.hash_id().cloned(),
partition_id: partition.transition_partition_id(),
object_store_id: Uuid::new_v4(),
min_time: Timestamp::new(1),
max_time: Timestamp::new(5),

@ -150,7 +149,7 @@ mod tests {
let path = ParquetFilePath::new(
p1.namespace_id,
p1.table_id,
&p1.transition_partition_id(),
&p1.partition_id.clone(),
p1.object_store_id,
);
let path = path.object_store_path();

@ -25,3 +25,4 @@ sysinfo = "0.29.7"
tempfile = "3.7.0"
# Need the multi-threaded executor for testing
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] }
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }

@ -1,7 +1,10 @@
use std::{borrow::Cow, path::PathBuf, time::Duration};
use std::borrow::Cow;
use std::path::PathBuf;
use std::time::Duration;

use metric::{Attributes, U64Gauge};
use sysinfo::{DiskExt, RefreshKind, System, SystemExt};
use tokio::sync::watch;

/// The interval at which disk metrics are updated.
///

@ -9,6 +12,32 @@ use sysinfo::{DiskExt, RefreshKind, System, SystemExt};
/// interval.
const UPDATE_INTERVAL: Duration = Duration::from_secs(13);

/// An immutable snapshot of space and usage statistics for some disk.
#[derive(Clone, Copy, Debug)]
pub struct DiskSpaceSnapshot {
available_disk_space: u64,
total_disk_space: u64,
}

impl DiskSpaceSnapshot {
/// The available space in bytes on the disk.
pub fn available_disk_space(&self) -> u64 {
self.available_disk_space
}

/// The maximum capacity in bytes of the disk.
pub fn total_disk_space(&self) -> u64 {
self.total_disk_space
}

/// Overall usage of the disk, as a percentage [0.0, 1.0].
#[inline]
pub fn disk_usage_ratio(&self) -> f64 {
debug_assert!(self.available_disk_space <= self.total_disk_space);
1.0 - (self.available_disk_space as f64 / self.total_disk_space as f64)
}
}

/// A periodic reporter of disk capacity / free statistics for a given
/// directory.
#[derive(Debug)]

@ -22,12 +51,19 @@ pub struct DiskSpaceMetrics {
/// The index into [`System::disks()`] for the disk containing the observed
/// directory.
disk_idx: usize,

/// A stream of [`DiskSpaceSnapshot`] produced by the metric reporter for
/// consumption by any listeners.
snapshot_tx: watch::Sender<DiskSpaceSnapshot>,
}

impl DiskSpaceMetrics {
/// Create a new [`DiskSpaceMetrics`], returning [`None`] if no disk can be
/// found for the specified `directory`.
pub fn new(directory: PathBuf, registry: &metric::Registry) -> Option<Self> {
pub fn new(
directory: PathBuf,
registry: &metric::Registry,
) -> Option<(Self, watch::Receiver<DiskSpaceSnapshot>)> {
let path: Cow<'static, str> = Cow::from(directory.display().to_string());
let mut directory = directory.canonicalize().ok()?;

@ -52,14 +88,14 @@ impl DiskSpaceMetrics {

// Resolve the mount point once.
// The directory path may be `/path/to/dir` and the mount point is `/`.
let disk_idx = loop {
if let Some((idx, _disk)) = system
let (disk_idx, initial_disk) = loop {
if let Some((idx, disk)) = system
.disks()
.iter()
.enumerate()
.find(|(_idx, disk)| disk.mount_point() == directory)
{
break idx;
break (idx, disk);
}
// The mount point for this directory could not be found.
if !directory.pop() {

@ -67,18 +103,26 @@ impl DiskSpaceMetrics {
}
};

Some(Self {
available_disk_space,
total_disk_space,
system,
disk_idx,
})
let (snapshot_tx, snapshot_rx) = watch::channel(DiskSpaceSnapshot {
available_disk_space: initial_disk.available_space(),
total_disk_space: initial_disk.total_space(),
});

Some((
Self {
available_disk_space,
total_disk_space,
system,
disk_idx,
snapshot_tx,
},
snapshot_rx,
))
}

/// Start the [`DiskSpaceMetrics`] evaluation loop, blocking forever.
pub async fn run(mut self) {
let mut interval = tokio::time::interval(UPDATE_INTERVAL);

loop {
interval.tick().await;

@ -93,6 +137,13 @@ impl DiskSpaceMetrics {

self.available_disk_space.set(disk.available_space());
self.total_disk_space.set(disk.total_space());

// Produce and send a [`DiskSpaceSnapshot`] for any listeners
// that might exist.
_ = self.snapshot_tx.send(DiskSpaceSnapshot {
available_disk_space: disk.available_space(),
total_disk_space: disk.total_space(),
});
}
}
}

@ -103,6 +154,7 @@ mod tests {

use metric::Metric;
use tempfile::tempdir_in;
use test_helpers::timeout::FutureTimeout;

use super::*;

@ -121,11 +173,9 @@ mod tests {

let registry = Arc::new(metric::Registry::new());

let _handle = tokio::spawn(
DiskSpaceMetrics::new(pathbuf, &registry)
.expect("root always exists")
.run(),
);
let (_handle, mut snapshot_rx) =
DiskSpaceMetrics::new(pathbuf, &registry).expect("root always exists");
let _handle = tokio::spawn(_handle.run());

// Wait for the metric to be emitted and non-zero - this should be very
// quick!

@ -151,10 +201,45 @@ mod tests {
.fetch();

if recorded_free_metric > 0 && recorded_total_metric > 0 {
snapshot_rx
.changed()
.with_timeout_panic(Duration::from_secs(5))
.await
.expect("snapshot value should have changed");

let snapshot = *snapshot_rx.borrow();
assert_eq!(snapshot.available_disk_space, recorded_free_metric);
assert_eq!(snapshot.total_disk_space, recorded_total_metric);

return;
}

tokio::time::sleep(Duration::from_millis(50)).await;
}
}

// Token test to assert disk usage ratio
#[test]
fn assert_disk_usage_ratio() {
// 80% used
let snapshot = DiskSpaceSnapshot {
available_disk_space: 2000,
total_disk_space: 10000,
};
assert_eq!(snapshot.disk_usage_ratio(), 0.8);

// 90% used
let snapshot = DiskSpaceSnapshot {
available_disk_space: 2000,
total_disk_space: 20000,
};
assert_eq!(snapshot.disk_usage_ratio(), 0.9);

// Free!
let snapshot = DiskSpaceSnapshot {
available_disk_space: 42,
total_disk_space: 42,
};
assert_eq!(snapshot.disk_usage_ratio(), 0.0);
}
}
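Aside (not part of the change above): a short sketch of how a listener might consume the snapshot channel returned by the reworked `DiskSpaceMetrics::new`, assuming the `DiskSpaceSnapshot` API added above; the `watch_disk_usage` function and the 0.9 threshold are hypothetical and for illustration only.

// Illustration only: react whenever the reporter loop publishes a fresh
// `DiskSpaceSnapshot` on the watch channel.
async fn watch_disk_usage(mut snapshot_rx: tokio::sync::watch::Receiver<DiskSpaceSnapshot>) {
    // `changed()` resolves each time the reporter sends a new snapshot.
    while snapshot_rx.changed().await.is_ok() {
        let snapshot = *snapshot_rx.borrow();
        if snapshot.disk_usage_ratio() > 0.9 {
            println!(
                "disk nearly full: {} of {} bytes available",
                snapshot.available_disk_space(),
                snapshot.total_disk_space(),
            );
        }
    }
}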