Merge pull request #7640 from influxdata/cn/hide-shard
fix: Move shard concepts into only the catalog
commit 92f1016e13
@@ -325,7 +325,6 @@ influxdata.iox.ingester.v1.WriteService
 influxdata.iox.namespace.v1.NamespaceService
 influxdata.iox.object_store.v1.ObjectStoreService
 influxdata.iox.schema.v1.SchemaService
-influxdata.iox.sharder.v1.ShardService
 influxdata.platform.storage.IOxTesting
 influxdata.platform.storage.Storage
 ```

@@ -213,7 +213,7 @@ impl CatalogDsnConfig {
         let mem = MemCatalog::new(metrics);

         let mut txn = mem.start_transaction().await.context(CatalogSnafu)?;
-        create_or_get_default_records(1, txn.deref_mut())
+        create_or_get_default_records(txn.deref_mut())
             .await
             .context(CatalogSnafu)?;
         txn.commit().await.context(CatalogSnafu)?;

@@ -385,7 +385,6 @@ fn make_parquet_files_sink(config: &Config) -> Arc<dyn ParquetFilesSink> {
     let parquet_file_sink = Arc::new(LoggingParquetFileSinkWrapper::new(
         DedicatedExecParquetFileSinkWrapper::new(
             ObjectStoreParquetFileSink::new(
-                config.shard_id,
                 config.parquet_store_scratchpad.clone(),
                 Arc::clone(&config.time_provider),
             ),

@@ -4,9 +4,7 @@ use std::{
 };

 use async_trait::async_trait;
-use data_types::{
-    ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, ShardId, Timestamp,
-};
+use data_types::{ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp};
 use datafusion::{
     arrow::{datatypes::SchemaRef, record_batch::RecordBatch},
     error::DataFusionError,

@@ -70,7 +68,6 @@ impl ParquetFileSink for MockParquetFileSink {
         let row_count = batches.iter().map(|b| b.num_rows()).sum::<usize>();
         let mut guard = self.records.lock().expect("not poisoned");
         let out = ((row_count > 0) || !self.filter_empty_files).then(|| ParquetFileParams {
-            shard_id: ShardId::new(1),
             namespace_id: partition.namespace_id,
             table_id: partition.table.id,
             partition_id: partition.partition_id,

@@ -167,7 +164,6 @@ mod tests {
                 .await
                 .unwrap(),
             Some(ParquetFileParams {
-                shard_id: ShardId::new(1),
                 namespace_id: NamespaceId::new(2),
                 table_id: TableId::new(3),
                 partition_id: PartitionId::new(1),

@@ -231,7 +227,6 @@ mod tests {
                 .await
                 .unwrap(),
             Some(ParquetFileParams {
-                shard_id: ShardId::new(1),
                 namespace_id: NamespaceId::new(2),
                 table_id: TableId::new(3),
                 partition_id: PartitionId::new(1),

@@ -1,7 +1,7 @@
 use std::{fmt::Display, sync::Arc};

 use async_trait::async_trait;
-use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber, ShardId};
+use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber};
 use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
 use iox_time::{Time, TimeProvider};
 use parquet_file::{

@@ -20,19 +20,13 @@ const MAX_SEQUENCE_NUMBER: i64 = 0;

 #[derive(Debug)]
 pub struct ObjectStoreParquetFileSink {
-    shared_id: ShardId,
     store: ParquetStorage,
     time_provider: Arc<dyn TimeProvider>,
 }

 impl ObjectStoreParquetFileSink {
-    pub fn new(
-        shared_id: ShardId,
-        store: ParquetStorage,
-        time_provider: Arc<dyn TimeProvider>,
-    ) -> Self {
+    pub fn new(store: ParquetStorage, time_provider: Arc<dyn TimeProvider>) -> Self {
         Self {
-            shared_id,
             store,
             time_provider,
         }

@@ -57,7 +51,6 @@ impl ParquetFileSink for ObjectStoreParquetFileSink {
         let meta = IoxMetadata {
             object_store_id: Uuid::new_v4(),
             creation_timestamp: self.time_provider.now(),
-            shard_id: self.shared_id,
             namespace_id: partition.namespace_id,
             namespace_name: partition.namespace_name.clone().into(),
             table_id: partition.table.id,

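The hunks above narrow `ObjectStoreParquetFileSink::new` from three parameters to two and drop `shard_id` from the `IoxMetadata` it writes. A minimal, self-contained sketch of that constructor narrowing; `Store` and `Clock` here are stand-ins for the real `ParquetStorage` and `TimeProvider` types, not the IOx API:

```rust
use std::sync::Arc;

// Stand-ins for ParquetStorage and the IOx time provider; not the real types.
#[allow(dead_code)]
struct Store;

trait Clock {
    fn now(&self) -> u64;
}

struct SystemClock;

impl Clock for SystemClock {
    fn now(&self) -> u64 {
        0
    }
}

#[allow(dead_code)]
struct Sink {
    store: Store,
    time_provider: Arc<dyn Clock>,
}

impl Sink {
    // After the change: no shard id parameter; whatever metadata used to carry
    // it is filled in elsewhere (or dropped entirely).
    fn new(store: Store, time_provider: Arc<dyn Clock>) -> Self {
        Self {
            store,
            time_provider,
        }
    }
}

fn main() {
    let sink = Sink::new(Store, Arc::new(SystemClock));
    println!("sink constructed at t={}", sink.time_provider.now());
}
```

Call sites then change exactly as the compactor hunk earlier in this diff shows: the `config.shard_id` argument simply disappears.
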
@@ -11,7 +11,6 @@ pub fn log_config(config: &Config) {
     // use struct unpack so we don't forget any members
     let Config {
         compaction_type,
-        shard_id,
         // no need to print the internal state of the registry
         metric_registry: _,
         catalog,

@@ -59,7 +58,6 @@ pub fn log_config(config: &Config) {

     info!(
         ?compaction_type,
-        shard_id=shard_id.get(),
         %catalog,
         %parquet_store_real,
         %parquet_store_scratchpad,

@@ -1,6 +1,6 @@
 use std::{collections::HashSet, sync::Arc};

-use data_types::{NamespaceId, PartitionId, ShardId, TableId};
+use data_types::{NamespaceId, PartitionId, TableId};
 use object_store::{memory::InMemory, DynObjectStore};
 use parquet_file::ParquetFilePath;
 use uuid::Uuid;

@@ -23,7 +23,6 @@ pub fn file_path(i: u128) -> ParquetFilePath {
     ParquetFilePath::new(
         NamespaceId::new(1),
         TableId::new(1),
-        ShardId::new(1),
         PartitionId::new(1),
         Uuid::from_u128(i),
     )

@@ -1,8 +1,8 @@
 //! Config-related stuff.
 use std::{collections::HashSet, fmt::Display, num::NonZeroUsize, sync::Arc, time::Duration};

-use backoff::{Backoff, BackoffConfig};
-use data_types::{PartitionId, ShardId, ShardIndex};
+use backoff::BackoffConfig;
+use data_types::PartitionId;
 use iox_catalog::interface::Catalog;
 use iox_query::exec::Executor;
 use iox_time::TimeProvider;

@@ -22,9 +22,6 @@ pub struct Config {
     /// Compaction type.
     pub compaction_type: CompactionType,

-    /// Shard Id
-    pub shard_id: ShardId,
-
     /// Metric registry.
     pub metric_registry: Arc<metric::Registry>,

@@ -146,55 +143,6 @@ impl Config {
     pub fn max_compact_size_bytes(&self) -> usize {
         self.max_desired_file_size_bytes as usize * MIN_COMPACT_SIZE_MULTIPLE
     }
-
-    /// Fetch shard ID.
-    ///
-    /// This is likely required to construct a [`Config`] object.
-    pub async fn fetch_shard_id(
-        catalog: Arc<dyn Catalog>,
-        backoff_config: BackoffConfig,
-        topic_name: String,
-        shard_index: i32,
-    ) -> ShardId {
-        // Get shardId from topic and shard_index
-        // Fetch topic
-        let topic = Backoff::new(&backoff_config)
-            .retry_all_errors("topic_of_given_name", || async {
-                catalog
-                    .repositories()
-                    .await
-                    .topics()
-                    .get_by_name(topic_name.as_str())
-                    .await
-            })
-            .await
-            .expect("retry forever");
-
-        if topic.is_none() {
-            panic!("Topic {topic_name} not found");
-        }
-        let topic = topic.unwrap();
-
-        // Fetch shard
-        let shard = Backoff::new(&backoff_config)
-            .retry_all_errors("sahrd_of_given_index", || async {
-                catalog
-                    .repositories()
-                    .await
-                    .shards()
-                    .get_by_topic_id_and_shard_index(topic.id, ShardIndex::new(shard_index))
-                    .await
-            })
-            .await
-            .expect("retry forever");
-
-        match shard {
-            Some(shard) => shard.id,
-            None => {
-                panic!("Topic {topic_name} and Shard Index {shard_index} not found")
-            }
-        }
-    }
 }

 /// Shard config.

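The removed `fetch_shard_id` above was the compactor's startup lookup: resolve the topic by name, then the shard by topic id and index, panicking if either is missing. With shard concepts kept inside the catalog, the compactor `Config` no longer carries a `ShardId` at all. A rough, self-contained sketch of the before/after caller shape, using toy stand-ins rather than the real `iox_catalog` API:

```rust
// Toy stand-ins; the real code talks to iox_catalog with Backoff retries.
#[derive(Clone, Copy, Debug, PartialEq)]
struct ShardId(i64);

struct Catalog;

impl Catalog {
    // Before: the compactor resolved a shard id at startup and stored it in
    // Config, panicking when the topic or shard was missing.
    fn shard_id_for(&self, topic: &str, shard_index: i32) -> Option<ShardId> {
        // pretend lookup; the real one is two catalog queries
        (topic == "iox-shared" && shard_index == 1).then_some(ShardId(1))
    }

    // After: the catalog applies its single transition shard internally, so
    // the compactor never sees a ShardId.
    fn create_parquet_file(&self, table: &str) -> String {
        format!("created parquet file for {table}")
    }
}

fn main() {
    let catalog = Catalog;

    // Old-style caller (the shape of the removed fetch_shard_id):
    let legacy = catalog.shard_id_for("iox-shared", 1).expect("shard exists");
    println!("legacy shard id: {legacy:?}");

    // New-style caller: no shard anywhere in sight.
    println!("{}", catalog.create_parquet_file("cpu"));
}
```
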
@@ -212,12 +212,12 @@ async fn test_compact_large_overlapes() {
 ---
 - initial
 - "L1 "
-- "L1.4[6000,68000] 240s 2.66kb|------------------L1.4------------------| "
-- "L1.5[136000,136000] 300s 2.17kb |L1.5|"
+- "L1.4[6000,68000] 240s 2.65kb|------------------L1.4------------------| "
+- "L1.5[136000,136000] 300s 2.16kb |L1.5|"
 - "L2 "
-- "L2.1[8000,12000] 60s 1.8kb |L2.1| "
+- "L2.1[8000,12000] 60s 1.79kb |L2.1| "
 - "L2.2[20000,30000] 120s 2.61kb |L2.2| "
-- "L2.3[36000,36000] 180s 2.17kb |L2.3| "
+- "L2.3[36000,36000] 180s 2.16kb |L2.3| "
 "###
 );

@@ -233,7 +233,7 @@ async fn test_compact_large_overlapes() {
 - "L2 "
 - "L2.6[6000,36000] 300s 2.71kb|-------L2.6-------| "
 - "L2.7[68000,68000] 300s 2.51kb |L2.7| "
-- "L2.8[136000,136000] 300s 2.55kb |L2.8|"
+- "L2.8[136000,136000] 300s 2.54kb |L2.8|"
 "###
 );

@@ -323,11 +323,11 @@ async fn test_compact_large_overlape_2() {
 - initial
 - "L1 "
 - "L1.4[6000,25000] 240s 1.8kb|---L1.4----| "
-- "L1.5[28000,136000] 300s 2.65kb |----------------------------------L1.5----------------------------------| "
+- "L1.5[28000,136000] 300s 2.64kb |----------------------------------L1.5----------------------------------| "
 - "L2 "
-- "L2.1[8000,12000] 60s 1.8kb |L2.1| "
+- "L2.1[8000,12000] 60s 1.79kb |L2.1| "
 - "L2.2[20000,30000] 120s 2.61kb |L2.2| "
-- "L2.3[36000,36000] 180s 2.17kb |L2.3| "
+- "L2.3[36000,36000] 180s 2.16kb |L2.3| "
 "###
 );

@@ -343,7 +343,7 @@ async fn test_compact_large_overlape_2() {
 - "L2 "
 - "L2.6[6000,36000] 300s 2.71kb|-------L2.6-------| "
 - "L2.7[68000,68000] 300s 2.51kb |L2.7| "
-- "L2.8[136000,136000] 300s 2.55kb |L2.8|"
+- "L2.8[136000,136000] 300s 2.54kb |L2.8|"
 "###
 );

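The only differences in the layout snapshots above are small size shifts (for example 2.66kb to 2.65kb and 2.55kb to 2.54kb). The diff does not say why, but this is consistent with each simulated parquet file no longer carrying a shard id in its `IoxMetadata`. A back-of-envelope check of the per-file delta (plain Rust, not from the repo):

```rust
fn main() {
    // Sizes as displayed in the snapshots, converted from KiB to bytes.
    let before = 2.66_f64 * 1024.0;
    let after = 2.65_f64 * 1024.0;
    // Roughly ten bytes per file: about the cost of one small metadata field.
    println!("delta ≈ {:.0} bytes", before - after);
}
```
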
@ -759,13 +759,13 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0 "
|
||||
- "L0.?[76,329] 1.04us 2.96mb|-------------------------------------L0.?--------------------------------------| "
|
||||
- "L0.?[330,356] 1.04us 322.99kb |-L0.?-| "
|
||||
- "**** Simulation run 71, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.32mb total:"
|
||||
- "L0, all files 3.32mb "
|
||||
- "L0.166[357,670] 1.04us |-----------------------------------------L0.166-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.32mb total:"
|
||||
- "**** Simulation run 71, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:"
|
||||
- "L0, all files 3.66mb "
|
||||
- "L0.163[357,670] 1.04us |-----------------------------------------L0.163-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[357,658] 1.04us 3.19mb|----------------------------------------L0.?----------------------------------------| "
|
||||
- "L0.?[659,670] 1.04us 130.17kb |L0.?|"
|
||||
- "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| "
|
||||
- "L0.?[659,670] 1.04us 143.55kb |L0.?|"
|
||||
- "**** Simulation run 72, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:"
|
||||
- "L0, all files 2.36mb "
|
||||
- "L0.168[173,356] 1.04us |-----------------------------------------L0.168-----------------------------------------|"
|
||||
|
@ -815,13 +815,13 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0 "
|
||||
- "L0.?[357,658] 1.05us 3.19mb|----------------------------------------L0.?----------------------------------------| "
|
||||
- "L0.?[659,670] 1.05us 130.17kb |L0.?|"
|
||||
- "**** Simulation run 79, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:"
|
||||
- "L0, all files 3.66mb "
|
||||
- "L0.163[357,670] 1.04us |-----------------------------------------L0.163-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:"
|
||||
- "**** Simulation run 79, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:"
|
||||
- "L0, all files 2.36mb "
|
||||
- "L0.179[173,356] 1.05us |-----------------------------------------L0.179-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 2.36mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| "
|
||||
- "L0.?[659,670] 1.04us 143.55kb |L0.?|"
|
||||
- "L0.?[173,329] 1.05us 2.01mb|-----------------------------------L0.?-----------------------------------| "
|
||||
- "L0.?[330,356] 1.05us 355.83kb |---L0.?---| "
|
||||
- "**** Simulation run 80, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.33mb total:"
|
||||
- "L0, all files 3.33mb "
|
||||
- "L0.165[42,356] 1.04us |-----------------------------------------L0.165-----------------------------------------|"
|
||||
|
@ -829,13 +829,13 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0 "
|
||||
- "L0.?[42,329] 1.04us 3.04mb|--------------------------------------L0.?--------------------------------------| "
|
||||
- "L0.?[330,356] 1.04us 292.88kb |L0.?-| "
|
||||
- "**** Simulation run 81, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:"
|
||||
- "L0, all files 2.36mb "
|
||||
- "L0.179[173,356] 1.05us |-----------------------------------------L0.179-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 2.36mb total:"
|
||||
- "**** Simulation run 81, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.32mb total:"
|
||||
- "L0, all files 3.32mb "
|
||||
- "L0.166[357,670] 1.04us |-----------------------------------------L0.166-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.32mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[173,329] 1.05us 2.01mb|-----------------------------------L0.?-----------------------------------| "
|
||||
- "L0.?[330,356] 1.05us 355.83kb |---L0.?---| "
|
||||
- "L0.?[357,658] 1.04us 3.19mb|----------------------------------------L0.?----------------------------------------| "
|
||||
- "L0.?[659,670] 1.04us 130.17kb |L0.?|"
|
||||
- "**** Simulation run 82, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 4.03mb total:"
|
||||
- "L0, all files 4.03mb "
|
||||
- "L0.180[357,670] 1.05us |-----------------------------------------L0.180-----------------------------------------|"
|
||||
|
@ -963,14 +963,14 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.?[649,658] 1.04us 131.79kb |L0.?|"
|
||||
- "**** Simulation run 98, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.52mb total:"
|
||||
- "L0, all files 3.52mb "
|
||||
- "L0.249[357,658] 1.04us |-----------------------------------------L0.249-----------------------------------------|"
|
||||
- "L0.233[357,658] 1.04us |-----------------------------------------L0.233-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.52mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[357,648] 1.04us 3.4mb|----------------------------------------L0.?-----------------------------------------| "
|
||||
- "L0.?[649,658] 1.04us 119.63kb |L0.?|"
|
||||
- "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.19mb total:"
|
||||
- "L0, all files 3.19mb "
|
||||
- "L0.233[357,658] 1.04us |-----------------------------------------L0.233-----------------------------------------|"
|
||||
- "L0.253[357,658] 1.04us |-----------------------------------------L0.253-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.19mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[357,648] 1.04us 3.08mb|----------------------------------------L0.?-----------------------------------------| "
|
||||
|
@ -1039,7 +1039,7 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.?[671,966] 1.05us 3.14mb|---------------------------------------L0.?---------------------------------------| "
|
||||
- "L0.?[967,986] 1.05us 218.33kb |L0.?|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 20 files: L0.145, L0.156, L0.167, L0.178, L0.189, L0.199, L0.205, L0.209, L0.213, L0.219, L0.223, L0.227, L0.233, L0.237, L0.243, L0.247, L0.249, L0.255, L0.261, L0.265"
|
||||
- " Soft Deleting 20 files: L0.145, L0.156, L0.167, L0.178, L0.189, L0.199, L0.205, L0.209, L0.213, L0.219, L0.223, L0.227, L0.233, L0.237, L0.243, L0.247, L0.253, L0.255, L0.261, L0.265"
|
||||
- " Creating 40 files"
|
||||
- "**** Simulation run 109, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[263]). 2 Input Files, 103.14mb total:"
|
||||
- "L0 "
|
||||
|
@ -1167,7 +1167,7 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.?[264,329] 1.05us 715.93kb |-------L0.?-------| "
|
||||
- "**** Simulation run 126, type=split(ReduceOverlap)(split_times=[263]). 1 Input Files, 2.01mb total:"
|
||||
- "L0, all files 2.01mb "
|
||||
- "L0.253[173,329] 1.05us |-----------------------------------------L0.253-----------------------------------------|"
|
||||
- "L0.249[173,329] 1.05us |-----------------------------------------L0.249-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 2.01mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[173,263] 1.05us 1.16mb|----------------------L0.?-----------------------| "
|
||||
|
@ -1194,7 +1194,7 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.?[42,263] 1.05us 2.34mb|-------------------------------L0.?--------------------------------| "
|
||||
- "L0.?[264,329] 1.05us 715.93kb |-------L0.?-------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 20 files: L0.197, L0.201, L0.203, L0.207, L0.211, L0.215, L0.217, L0.221, L0.225, L0.229, L0.231, L0.235, L0.239, L0.241, L0.245, L0.251, L0.253, L0.257, L0.259, L0.263"
|
||||
- " Soft Deleting 20 files: L0.197, L0.201, L0.203, L0.207, L0.211, L0.215, L0.217, L0.221, L0.225, L0.229, L0.231, L0.235, L0.239, L0.241, L0.245, L0.249, L0.251, L0.257, L0.259, L0.263"
|
||||
- " Creating 40 files"
|
||||
- "**** Simulation run 130, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[570, 876]). 9 Input Files, 229.77mb total:"
|
||||
- "L0 "
|
||||
|
@ -2050,7 +2050,7 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.522[584,590] 1.04us 84.83kb |L0.522| "
|
||||
- "L0.455[591,648] 1.04us 702.84kb |L0.455| "
|
||||
- "L0.289[649,658] 1.04us 119.63kb |L0.289| "
|
||||
- "L0.250[659,670] 1.04us 143.55kb |L0.250| "
|
||||
- "L0.234[659,670] 1.04us 143.55kb |L0.234| "
|
||||
- "L0.523[671,870] 1.04us 2.34mb |-----L0.523-----| "
|
||||
- "L0.524[871,876] 1.04us 72.33kb |L0.524| "
|
||||
- "L0.388[877,932] 1.04us 675.04kb |L0.388| "
|
||||
|
@ -2063,7 +2063,7 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.526[584,590] 1.04us 76.92kb |L0.526| "
|
||||
- "L0.459[591,648] 1.04us 637.32kb |L0.459| "
|
||||
- "L0.291[649,658] 1.04us 108.47kb |L0.291| "
|
||||
- "L0.234[659,670] 1.04us 130.17kb |L0.234| "
|
||||
- "L0.254[659,670] 1.04us 130.17kb |L0.254| "
|
||||
- "L0.527[671,870] 1.04us 2.12mb |-----L0.527-----| "
|
||||
- "L0.528[871,876] 1.04us 65.5kb |L0.528| "
|
||||
- "L0.392[877,966] 1.04us 982.47kb |L0.392| "
|
||||
|
@ -2119,7 +2119,7 @@ async fn random_backfill_empty_partition() {
|
|||
- "L0.344[173,263] 1.05us 1.16mb |L0.344| "
|
||||
- "L0.480[264,295] 1.05us 414.83kb |L0.480| "
|
||||
- "L0.481[296,329] 1.05us 454.98kb |L0.481| "
|
||||
- "L0.254[330,356] 1.05us 355.83kb |L0.254| "
|
||||
- "L0.250[330,356] 1.05us 355.83kb |L0.250| "
|
||||
- "L0.407[357,570] 1.05us 2.74mb |------L0.407------| "
|
||||
- "L0.543[571,583] 1.05us 160.2kb |L0.543| "
|
||||
- "L0.544[584,590] 1.05us 93.45kb |L0.544| "
|
||||
|
@ -3639,13 +3639,13 @@ async fn random_backfill_over_l2s() {
|
|||
- "L0 "
|
||||
- "L0.?[592,626] 1.03us 374.74kb|----------------L0.?-----------------| "
|
||||
- "L0.?[627,670] 1.03us 484.96kb |---------------------L0.?----------------------| "
|
||||
- "**** Simulation run 142, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:"
|
||||
- "L0, all files 817.09kb "
|
||||
- "L0.281[295,356] 1.03us |-----------------------------------------L0.281-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:"
|
||||
- "**** Simulation run 142, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:"
|
||||
- "L0, all files 672.54kb "
|
||||
- "L0.279[295,356] 1.03us |-----------------------------------------L0.279-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:"
|
||||
- "L0 "
|
||||
- "L0.?[295,334] 1.03us 522.4kb|-------------------------L0.?--------------------------| "
|
||||
- "L0.?[335,356] 1.03us 294.69kb |------------L0.?------------| "
|
||||
- "L0.?[295,334] 1.03us 429.99kb|-------------------------L0.?--------------------------| "
|
||||
- "L0.?[335,356] 1.03us 242.56kb |------------L0.?------------| "
|
||||
- "**** Simulation run 143, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 677.02kb total:"
|
||||
- "L0, all files 677.02kb "
|
||||
- "L0.328[592,629] 1.03us |-----------------------------------------L0.328-----------------------------------------|"
|
||||
|
@ -3695,13 +3695,13 @@ async fn random_backfill_over_l2s() {
|
|||
- "L0 "
|
||||
- "L0.?[592,626] 1.04us 455.28kb|----------------L0.?-----------------| "
|
||||
- "L0.?[627,670] 1.04us 589.19kb |---------------------L0.?----------------------| "
|
||||
- "**** Simulation run 150, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:"
|
||||
- "L0, all files 672.54kb "
|
||||
- "L0.279[295,356] 1.03us |-----------------------------------------L0.279-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:"
|
||||
- "**** Simulation run 150, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:"
|
||||
- "L0, all files 817.09kb "
|
||||
- "L0.289[295,356] 1.04us |-----------------------------------------L0.289-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:"
|
||||
- "L0 "
|
||||
- "L0.?[295,334] 1.03us 429.99kb|-------------------------L0.?--------------------------| "
|
||||
- "L0.?[335,356] 1.03us 242.56kb |------------L0.?------------| "
|
||||
- "L0.?[295,334] 1.04us 522.4kb|-------------------------L0.?--------------------------| "
|
||||
- "L0.?[335,356] 1.04us 294.69kb |------------L0.?------------| "
|
||||
- "**** Simulation run 151, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 1.02mb total:"
|
||||
- "L0, all files 1.02mb "
|
||||
- "L0.324[592,670] 1.03us |-----------------------------------------L0.324-----------------------------------------|"
|
||||
|
@ -3711,11 +3711,11 @@ async fn random_backfill_over_l2s() {
|
|||
- "L0.?[627,670] 1.03us 589.19kb |---------------------L0.?----------------------| "
|
||||
- "**** Simulation run 152, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:"
|
||||
- "L0, all files 817.09kb "
|
||||
- "L0.289[295,356] 1.04us |-----------------------------------------L0.289-----------------------------------------|"
|
||||
- "L0.281[295,356] 1.03us |-----------------------------------------L0.281-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:"
|
||||
- "L0 "
|
||||
- "L0.?[295,334] 1.04us 522.4kb|-------------------------L0.?--------------------------| "
|
||||
- "L0.?[335,356] 1.04us 294.69kb |------------L0.?------------| "
|
||||
- "L0.?[295,334] 1.03us 522.4kb|-------------------------L0.?--------------------------| "
|
||||
- "L0.?[335,356] 1.03us 294.69kb |------------L0.?------------| "
|
||||
- "**** Simulation run 153, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 677.02kb total:"
|
||||
- "L0, all files 677.02kb "
|
||||
- "L0.342[592,629] 1.04us |-----------------------------------------L0.342-----------------------------------------|"
|
||||
|
@ -4236,8 +4236,8 @@ async fn random_backfill_over_l2s() {
|
|||
- " Creating 32 files"
|
||||
- "**** Simulation run 223, type=split(CompactAndSplitOutput(ManySmallFiles))(split_times=[610]). 200 Input Files, 166.42mb total:"
|
||||
- "L0 "
|
||||
- "L0.402[295,334] 1.03us 429.99kb |L0.402| "
|
||||
- "L0.403[335,356] 1.03us 242.56kb |L0.403| "
|
||||
- "L0.386[295,334] 1.03us 429.99kb |L0.386| "
|
||||
- "L0.387[335,356] 1.03us 242.56kb |L0.387| "
|
||||
- "L0.319[358,591] 1.03us 2.48mb |-------L0.319-------| "
|
||||
- "L0.455[592,619] 1.03us 297.59kb |L0.455| "
|
||||
- "L0.456[620,626] 1.03us 77.15kb |L0.456| "
|
||||
|
@ -4247,8 +4247,8 @@ async fn random_backfill_over_l2s() {
|
|||
- "L0.458[904,986] 1.03us 918.23kb |L0.458|"
|
||||
- "L0.517[173,275] 1.03us 1.31mb |L0.517-| "
|
||||
- "L0.518[276,294] 1.03us 250.4kb |L0.518| "
|
||||
- "L0.386[295,334] 1.03us 522.4kb |L0.386| "
|
||||
- "L0.387[335,356] 1.03us 294.69kb |L0.387| "
|
||||
- "L0.406[295,334] 1.03us 522.4kb |L0.406| "
|
||||
- "L0.407[335,356] 1.03us 294.69kb |L0.407| "
|
||||
- "L0.216[357,357] 1.03us 0b |L0.216| "
|
||||
- "L0.323[358,591] 1.03us 3.01mb |-------L0.323-------| "
|
||||
- "L0.459[592,619] 1.03us 361.55kb |L0.459| "
|
||||
|
@ -4295,8 +4295,8 @@ async fn random_backfill_over_l2s() {
|
|||
- "L0.472[904,986] 1.04us 918.23kb |L0.472|"
|
||||
- "L0.525[173,275] 1.04us 1.31mb |L0.525-| "
|
||||
- "L0.526[276,294] 1.04us 250.4kb |L0.526| "
|
||||
- "L0.406[295,334] 1.04us 522.4kb |L0.406| "
|
||||
- "L0.407[335,356] 1.04us 294.69kb |L0.407| "
|
||||
- "L0.402[295,334] 1.04us 522.4kb |L0.402| "
|
||||
- "L0.403[335,356] 1.04us 294.69kb |L0.403| "
|
||||
- "L0.230[357,357] 1.04us 0b |L0.230| "
|
||||
- "L0.337[358,591] 1.04us 3.01mb |-------L0.337-------| "
|
||||
- "L0.473[592,619] 1.04us 361.55kb |L0.473| "
|
||||
|
|
|
@ -858,11 +858,11 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.?[171444,200000] 5ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 51, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|"
|
||||
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 8ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "L0.?[171443,171443] 6ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.73[171443,200000] 9ns |-----------------------------------------L0.73------------------------------------------|"
|
||||
|
@ -879,18 +879,18 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.?[171444,200000] 10ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 6ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 7ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 8ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 27 files: L0.42, L0.44, L0.45, L0.49, L0.51, L0.52, L0.56, L0.58, L0.59, L0.63, L0.65, L0.66, L0.70, L0.72, L0.73, L0.77, L0.79, L0.80, L0.99, L0.103, L0.107, L0.111, L0.115, L0.119, L1.121, L1.122, L1.123"
|
||||
- " Creating 55 files"
|
||||
|
@ -1213,7 +1213,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.?[156351,160867] 6ns 208.25kb |--------L0.?--------| "
|
||||
- "**** Simulation run 95, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.176[171444,200000] 6ns|-----------------------------------------L0.176-----------------------------------------|"
|
||||
- "L0.170[171444,200000] 6ns|-----------------------------------------L0.170-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171444,198370] 6ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
|
||||
|
@ -1227,7 +1227,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.?[156351,160867] 7ns 208.25kb |--------L0.?--------| "
|
||||
- "**** Simulation run 97, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.178[171444,200000] 7ns|-----------------------------------------L0.178-----------------------------------------|"
|
||||
- "L0.176[171444,200000] 7ns|-----------------------------------------L0.176-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171444,198370] 7ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
|
||||
|
@ -1241,7 +1241,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.?[156351,160867] 8ns 208.25kb |--------L0.?--------| "
|
||||
- "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.170[171444,200000] 8ns|-----------------------------------------L0.170-----------------------------------------|"
|
||||
- "L0.178[171444,200000] 8ns|-----------------------------------------L0.178-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171444,198370] 8ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
|
||||
|
@ -1389,7 +1389,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.257[142887,156350] 6ns 620.71kb |---L0.257---| "
|
||||
- "L0.258[156351,160867] 6ns 208.25kb |L0.258| "
|
||||
- "L0.186[160868,171442] 6ns 487.56kb |-L0.186--| "
|
||||
- "L0.175[171443,171443] 6ns 0b |L0.175| "
|
||||
- "L0.169[171443,171443] 6ns 0b |L0.169| "
|
||||
- "L0.259[171444,198370] 6ns 1.21mb |----------L0.259----------| "
|
||||
- "L0.260[198371,200000] 6ns 75.17kb |L0.260|"
|
||||
- "L1 "
|
||||
|
@ -1404,7 +1404,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L1.?[149666,185000] 6ns 10mb |---------------L1.?----------------| "
|
||||
- "L1.?[185001,200000] 6ns 4.25mb |----L1.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.175, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
|
||||
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.169, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 116, type=split(HighL0OverlapTotalBacklog)(split_times=[142886]). 1 Input Files, 10mb total:"
|
||||
- "L1, all files 10mb "
|
||||
|
@ -1743,7 +1743,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "**** Simulation run 156, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[170977]). 8 Input Files, 19.54mb total:"
|
||||
- "L0 "
|
||||
- "L0.190[160868,171442] 7ns 487.56kb |----L0.190----| "
|
||||
- "L0.177[171443,171443] 7ns 0b |L0.177| "
|
||||
- "L0.175[171443,171443] 7ns 0b |L0.175| "
|
||||
- "L0.309[171444,185000] 7ns 625.13kb |------L0.309------| "
|
||||
- "L0.310[185001,198370] 7ns 616.55kb |------L0.310------| "
|
||||
- "L0.264[198371,200000] 7ns 75.17kb |L0.264|"
|
||||
|
@ -1756,7 +1756,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L1.?[140564,170977] 7ns 10mb|--------------------L1.?--------------------| "
|
||||
- "L1.?[170978,200000] 7ns 9.54mb |------------------L1.?-------------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 8 files: L0.177, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
|
||||
- " Soft Deleting 8 files: L0.175, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
|
||||
- " Creating 2 files"
|
||||
- "**** Simulation run 157, type=split(ReduceOverlap)(split_times=[170977]). 1 Input Files, 487.56kb total:"
|
||||
- "L0, all files 487.56kb "
|
||||
|
@ -1924,7 +1924,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L0.266[156351,160867] 8ns 208.25kb |L0.266| "
|
||||
- "L0.387[160868,170977] 8ns 466.12kb |---L0.387----| "
|
||||
- "L0.388[170978,171442] 8ns 21.44kb |L0.388| "
|
||||
- "L0.169[171443,171443] 8ns 0b |L0.169| "
|
||||
- "L0.177[171443,171443] 8ns 0b |L0.177| "
|
||||
- "L0.313[171444,185000] 8ns 625.13kb |------L0.313------| "
|
||||
- "L0.314[185001,198370] 8ns 616.55kb |------L0.314------| "
|
||||
- "L0.268[198371,200000] 8ns 75.17kb |L0.268|"
|
||||
|
@ -1937,7 +1937,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
|
|||
- "L1.?[167315,194064] 8ns 10mb |-----------------L1.?-----------------| "
|
||||
- "L1.?[194065,200000] 8ns 2.22mb |-L1.?-| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 13 files: L0.159, L0.169, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
|
||||
- " Soft Deleting 13 files: L0.159, L0.177, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 173, type=split(ReduceOverlap)(split_times=[167314]). 1 Input Files, 466.12kb total:"
|
||||
- "L0, all files 466.12kb "
|
||||
|
@ -2812,11 +2812,11 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.?[171444,200000] 5ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 51, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|"
|
||||
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 8ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "L0.?[171443,171443] 6ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.73[171443,200000] 9ns |-----------------------------------------L0.73------------------------------------------|"
|
||||
|
@ -2833,18 +2833,18 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.?[171444,200000] 10ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 6ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 7ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171443,171443] 8ns 0b|L0.?| "
|
||||
- "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 27 files: L0.42, L0.44, L0.45, L0.49, L0.51, L0.52, L0.56, L0.58, L0.59, L0.63, L0.65, L0.66, L0.70, L0.72, L0.73, L0.77, L0.79, L0.80, L0.99, L0.103, L0.107, L0.111, L0.115, L0.119, L1.121, L1.122, L1.123"
|
||||
- " Creating 55 files"
|
||||
|
@ -3167,7 +3167,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.?[156351,160867] 6ns 208.25kb |--------L0.?--------| "
|
||||
- "**** Simulation run 95, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.176[171444,200000] 6ns|-----------------------------------------L0.176-----------------------------------------|"
|
||||
- "L0.170[171444,200000] 6ns|-----------------------------------------L0.170-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171444,198370] 6ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
|
||||
|
@ -3181,7 +3181,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.?[156351,160867] 7ns 208.25kb |--------L0.?--------| "
|
||||
- "**** Simulation run 97, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.178[171444,200000] 7ns|-----------------------------------------L0.178-----------------------------------------|"
|
||||
- "L0.176[171444,200000] 7ns|-----------------------------------------L0.176-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171444,198370] 7ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
|
||||
|
@ -3195,7 +3195,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.?[156351,160867] 8ns 208.25kb |--------L0.?--------| "
|
||||
- "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
|
||||
- "L0, all files 1.29mb "
|
||||
- "L0.170[171444,200000] 8ns|-----------------------------------------L0.170-----------------------------------------|"
|
||||
- "L0.178[171444,200000] 8ns|-----------------------------------------L0.178-----------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[171444,198370] 8ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
|
||||
|
@ -3343,7 +3343,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.257[142887,156350] 6ns 620.71kb |---L0.257---| "
|
||||
- "L0.258[156351,160867] 6ns 208.25kb |L0.258| "
|
||||
- "L0.186[160868,171442] 6ns 487.56kb |-L0.186--| "
|
||||
- "L0.175[171443,171443] 6ns 0b |L0.175| "
|
||||
- "L0.169[171443,171443] 6ns 0b |L0.169| "
|
||||
- "L0.259[171444,198370] 6ns 1.21mb |----------L0.259----------| "
|
||||
- "L0.260[198371,200000] 6ns 75.17kb |L0.260|"
|
||||
- "L1 "
|
||||
|
@ -3358,7 +3358,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L1.?[149666,185000] 6ns 10mb |---------------L1.?----------------| "
|
||||
- "L1.?[185001,200000] 6ns 4.25mb |----L1.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.175, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
|
||||
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.169, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 116, type=split(HighL0OverlapTotalBacklog)(split_times=[142886]). 1 Input Files, 10mb total:"
|
||||
- "L1, all files 10mb "
|
||||
|
@ -3697,7 +3697,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "**** Simulation run 156, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[170977]). 8 Input Files, 19.54mb total:"
|
||||
- "L0 "
|
||||
- "L0.190[160868,171442] 7ns 487.56kb |----L0.190----| "
|
||||
- "L0.177[171443,171443] 7ns 0b |L0.177| "
|
||||
- "L0.175[171443,171443] 7ns 0b |L0.175| "
|
||||
- "L0.309[171444,185000] 7ns 625.13kb |------L0.309------| "
|
||||
- "L0.310[185001,198370] 7ns 616.55kb |------L0.310------| "
|
||||
- "L0.264[198371,200000] 7ns 75.17kb |L0.264|"
|
||||
|
@ -3710,7 +3710,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L1.?[140564,170977] 7ns 10mb|--------------------L1.?--------------------| "
|
||||
- "L1.?[170978,200000] 7ns 9.54mb |------------------L1.?-------------------| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 8 files: L0.177, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
|
||||
- " Soft Deleting 8 files: L0.175, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
|
||||
- " Creating 2 files"
|
||||
- "**** Simulation run 157, type=split(ReduceOverlap)(split_times=[170977]). 1 Input Files, 487.56kb total:"
|
||||
- "L0, all files 487.56kb "
|
||||
|
@ -3878,7 +3878,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L0.266[156351,160867] 8ns 208.25kb |L0.266| "
|
||||
- "L0.387[160868,170977] 8ns 466.12kb |---L0.387----| "
|
||||
- "L0.388[170978,171442] 8ns 21.44kb |L0.388| "
|
||||
- "L0.169[171443,171443] 8ns 0b |L0.169| "
|
||||
- "L0.177[171443,171443] 8ns 0b |L0.177| "
|
||||
- "L0.313[171444,185000] 8ns 625.13kb |------L0.313------| "
|
||||
- "L0.314[185001,198370] 8ns 616.55kb |------L0.314------| "
|
||||
- "L0.268[198371,200000] 8ns 75.17kb |L0.268|"
|
||||
|
@ -3891,7 +3891,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
|
|||
- "L1.?[167315,194064] 8ns 10mb |-----------------L1.?-----------------| "
|
||||
- "L1.?[194065,200000] 8ns 2.22mb |-L1.?-| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 13 files: L0.159, L0.169, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
|
||||
- " Soft Deleting 13 files: L0.159, L0.177, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
|
||||
- " Creating 3 files"
|
||||
- "**** Simulation run 173, type=split(ReduceOverlap)(split_times=[167314]). 1 Input Files, 466.12kb total:"
|
||||
- "L0, all files 466.12kb "
|
||||
|
|
|
@ -73,43 +73,43 @@ async fn many_l0_files_different_created_order() {
|
|||
@r###"
|
||||
---
|
||||
- "**** Input Files "
|
||||
- "L0, all files 2.55kb "
|
||||
- "L0, all files 2.54kb "
|
||||
- "L0.1[10,22] 1ns |---------L0.1----------| "
|
||||
- "L0.2[30,42] 2ns |---------L0.2----------| "
|
||||
- "L0.3[20,32] 3ns |---------L0.3----------| "
|
||||
- "L0.4[40,52] 4ns |---------L0.4----------| "
|
||||
- "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:"
|
||||
- "L0, all files 2.55kb "
|
||||
- "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:"
|
||||
- "L0, all files 2.54kb "
|
||||
- "L0.1[10,22] 1ns |-------------L0.1--------------| "
|
||||
- "L0.2[30,42] 2ns |-------------L0.2--------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:"
|
||||
- "L0, all files 5.1kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:"
|
||||
- "L0, all files 5.09kb "
|
||||
- "L0.?[10,42] 2ns |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.1, L0.2"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:"
|
||||
- "L0, all files 2.55kb "
|
||||
- "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:"
|
||||
- "L0, all files 2.54kb "
|
||||
- "L0.3[20,32] 3ns |-------------L0.3--------------| "
|
||||
- "L0.4[40,52] 4ns |-------------L0.4--------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:"
|
||||
- "L0, all files 5.1kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:"
|
||||
- "L0, all files 5.09kb "
|
||||
- "L0.?[20,52] 4ns |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.3, L0.4"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.2kb total:"
|
||||
- "L0, all files 5.1kb "
|
||||
- "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.18kb total:"
|
||||
- "L0, all files 5.09kb "
|
||||
- "L0.6[20,52] 4ns |-------------------------------L0.6-------------------------------| "
|
||||
- "L0.5[10,42] 2ns |-------------------------------L0.5-------------------------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 10.2kb total:"
|
||||
- "L1, all files 10.2kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 10.18kb total:"
|
||||
- "L1, all files 10.18kb "
|
||||
- "L1.?[10,52] 4ns |------------------------------------------L1.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.5, L0.6"
|
||||
- " Creating 1 files"
|
||||
- "**** Final Output Files (20.39kb written)"
|
||||
- "L1, all files 10.2kb "
|
||||
- "**** Final Output Files (20.36kb written)"
|
||||
- "L1, all files 10.18kb "
|
||||
- "L1.7[10,52] 4ns |------------------------------------------L1.7------------------------------------------|"
|
||||
"###
|
||||
);
|
||||
|
@ -183,43 +183,43 @@ async fn many_l1_files_different_created_order() {
|
|||
@r###"
|
||||
---
|
||||
- "**** Input Files "
|
||||
- "L1, all files 2.55kb "
|
||||
- "L1, all files 2.54kb "
|
||||
- "L1.1[11,20] 1ns |-------L1.1-------| "
|
||||
- "L1.2[31,40] 2ns |-------L1.2-------| "
|
||||
- "L1.3[21,30] 3ns |-------L1.3-------| "
|
||||
- "L1.4[41,50] 4ns |-------L1.4-------| "
|
||||
- "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:"
|
||||
- "L1, all files 2.55kb "
|
||||
- "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:"
|
||||
- "L1, all files 2.54kb "
|
||||
- "L1.1[11,20] 1ns |------------------L1.1------------------| "
|
||||
- "L1.3[21,30] 3ns |------------------L1.3------------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:"
|
||||
- "L1, all files 5.1kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:"
|
||||
- "L1, all files 5.09kb "
|
||||
- "L1.?[11,30] 3ns |------------------------------------------L1.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L1.1, L1.3"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:"
|
||||
- "L1, all files 2.55kb "
|
||||
- "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:"
|
||||
- "L1, all files 2.54kb "
|
||||
- "L1.2[31,40] 2ns |------------------L1.2------------------| "
|
||||
- "L1.4[41,50] 4ns |------------------L1.4------------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:"
|
||||
- "L1, all files 5.1kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:"
|
||||
- "L1, all files 5.09kb "
|
||||
- "L1.?[31,50] 4ns |------------------------------------------L1.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L1.2, L1.4"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.2kb total:"
|
||||
- "L1, all files 5.1kb "
|
||||
- "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.18kb total:"
|
||||
- "L1, all files 5.09kb "
|
||||
- "L1.6[31,50] 4ns |------------------L1.6-------------------| "
|
||||
- "L1.5[11,30] 3ns |------------------L1.5-------------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 10.2kb total:"
|
||||
- "L2, all files 10.2kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 10.18kb total:"
|
||||
- "L2, all files 10.18kb "
|
||||
- "L2.?[11,50] 4ns |------------------------------------------L2.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L1.5, L1.6"
|
||||
- " Creating 1 files"
|
||||
- "**** Final Output Files (20.39kb written)"
|
||||
- "L2, all files 10.2kb "
|
||||
- "**** Final Output Files (20.36kb written)"
|
||||
- "L2, all files 10.18kb "
|
||||
- "L2.7[11,50] 4ns |------------------------------------------L2.7------------------------------------------|"
|
||||
"###
|
||||
);
|
||||
|
@ -291,43 +291,43 @@ async fn many_l0_files_different_created_order_non_overlap() {
|
|||
@r###"
|
||||
---
|
||||
- "**** Input Files "
|
||||
- "L0, all files 2.55kb "
|
||||
- "L0, all files 2.54kb "
|
||||
- "L0.1[11,20] 1ns |-------L0.1-------| "
|
||||
- "L0.2[31,40] 2ns |-------L0.2-------| "
|
||||
- "L0.3[21,30] 3ns |-------L0.3-------| "
|
||||
- "L0.4[41,50] 4ns |-------L0.4-------| "
|
||||
- "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:"
|
||||
- "L0, all files 2.55kb "
|
||||
- "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:"
|
||||
- "L0, all files 2.54kb "
|
||||
- "L0.1[11,20] 1ns |----------L0.1-----------| "
|
||||
- "L0.2[31,40] 2ns |----------L0.2-----------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:"
|
||||
- "L0, all files 5.1kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:"
|
||||
- "L0, all files 5.09kb "
|
||||
- "L0.?[11,40] 2ns |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.1, L0.2"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:"
|
||||
- "L0, all files 2.55kb "
|
||||
- "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:"
|
||||
- "L0, all files 2.54kb "
|
||||
- "L0.3[21,30] 3ns |----------L0.3-----------| "
|
||||
- "L0.4[41,50] 4ns |----------L0.4-----------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:"
|
||||
- "L0, all files 5.1kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:"
|
||||
- "L0, all files 5.09kb "
|
||||
- "L0.?[21,50] 4ns |------------------------------------------L0.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.3, L0.4"
|
||||
- " Creating 1 files"
|
||||
- "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.2kb total:"
|
||||
- "L0, all files 5.1kb "
|
||||
- "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.18kb total:"
|
||||
- "L0, all files 5.09kb "
|
||||
- "L0.6[21,50] 4ns |------------------------------L0.6------------------------------| "
|
||||
- "L0.5[11,40] 2ns |------------------------------L0.5------------------------------| "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 10.2kb total:"
|
||||
- "L1, all files 10.2kb "
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 10.18kb total:"
|
||||
- "L1, all files 10.18kb "
|
||||
- "L1.?[11,50] 4ns |------------------------------------------L1.?------------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L0.5, L0.6"
|
||||
- " Creating 1 files"
|
||||
- "**** Final Output Files (20.39kb written)"
|
||||
- "L1, all files 10.2kb "
|
||||
- "**** Final Output Files (20.36kb written)"
|
||||
- "L1, all files 10.18kb "
|
||||
- "L1.7[11,50] 4ns |------------------------------------------L1.7------------------------------------------|"
|
||||
"###
|
||||
);
|
||||
|
|
|
@ -37,7 +37,7 @@ use compactor2::{
|
|||
config::{CompactionType, Config, PartitionsSourceConfig},
|
||||
hardcoded_components, Components, PanicDataFusionPlanner, PartitionInfo,
|
||||
};
|
||||
use data_types::{ColumnType, CompactionLevel, ParquetFile, TableId, TRANSITION_SHARD_NUMBER};
|
||||
use data_types::{ColumnType, CompactionLevel, ParquetFile, TableId};
|
||||
use datafusion::arrow::record_batch::RecordBatch;
|
||||
use datafusion_util::config::register_iox_object_store;
|
||||
use futures::TryStreamExt;
|
||||
|
@ -45,7 +45,7 @@ use iox_catalog::interface::Catalog;
|
|||
use iox_query::exec::ExecutorType;
|
||||
use iox_tests::{
|
||||
ParquetFileBuilder, TestCatalog, TestNamespace, TestParquetFileBuilder, TestPartition,
|
||||
TestShard, TestTable,
|
||||
TestTable,
|
||||
};
|
||||
use iox_time::{MockProvider, Time, TimeProvider};
|
||||
use object_store::{path::Path, DynObjectStore};
|
||||
|
@ -54,7 +54,6 @@ use schema::sort::SortKey;
|
|||
use tracker::AsyncSemaphoreMetrics;
|
||||
|
||||
// Default values for the test setup builder
|
||||
const SHARD_INDEX: i32 = TRANSITION_SHARD_NUMBER;
|
||||
const PARTITION_THRESHOLD: Duration = Duration::from_secs(10 * 60); // 10min
|
||||
const MAX_DESIRE_FILE_SIZE: u64 = 100 * 1024;
|
||||
const PERCENTAGE_MAX_FILE_SIZE: u16 = 5;
|
||||
|
@ -70,7 +69,6 @@ pub struct TestSetupBuilder<const WITH_FILES: bool> {
|
|||
config: Config,
|
||||
catalog: Arc<TestCatalog>,
|
||||
ns: Arc<TestNamespace>,
|
||||
shard: Arc<TestShard>,
|
||||
table: Arc<TestTable>,
|
||||
partition: Arc<TestPartition>,
|
||||
files: Vec<ParquetFile>,
|
||||
|
@ -88,7 +86,6 @@ impl TestSetupBuilder<false> {
|
|||
pub async fn new() -> Self {
|
||||
let catalog = TestCatalog::new();
|
||||
let ns = catalog.create_namespace_1hr_retention("ns").await;
|
||||
let shard = ns.create_shard(SHARD_INDEX).await;
|
||||
let table = ns.create_table("table").await;
|
||||
table.create_column("field_int", ColumnType::I64).await;
|
||||
table.create_column("tag1", ColumnType::Tag).await;
|
||||
|
@ -96,10 +93,7 @@ impl TestSetupBuilder<false> {
|
|||
table.create_column("tag3", ColumnType::Tag).await;
|
||||
table.create_column("time", ColumnType::Time).await;
|
||||
|
||||
let partition = table
|
||||
.with_shard(&shard)
|
||||
.create_partition("2022-07-13")
|
||||
.await;
|
||||
let partition = table.create_partition("2022-07-13").await;
|
||||
|
||||
// The sort key comes from the catalog and should be the union of all tags the
|
||||
// ingester has seen
|
||||
|
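The builder hunks above drop the intermediate shard step entirely. As a hedged illustration (assuming the `iox_tests` helpers behave exactly as the surrounding diff shows, and with most columns omitted), the simplified setup now reads roughly:

```rust
// Sketch only: mirrors the TestSetupBuilder changes above. The helper names
// (TestCatalog, create_namespace_1hr_retention, create_table, create_column,
// create_partition) are taken from this diff; everything else is illustrative.
let catalog = TestCatalog::new();
let ns = catalog.create_namespace_1hr_retention("ns").await;
let table = ns.create_table("table").await;
table.create_column("field_int", ColumnType::I64).await;
table.create_column("tag1", ColumnType::Tag).await;
table.create_column("time", ColumnType::Time).await;

// Partitions now hang directly off the table; there is no `ns.create_shard(..)`
// and no `.with_shard(&shard)` hop in between.
let partition = table.create_partition("2022-07-13").await;
```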
@ -122,7 +116,6 @@ impl TestSetupBuilder<false> {
|
|||
|
||||
let config = Config {
|
||||
compaction_type: Default::default(),
|
||||
shard_id: shard.shard.id,
|
||||
metric_registry: catalog.metric_registry(),
|
||||
catalog: catalog.catalog(),
|
||||
parquet_store_real: catalog.parquet_store.clone(),
|
||||
|
@ -162,7 +155,6 @@ impl TestSetupBuilder<false> {
|
|||
config,
|
||||
catalog,
|
||||
ns,
|
||||
shard,
|
||||
table,
|
||||
partition,
|
||||
files: vec![],
|
||||
|
@ -299,7 +291,6 @@ impl TestSetupBuilder<false> {
|
|||
config: self.config,
|
||||
catalog: self.catalog,
|
||||
ns: self.ns,
|
||||
shard: self.shard,
|
||||
table: self.table,
|
||||
partition: self.partition,
|
||||
files,
|
||||
|
@ -333,7 +324,6 @@ impl TestSetupBuilder<false> {
|
|||
config: self.config.clone(),
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
ns: Arc::clone(&self.ns),
|
||||
shard: Arc::clone(&self.shard),
|
||||
table: Arc::clone(&self.table),
|
||||
partition: Arc::clone(&self.partition),
|
||||
files,
|
||||
|
@ -368,7 +358,6 @@ impl TestSetupBuilder<false> {
|
|||
config: self.config.clone(),
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
ns: Arc::clone(&self.ns),
|
||||
shard: Arc::clone(&self.shard),
|
||||
table: Arc::clone(&self.table),
|
||||
partition: Arc::clone(&self.partition),
|
||||
files,
|
||||
@ -8,7 +8,7 @@ use std::{
|
|||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{
|
||||
ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, SequenceNumber, ShardId, Timestamp,
|
||||
ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, SequenceNumber, Timestamp,
|
||||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use iox_time::Time;
|
||||
|
@ -202,7 +202,6 @@ impl SimulatedFile {
|
|||
} = self;
|
||||
|
||||
ParquetFileParams {
|
||||
shard_id: ShardId::new(1),
|
||||
namespace_id: partition_info.namespace_id,
|
||||
table_id: partition_info.table.id,
|
||||
partition_id: partition_info.partition_id,
|
||||
@ -24,7 +24,6 @@ use schema::{
    builder::SchemaBuilder, sort::SortKey, InfluxColumnType, InfluxFieldType, Schema,
    TIME_COLUMN_NAME,
};
use serde::Deserialize;
use sqlx::postgres::PgHasArrayType;
use std::{
    borrow::Borrow,

@ -38,13 +37,6 @@ use std::{
};
use uuid::Uuid;

/// Magic number to be used shard indices and shard ids in "kafkaless".
pub const TRANSITION_SHARD_NUMBER: i32 = 1234;
/// In kafkaless mode all new persisted data uses this shard id.
pub const TRANSITION_SHARD_ID: ShardId = ShardId::new(TRANSITION_SHARD_NUMBER as i64);
/// In kafkaless mode all new persisted data uses this shard index.
pub const TRANSITION_SHARD_INDEX: ShardIndex = ShardIndex::new(TRANSITION_SHARD_NUMBER);

/// Compaction levels
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, sqlx::Type)]
#[repr(i16)]
@ -215,61 +207,6 @@ impl PgHasArrayType for ColumnId {
    }
}

/// Unique ID for a `Shard`, assigned by the catalog. Joins to other catalog tables to uniquely
/// identify shards independently of the underlying write buffer implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct ShardId(i64);

#[allow(missing_docs)]
impl ShardId {
    pub const fn new(v: i64) -> Self {
        Self(v)
    }
    pub fn get(&self) -> i64 {
        self.0
    }
}

impl std::fmt::Display for ShardId {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

/// The index of the shard in the set of shards. When Kafka is used as the write buffer, this is
/// the Kafka Partition ID. Used by the router and write buffer to shard requests to a particular
/// index in a set of shards.
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
#[serde(transparent)]
pub struct ShardIndex(i32);

#[allow(missing_docs)]
impl ShardIndex {
    pub const fn new(v: i32) -> Self {
        Self(v)
    }
    pub fn get(&self) -> i32 {
        self.0
    }
}

impl std::fmt::Display for ShardIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl std::str::FromStr for ShardIndex {
    type Err = std::num::ParseIntError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let v: i32 = s.parse()?;
        Ok(Self(v))
    }
}

/// Unique ID for a `Partition`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type, sqlx::FromRow)]
#[sqlx(transparent)]
@ -769,24 +706,6 @@ pub fn column_type_from_field(field_value: &FieldValue) -> ColumnType {
    }
}

/// Data object for a shard. Only one shard record can exist for a given topic and shard
/// index (enforced via uniqueness constraint).
#[derive(Debug, Copy, Clone, PartialEq, Eq, sqlx::FromRow)]
pub struct Shard {
    /// the id of the shard, assigned by the catalog
    pub id: ShardId,
    /// the topic the shard is reading from
    pub topic_id: TopicId,
    /// the shard index of the shard the sequence numbers are coming from, sharded by the router
    /// and write buffer
    pub shard_index: ShardIndex,
    /// The minimum unpersisted sequence number. Because different tables
    /// can be persisted at different times, it is possible some data has been persisted
    /// with a higher sequence number than this. However, all data with a sequence number
    /// lower than this must have been persisted to Parquet.
    pub min_unpersisted_sequence_number: SequenceNumber,
}

/// Defines an partition via an arbitrary string within a table within
/// a namespace.
///
@ -880,8 +799,6 @@ impl sqlx::Decode<'_, sqlx::Sqlite> for PartitionKey {
|
|||
pub struct Partition {
|
||||
/// the id of the partition
|
||||
pub id: PartitionId,
|
||||
/// the shard the data in the partition arrived from
|
||||
pub shard_id: ShardId,
|
||||
/// the table the partition is under
|
||||
pub table_id: TableId,
|
||||
/// the string key of the partition
|
||||
|
@ -1020,8 +937,6 @@ impl Deref for ColumnSet {
|
|||
pub struct ParquetFile {
|
||||
/// the id of the file in the catalog
|
||||
pub id: ParquetFileId,
|
||||
/// the shard that sequenced writes that went into this file
|
||||
pub shard_id: ShardId,
|
||||
/// the namespace
|
||||
pub namespace_id: NamespaceId,
|
||||
/// the table
|
||||
|
@ -1084,7 +999,6 @@ impl ParquetFile {
|
|||
pub fn from_params(params: ParquetFileParams, id: ParquetFileId) -> Self {
|
||||
Self {
|
||||
id,
|
||||
shard_id: params.shard_id,
|
||||
namespace_id: params.namespace_id,
|
||||
table_id: params.table_id,
|
||||
partition_id: params.partition_id,
|
||||
|
@ -1122,8 +1036,6 @@ impl ParquetFile {
|
|||
/// Data for a parquet file to be inserted into the catalog.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ParquetFileParams {
|
||||
/// the shard that sequenced writes that went into this file
|
||||
pub shard_id: ShardId,
|
||||
/// the namespace
|
||||
pub namespace_id: NamespaceId,
|
||||
/// the table
|
||||
|
@ -1155,7 +1067,6 @@ pub struct ParquetFileParams {
|
|||
impl From<ParquetFile> for ParquetFileParams {
|
||||
fn from(value: ParquetFile) -> Self {
|
||||
Self {
|
||||
shard_id: value.shard_id,
|
||||
namespace_id: value.namespace_id,
|
||||
table_id: value.table_id,
|
||||
partition_id: value.partition_id,
|
||||
|
|
|
@ -138,7 +138,7 @@ mod tests {
|
|||
use chrono::TimeZone;
|
||||
use data_types::{
|
||||
ColumnId, ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileParams,
|
||||
PartitionId, SequenceNumber, ShardId, ShardIndex, TableId, Timestamp,
|
||||
PartitionId, SequenceNumber, TableId, Timestamp,
|
||||
};
|
||||
use iox_catalog::{interface::Catalog, mem::MemCatalog};
|
||||
use object_store::path::Path;
|
||||
|
@ -167,19 +167,13 @@ mod tests {
|
|||
.create_or_get("test_table", namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1))
|
||||
.await
|
||||
.unwrap();
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get("one".into(), shard.id, table.id)
|
||||
.create_or_get("one".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace.id,
|
||||
table_id: partition.table_id,
|
||||
partition_id: partition.id,
|
||||
|
@ -213,7 +207,6 @@ mod tests {
|
|||
let location = ParquetFilePath::new(
|
||||
file_in_catalog.namespace_id,
|
||||
file_in_catalog.table_id,
|
||||
file_in_catalog.shard_id,
|
||||
file_in_catalog.partition_id,
|
||||
file_in_catalog.object_store_id,
|
||||
)
|
||||
|
@ -241,7 +234,6 @@ mod tests {
|
|||
let location = ParquetFilePath::new(
|
||||
NamespaceId::new(1),
|
||||
TableId::new(2),
|
||||
ShardId::new(3),
|
||||
PartitionId::new(4),
|
||||
Uuid::new_v4(),
|
||||
)
|
||||
|
@ -287,7 +279,6 @@ mod tests {
|
|||
let location = ParquetFilePath::new(
|
||||
file_in_catalog.namespace_id,
|
||||
file_in_catalog.table_id,
|
||||
file_in_catalog.shard_id,
|
||||
file_in_catalog.partition_id,
|
||||
file_in_catalog.object_store_id,
|
||||
)
|
||||
|
@ -315,7 +306,6 @@ mod tests {
|
|||
let location = ParquetFilePath::new(
|
||||
NamespaceId::new(1),
|
||||
TableId::new(2),
|
||||
ShardId::new(3),
|
||||
PartitionId::new(4),
|
||||
Uuid::new_v4(),
|
||||
)
|
||||
|
|
|
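After this change the object store path for a parquet file is derived without a shard component. A hedged sketch of the new call shape, using only the types and argument order that appear in the surrounding tests (this is meant to compile inside the IOx workspace, not as a standalone program):

```rust
use data_types::{NamespaceId, PartitionId, TableId};
use parquet_file::ParquetFilePath;
use uuid::Uuid;

/// Illustrative helper: the path is now namespace / table / partition / object id,
/// with no ShardId argument in between.
fn example_path() -> ParquetFilePath {
    ParquetFilePath::new(
        NamespaceId::new(1),
        TableId::new(2),
        PartitionId::new(4),
        Uuid::new_v4(),
    )
}
```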
@ -64,7 +64,7 @@ mod tests {
|
|||
use super::*;
|
||||
use bytes::Bytes;
|
||||
use chrono::Utc;
|
||||
use data_types::{NamespaceId, PartitionId, ShardId, TableId};
|
||||
use data_types::{NamespaceId, PartitionId, TableId};
|
||||
use object_store::path::Path;
|
||||
use parquet_file::ParquetFilePath;
|
||||
use std::time::Duration;
|
||||
|
@ -146,7 +146,6 @@ mod tests {
|
|||
ParquetFilePath::new(
|
||||
NamespaceId::new(1),
|
||||
TableId::new(2),
|
||||
ShardId::new(3),
|
||||
PartitionId::new(4),
|
||||
Uuid::new_v4(),
|
||||
)
|
||||
|
|
|
@ -28,7 +28,6 @@ fn main() -> Result<()> {
|
|||
/// - `influxdata.iox.predicate.v1.rs`
|
||||
/// - `influxdata.iox.querier.v1.rs`
|
||||
/// - `influxdata.iox.schema.v1.rs`
|
||||
/// - `influxdata.iox.sharder.v1.rs`
|
||||
/// - `influxdata.iox.wal.v1.rs`
|
||||
/// - `influxdata.iox.write.v1.rs`
|
||||
/// - `influxdata.iox.write_buffer.v1.rs`
|
||||
|
@ -44,7 +43,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
|
|||
let predicate_path = root.join("influxdata/iox/predicate/v1");
|
||||
let querier_path = root.join("influxdata/iox/querier/v1");
|
||||
let schema_path = root.join("influxdata/iox/schema/v1");
|
||||
let sharder_path = root.join("influxdata/iox/sharder/v1");
|
||||
let wal_path = root.join("influxdata/iox/wal/v1");
|
||||
let write_buffer_path = root.join("influxdata/iox/write_buffer/v1");
|
||||
let storage_path = root.join("influxdata/platform/storage");
|
||||
|
@ -71,7 +69,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
|
|||
root.join("grpc/health/v1/service.proto"),
|
||||
root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"),
|
||||
schema_path.join("service.proto"),
|
||||
sharder_path.join("sharder.proto"),
|
||||
wal_path.join("wal.proto"),
|
||||
write_buffer_path.join("write_buffer.proto"),
|
||||
storage_path.join("predicate.proto"),
|
||||
|
|
|
@ -16,6 +16,9 @@ message IoxMetadata {
  // Renamed to shard_id
  reserved 5;
  reserved "sequencer_id";
  // shard_id was removed
  reserved 17;
  reserved "shard_id";

  // Object store ID. Used in the parquet filename. 16 bytes in big-endian order.
  bytes object_store_id = 1;

@ -29,9 +32,6 @@ message IoxMetadata {
  // Unique name of the namespace.
  string namespace_name = 4;

  // Unique shard ID.
  int64 shard_id = 17;

  // Unique table ID.
  int64 table_id = 6;
@ -1,20 +0,0 @@
syntax = "proto3";
package influxdata.iox.sharder.v1;
option go_package = "github.com/influxdata/iox/sharder/v1";

service ShardService {
  // Shard the given inputs to a Catalog ID for the destination Shard
  // (Shard ID).
  rpc MapToShard(MapToShardRequest) returns (MapToShardResponse);
}

message MapToShardRequest {
  // The input values to map onto a Shard.
  string table_name = 1;
  string namespace_name = 2;
}

message MapToShardResponse {
  int64 shard_id = 1;
  int32 shard_index = 2;
}
@ -167,16 +167,6 @@ pub mod influxdata {
        }
    }

    pub mod sharder {
        pub mod v1 {
            include!(concat!(env!("OUT_DIR"), "/influxdata.iox.sharder.v1.rs"));
            include!(concat!(
                env!("OUT_DIR"),
                "/influxdata.iox.sharder.v1.serde.rs"
            ));
        }
    }

    pub mod wal {
        pub mod v1 {
            include!(concat!(env!("OUT_DIR"), "/influxdata.iox.wal.v1.rs"));
@ -1,11 +1,9 @@
|
|||
use self::generated_types::{shard_service_client::ShardServiceClient, *};
|
||||
use crate::{AggregateTSMMeasurement, AggregateTSMSchema};
|
||||
use chrono::{format::StrftimeItems, offset::FixedOffset, DateTime, Duration};
|
||||
use data_types::{
|
||||
ColumnType, Namespace, NamespaceName, NamespaceSchema, OrgBucketMappingError, Partition,
|
||||
PartitionKey, QueryPoolId, ShardId, TableSchema, TopicId,
|
||||
PartitionKey, QueryPoolId, TableSchema, TopicId,
|
||||
};
|
||||
use influxdb_iox_client::connection::{Connection, GrpcConnection};
|
||||
use iox_catalog::interface::{
|
||||
get_schema_by_name, CasFailure, Catalog, RepoCollection, SoftDeletedRows,
|
||||
};
|
||||
|
@ -16,10 +14,6 @@ use schema::{
|
|||
use std::{collections::HashMap, fmt::Write, ops::DerefMut, sync::Arc};
|
||||
use thiserror::Error;
|
||||
|
||||
pub mod generated_types {
|
||||
pub use generated_types::influxdata::iox::sharder::v1::*;
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum UpdateCatalogError {
|
||||
#[error("Error returned from the Catalog: {0}")]
|
||||
|
@ -45,9 +39,6 @@ pub enum UpdateCatalogError {
|
|||
|
||||
#[error("Time calculation error when deriving partition key: {0}")]
|
||||
PartitionKeyCalculationError(String),
|
||||
|
||||
#[error("Error fetching shard ID from shard service: {0}")]
|
||||
ShardServiceError(#[from] tonic::Status),
|
||||
}
|
||||
|
||||
/// Given a merged schema, update the IOx catalog to either merge that schema into the existing one
|
||||
|
@ -61,7 +52,6 @@ pub async fn update_iox_catalog<'a>(
|
|||
topic: &'a str,
|
||||
query_pool_name: &'a str,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
connection: Connection,
|
||||
) -> Result<(), UpdateCatalogError> {
|
||||
let namespace_name =
|
||||
NamespaceName::from_org_and_bucket(&merged_tsm_schema.org_id, &merged_tsm_schema.bucket_id)
|
||||
|
@ -103,18 +93,8 @@ pub async fn update_iox_catalog<'a>(
|
|||
return Err(UpdateCatalogError::CatalogError(e));
|
||||
}
|
||||
};
|
||||
// initialise a client of the shard service in the router. we will use it to find out which
|
||||
// shard a table/namespace combo would shard to, without exposing the implementation
|
||||
// details of the sharding
|
||||
let mut shard_client = ShardServiceClient::new(connection.into_grpc_connection());
|
||||
update_catalog_schema_with_merged(
|
||||
namespace_name.as_str(),
|
||||
iox_schema,
|
||||
merged_tsm_schema,
|
||||
repos.deref_mut(),
|
||||
&mut shard_client,
|
||||
)
|
||||
.await?;
|
||||
|
||||
update_catalog_schema_with_merged(iox_schema, merged_tsm_schema, repos.deref_mut()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -179,11 +159,9 @@ where
|
|||
/// This is basically the same as iox_catalog::validate_mutable_batch() but operates on
|
||||
/// AggregateTSMSchema instead of a MutableBatch (we don't have any data, only a schema)
|
||||
async fn update_catalog_schema_with_merged<R>(
|
||||
namespace_name: &str,
|
||||
iox_schema: NamespaceSchema,
|
||||
merged_tsm_schema: &AggregateTSMSchema,
|
||||
repos: &mut R,
|
||||
shard_client: &mut ShardServiceClient<GrpcConnection>,
|
||||
) -> Result<(), UpdateCatalogError>
|
||||
where
|
||||
R: RepoCollection + ?Sized,
|
||||
|
@ -290,19 +268,12 @@ where
|
|||
// date, but this is what the router logic currently does so that would need to change too.
|
||||
let partition_keys =
|
||||
get_partition_keys_for_range(measurement.earliest_time, measurement.latest_time)?;
|
||||
let response = shard_client
|
||||
.map_to_shard(tonic::Request::new(MapToShardRequest {
|
||||
table_name: measurement_name.clone(),
|
||||
namespace_name: namespace_name.to_string(),
|
||||
}))
|
||||
.await?;
|
||||
let shard_id = ShardId::new(response.into_inner().shard_id);
|
||||
for partition_key in partition_keys {
|
||||
// create the partition if it doesn't exist; new partitions get an empty sort key which
|
||||
// gets matched as `None`` in the code below
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(partition_key, shard_id, table.id)
|
||||
.create_or_get(partition_key, table.id)
|
||||
.await
|
||||
.map_err(UpdateCatalogError::CatalogError)?;
|
||||
// get the sort key from the partition, if it exists. create it or update it as
|
||||
|
@ -418,88 +389,12 @@ fn datetime_to_partition_key(
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{generated_types::shard_service_server::ShardService, *};
|
||||
use super::*;
|
||||
use crate::{AggregateTSMField, AggregateTSMTag};
|
||||
use assert_matches::assert_matches;
|
||||
use client_util::connection::Builder;
|
||||
use data_types::{PartitionId, TableId};
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
use parking_lot::RwLock;
|
||||
use std::{collections::HashSet, net::SocketAddr};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::wrappers::TcpListenerStream;
|
||||
use tonic::transport::Server;
|
||||
|
||||
struct MockShardService {
|
||||
requests: Arc<RwLock<Vec<MapToShardRequest>>>,
|
||||
reply_with: MapToShardResponse,
|
||||
}
|
||||
|
||||
impl MockShardService {
|
||||
pub fn new(response: MapToShardResponse) -> Self {
|
||||
MockShardService {
|
||||
requests: Arc::new(RwLock::new(vec![])),
|
||||
reply_with: response,
|
||||
}
|
||||
}
|
||||
|
||||
/// Use to replace the next reply with the given response (not currently used but would be
|
||||
/// handy for expanded tests)
|
||||
#[allow(dead_code)]
|
||||
pub fn with_reply(mut self, response: MapToShardResponse) -> Self {
|
||||
self.reply_with = response;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get all the requests that were made to the mock (not currently used but would be handy
|
||||
/// for expanded tests)
|
||||
#[allow(dead_code)]
|
||||
pub fn get_requests(&self) -> Arc<RwLock<Vec<MapToShardRequest>>> {
|
||||
Arc::clone(&self.requests)
|
||||
}
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl ShardService for MockShardService {
|
||||
async fn map_to_shard(
|
||||
&self,
|
||||
request: tonic::Request<MapToShardRequest>,
|
||||
) -> Result<tonic::Response<MapToShardResponse>, tonic::Status> {
|
||||
self.requests.write().push(request.into_inner());
|
||||
Ok(tonic::Response::new(self.reply_with.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_test_shard_service(
|
||||
response: MapToShardResponse,
|
||||
) -> (
|
||||
Connection,
|
||||
JoinHandle<()>,
|
||||
Arc<RwLock<Vec<MapToShardRequest>>>,
|
||||
) {
|
||||
let bind_addr = SocketAddr::new(
|
||||
std::net::IpAddr::V4(std::net::Ipv4Addr::new(127, 0, 0, 1)),
|
||||
0,
|
||||
);
|
||||
let socket = tokio::net::TcpListener::bind(bind_addr)
|
||||
.await
|
||||
.expect("failed to bind to socket in test");
|
||||
let bind_addr = socket.local_addr().unwrap();
|
||||
let sharder = MockShardService::new(response);
|
||||
let requests = Arc::clone(&sharder.get_requests());
|
||||
let server =
|
||||
Server::builder().add_service(shard_service_server::ShardServiceServer::new(sharder));
|
||||
let server = async move {
|
||||
let stream = TcpListenerStream::new(socket);
|
||||
server.serve_with_incoming(stream).await.ok();
|
||||
};
|
||||
let join_handle = tokio::task::spawn(server);
|
||||
let connection = Builder::default()
|
||||
.build(format!("http://{bind_addr}"))
|
||||
.await
|
||||
.expect("failed to connect to server");
|
||||
(connection, join_handle, requests)
|
||||
}
|
||||
use std::collections::HashSet;
|
||||
|
||||
#[tokio::test]
|
||||
async fn needs_creating() {
|
||||
|
@ -513,11 +408,6 @@ mod tests {
|
|||
.create_or_get("iox-shared")
|
||||
.await
|
||||
.expect("topic created");
|
||||
let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse {
|
||||
shard_id: 0,
|
||||
shard_index: 0,
|
||||
})
|
||||
.await;
|
||||
|
||||
let json = r#"
|
||||
{
|
||||
|
@ -543,7 +433,6 @@ mod tests {
|
|||
"iox-shared",
|
||||
"iox-shared",
|
||||
Arc::clone(&catalog),
|
||||
connection,
|
||||
)
|
||||
.await
|
||||
.expect("schema update worked");
|
||||
|
@ -602,11 +491,6 @@ mod tests {
|
|||
.create_or_get("iox-shared")
|
||||
.await
|
||||
.expect("topic created");
|
||||
let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse {
|
||||
shard_id: 0,
|
||||
shard_index: 0,
|
||||
})
|
||||
.await;
|
||||
|
||||
// create namespace, table and columns for weather measurement
|
||||
let namespace = txn
|
||||
|
@ -666,7 +550,6 @@ mod tests {
|
|||
"iox-shared",
|
||||
"iox-shared",
|
||||
Arc::clone(&catalog),
|
||||
connection,
|
||||
)
|
||||
.await
|
||||
.expect("schema update worked");
|
||||
|
@ -710,11 +593,6 @@ mod tests {
|
|||
.create_or_get("iox-shared")
|
||||
.await
|
||||
.expect("topic created");
|
||||
let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse {
|
||||
shard_id: 0,
|
||||
shard_index: 0,
|
||||
})
|
||||
.await;
|
||||
|
||||
// create namespace, table and columns for weather measurement
|
||||
let namespace = txn
|
||||
|
@ -767,7 +645,6 @@ mod tests {
|
|||
"iox-shared",
|
||||
"iox-shared",
|
||||
Arc::clone(&catalog),
|
||||
connection,
|
||||
)
|
||||
.await
|
||||
.expect_err("should fail catalog update");
|
||||
|
@ -790,11 +667,6 @@ mod tests {
|
|||
.create_or_get("iox-shared")
|
||||
.await
|
||||
.expect("topic created");
|
||||
let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse {
|
||||
shard_id: 0,
|
||||
shard_index: 0,
|
||||
})
|
||||
.await;
|
||||
|
||||
// create namespace, table and columns for weather measurement
|
||||
let namespace = txn
|
||||
|
@ -846,7 +718,6 @@ mod tests {
|
|||
"iox-shared",
|
||||
"iox-shared",
|
||||
Arc::clone(&catalog),
|
||||
connection,
|
||||
)
|
||||
.await
|
||||
.expect_err("should fail catalog update");
|
||||
|
@ -856,85 +727,6 @@ mod tests {
|
|||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn shard_lookup() {
|
||||
// init a test catalog stack
|
||||
let metrics = Arc::new(metric::Registry::default());
|
||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
||||
catalog
|
||||
.repositories()
|
||||
.await
|
||||
.topics()
|
||||
.create_or_get("iox-shared")
|
||||
.await
|
||||
.expect("topic created");
|
||||
let (connection, _join_handle, requests) = create_test_shard_service(MapToShardResponse {
|
||||
shard_id: 0,
|
||||
shard_index: 0,
|
||||
})
|
||||
.await;
|
||||
|
||||
let json = r#"
|
||||
{
|
||||
"org_id": "1234",
|
||||
"bucket_id": "5678",
|
||||
"measurements": {
|
||||
"cpu": {
|
||||
"tags": [
|
||||
{ "name": "host", "values": ["server", "desktop"] }
|
||||
],
|
||||
"fields": [
|
||||
{ "name": "usage", "types": ["Float"] }
|
||||
],
|
||||
"earliest_time": "2022-01-01T00:00:00.00Z",
|
||||
"latest_time": "2022-07-07T06:00:00.00Z"
|
||||
},
|
||||
"weather": {
|
||||
"tags": [
|
||||
],
|
||||
"fields": [
|
||||
{ "name": "temperature", "types": ["Integer"] }
|
||||
],
|
||||
"earliest_time": "2022-01-01T00:00:00.00Z",
|
||||
"latest_time": "2022-07-07T06:00:00.00Z"
|
||||
}
|
||||
}
|
||||
}
|
||||
"#;
|
||||
let agg_schema: AggregateTSMSchema = json.try_into().unwrap();
|
||||
update_iox_catalog(
|
||||
&agg_schema,
|
||||
"iox-shared",
|
||||
"iox-shared",
|
||||
Arc::clone(&catalog),
|
||||
connection,
|
||||
)
|
||||
.await
|
||||
.expect("schema update worked");
|
||||
// check that a request was made for the two shard lookups for the tables
|
||||
let requests = requests.read();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let cpu_req = requests
|
||||
.iter()
|
||||
.find(|r| r.table_name == "cpu")
|
||||
.expect("cpu request missing from mock");
|
||||
assert_eq!(
|
||||
(cpu_req.namespace_name.as_str(), cpu_req.table_name.as_str()),
|
||||
("1234_5678", "cpu"),
|
||||
);
|
||||
let weather_req = requests
|
||||
.iter()
|
||||
.find(|r| r.table_name == "weather")
|
||||
.expect("weather request missing from mock");
|
||||
assert_eq!(
|
||||
(
|
||||
weather_req.namespace_name.as_str(),
|
||||
weather_req.table_name.as_str()
|
||||
),
|
||||
("1234_5678", "weather"),
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn partition_keys_from_datetime_range_midday_to_midday() {
|
||||
let earliest_time = DateTime::parse_from_rfc3339("2022-10-30T12:00:00+00:00")
|
||||
|
@ -1230,7 +1022,6 @@ mod tests {
|
|||
};
|
||||
let partition = Partition {
|
||||
id: PartitionId::new(1),
|
||||
shard_id: ShardId::new(1),
|
||||
table_id: TableId::new(1),
|
||||
persisted_sequence_number: None,
|
||||
partition_key: PartitionKey::from("2022-06-21"),
|
||||
|
@ -1278,7 +1069,6 @@ mod tests {
|
|||
};
|
||||
let partition = Partition {
|
||||
id: PartitionId::new(1),
|
||||
shard_id: ShardId::new(1),
|
||||
table_id: TableId::new(1),
|
||||
persisted_sequence_number: None,
|
||||
partition_key: PartitionKey::from("2022-06-21"),
|
||||
|
@ -1326,7 +1116,6 @@ mod tests {
|
|||
};
|
||||
let partition = Partition {
|
||||
id: PartitionId::new(1),
|
||||
shard_id: ShardId::new(1),
|
||||
table_id: TableId::new(1),
|
||||
persisted_sequence_number: None,
|
||||
partition_key: PartitionKey::from("2022-06-21"),
|
||||
|
@ -1376,7 +1165,6 @@ mod tests {
|
|||
};
|
||||
let partition = Partition {
|
||||
id: PartitionId::new(1),
|
||||
shard_id: ShardId::new(1),
|
||||
table_id: TableId::new(1),
|
||||
persisted_sequence_number: None,
|
||||
partition_key: PartitionKey::from("2022-06-21"),
|
||||
@ -1,4 +1,3 @@
|
|||
use influxdb_iox_client::connection::Connection;
|
||||
use thiserror::Error;
|
||||
|
||||
mod schema;
|
||||
|
@ -23,9 +22,9 @@ pub enum Command {
|
|||
}
|
||||
|
||||
/// Handle variants of the schema command.
|
||||
pub async fn command(connection: Connection, config: Config) -> Result<(), ImportError> {
|
||||
pub async fn command(config: Config) -> Result<(), ImportError> {
|
||||
match config.command {
|
||||
Command::Schema(schema_config) => schema::command(connection, *schema_config)
|
||||
Command::Schema(schema_config) => schema::command(*schema_config)
|
||||
.await
|
||||
.map_err(ImportError::SchemaError),
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@ use clap_blocks::{
|
|||
catalog_dsn::CatalogDsnConfig,
|
||||
object_store::{make_object_store, ObjectStoreConfig},
|
||||
};
|
||||
use influxdb_iox_client::connection::Connection;
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use object_store::{path::Path, DynObjectStore};
|
||||
use object_store_metrics::ObjectStoreMetrics;
|
||||
|
@ -133,7 +132,7 @@ pub struct MergeConfig {
|
|||
}
|
||||
|
||||
/// Entry-point for the schema command
|
||||
pub async fn command(connection: Connection, config: Config) -> Result<(), SchemaCommandError> {
|
||||
pub async fn command(config: Config) -> Result<(), SchemaCommandError> {
|
||||
match config {
|
||||
Config::Merge(merge_config) => {
|
||||
let time_provider = Arc::new(SystemProvider::new()) as Arc<dyn TimeProvider>;
|
||||
|
@ -198,7 +197,6 @@ pub async fn command(connection: Connection, config: Config) -> Result<(), Schem
|
|||
&merge_config.topic,
|
||||
&merge_config.query_pool_name,
|
||||
Arc::clone(&catalog),
|
||||
connection.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@ use clap_blocks::object_store::{make_object_store, ObjectStoreType};
|
|||
use clap_blocks::{catalog_dsn::CatalogDsnConfig, object_store::ObjectStoreConfig};
|
||||
use data_types::{
|
||||
ColumnId, ColumnSet, ColumnType, NamespaceId, NamespaceSchema as CatalogNamespaceSchema,
|
||||
ParquetFile as CatalogParquetFile, ParquetFileParams, PartitionId, SequenceNumber, ShardId,
|
||||
TableId, Timestamp, TRANSITION_SHARD_INDEX,
|
||||
ParquetFile as CatalogParquetFile, ParquetFileParams, PartitionId, SequenceNumber, TableId,
|
||||
Timestamp,
|
||||
};
|
||||
use futures::future::join_all;
|
||||
use influxdb_iox_client::{
|
||||
|
@ -172,7 +172,6 @@ pub async fn command(connection: Connection, config: Config) -> Result<(), Error
|
|||
let path = ParquetFilePath::new(
|
||||
parquet_file.namespace_id,
|
||||
parquet_file.table_id,
|
||||
parquet_file.shard_id,
|
||||
parquet_file.partition_id,
|
||||
parquet_file.object_store_id,
|
||||
);
|
||||
|
@ -242,11 +241,6 @@ async fn load_schema(
|
|||
let mut repos = catalog.repositories().await;
|
||||
let topic = repos.topics().create_or_get(TOPIC_NAME).await?;
|
||||
let query_pool = repos.query_pools().create_or_get(QUERY_POOL).await?;
|
||||
// ensure there's a shard for this partition so it can be used later
|
||||
let _shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, TRANSITION_SHARD_INDEX)
|
||||
.await?;
|
||||
|
||||
let namespace = match repos
|
||||
.namespaces()
|
||||
|
@ -307,27 +301,16 @@ async fn load_partition(
|
|||
remote_partition: &Partition,
|
||||
) -> Result<PartitionMapping, Error> {
|
||||
let mut repos = catalog.repositories().await;
|
||||
let topic = repos
|
||||
.topics()
|
||||
.get_by_name(TOPIC_NAME)
|
||||
.await?
|
||||
.expect("topic should have been inserted earlier");
|
||||
let shard = repos
|
||||
.shards()
|
||||
.get_by_topic_id_and_shard_index(topic.id, TRANSITION_SHARD_INDEX)
|
||||
.await?
|
||||
.expect("shard should have been inserted earlier");
|
||||
let table = schema
|
||||
.tables
|
||||
.get(table_name)
|
||||
.expect("table should have been loaded");
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(remote_partition.key.clone().into(), shard.id, table.id)
|
||||
.create_or_get(remote_partition.key.clone().into(), table.id)
|
||||
.await?;
|
||||
|
||||
Ok(PartitionMapping {
|
||||
shard_id: shard.id,
|
||||
table_id: table.id,
|
||||
partition_id: partition.id,
|
||||
remote_partition_id: remote_partition.id,
|
||||
|
@ -353,7 +336,6 @@ async fn load_parquet_files(
|
|||
None => {
|
||||
println!("creating file {uuid} in catalog");
|
||||
let params = ParquetFileParams {
|
||||
shard_id: partition_mapping.shard_id,
|
||||
namespace_id,
|
||||
table_id: partition_mapping.table_id,
|
||||
partition_id: partition_mapping.partition_id,
|
||||
|
@ -382,9 +364,8 @@ async fn load_parquet_files(
|
|||
Ok(files)
|
||||
}
|
||||
|
||||
// keeps a mapping of the locally created partition and shard to the remote partition id
|
||||
// keeps a mapping of the locally created partition to the remote partition id
|
||||
struct PartitionMapping {
|
||||
shard_id: ShardId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
remote_partition_id: i64,
|
||||
|
@ -518,7 +499,6 @@ mod tests {
|
|||
async fn load_parquet_files() {
|
||||
let metrics = Arc::new(metric::Registry::new());
|
||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
||||
let shard;
|
||||
let namespace;
|
||||
let table;
|
||||
let partition;
|
||||
|
@ -527,11 +507,6 @@ mod tests {
|
|||
let mut repos = catalog.repositories().await;
|
||||
let topic = repos.topics().create_or_get(TOPIC_NAME).await.unwrap();
|
||||
let query_pool = repos.query_pools().create_or_get(QUERY_POOL).await.unwrap();
|
||||
shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, TRANSITION_SHARD_INDEX)
|
||||
.await
|
||||
.unwrap();
|
||||
namespace = repos
|
||||
.namespaces()
|
||||
.create("load_parquet_files", None, topic.id, query_pool.id)
|
||||
|
@ -544,13 +519,12 @@ mod tests {
|
|||
.unwrap();
|
||||
partition = repos
|
||||
.partitions()
|
||||
.create_or_get("1970-01-01".into(), shard.id, table.id)
|
||||
.create_or_get("1970-01-01".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let partition_mapping = PartitionMapping {
|
||||
shard_id: shard.id,
|
||||
table_id: table.id,
|
||||
partition_id: partition.id,
|
||||
remote_partition_id: 4,
|
||||
|
@ -589,12 +563,11 @@ mod tests {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
// the inserted parquet file should have shard, namespace, table, and partition ids
|
||||
// the inserted parquet file should have namespace, table, and partition ids
|
||||
// that match with the ones in the catalog, not the remote. The other values should
|
||||
// match those of the remote.
|
||||
let expected = vec![CatalogParquetFile {
|
||||
id: ParquetFileId::new(1),
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace.id,
|
||||
table_id: table.id,
|
||||
partition_id: partition.id,
|
||||
|
|
|
@ -375,8 +375,7 @@ fn main() -> Result<(), std::io::Error> {
|
|||
}
|
||||
Some(Command::Import(config)) => {
|
||||
let _tracing_guard = handle_init_logs(init_simple_logs(log_verbose_count));
|
||||
let connection = connection(grpc_host).await;
|
||||
if let Err(e) = commands::import::command(connection, config).await {
|
||||
if let Err(e) = commands::import::command(config).await {
|
||||
eprintln!("{e}");
|
||||
std::process::exit(ReturnCode::Failure as _)
|
||||
}
|
||||
@ -5,7 +5,7 @@ pub(crate) mod name_resolver;
|
|||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, ShardId, TableId};
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use dml::DmlOperation;
|
||||
use metric::U64Counter;
|
||||
use observability_deps::tracing::warn;
|
||||
|
@ -52,7 +52,7 @@ impl std::fmt::Display for NamespaceName {
|
|||
}
|
||||
}
|
||||
|
||||
/// Data of a Namespace that belongs to a given Shard
|
||||
/// Data of a Namespace
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NamespaceData<O> {
|
||||
namespace_id: NamespaceId,
|
||||
|
@ -77,8 +77,6 @@ pub(crate) struct NamespaceData<O> {
|
|||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
|
||||
post_write_observer: Arc<O>,
|
||||
|
||||
transition_shard_id: ShardId,
|
||||
}
|
||||
|
||||
impl<O> NamespaceData<O> {
|
||||
|
@ -90,7 +88,6 @@ impl<O> NamespaceData<O> {
|
|||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
post_write_observer: Arc<O>,
|
||||
metrics: &metric::Registry,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Self {
|
||||
let table_count = metrics
|
||||
.register_metric::<U64Counter>(
|
||||
|
@ -107,7 +104,6 @@ impl<O> NamespaceData<O> {
|
|||
table_count,
|
||||
partition_provider,
|
||||
post_write_observer,
|
||||
transition_shard_id,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -144,10 +140,7 @@ where
|
|||
type Error = mutable_batch::Error;
|
||||
|
||||
async fn apply(&self, op: DmlOperation) -> Result<(), Self::Error> {
|
||||
let sequence_number = op
|
||||
.meta()
|
||||
.sequence()
|
||||
.expect("applying unsequenced op");
|
||||
let sequence_number = op.meta().sequence().expect("applying unsequenced op");
|
||||
|
||||
match op {
|
||||
DmlOperation::Write(write) => {
|
||||
|
@ -166,7 +159,6 @@ where
|
|||
Arc::clone(&self.namespace_name),
|
||||
Arc::clone(&self.partition_provider),
|
||||
Arc::clone(&self.post_write_observer),
|
||||
self.transition_shard_id,
|
||||
))
|
||||
});
|
||||
|
||||
|
@ -230,7 +222,6 @@ where
|
|||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::TRANSITION_SHARD_ID;
|
||||
use metric::{Attributes, Metric};
|
||||
|
||||
use super::*;
|
||||
|
@ -264,7 +255,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
&metrics,
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Assert the namespace name was stored
|
||||
|
|
|
@ -102,13 +102,11 @@ pub(crate) mod mock {
|
|||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::ShardIndex;
|
||||
use test_helpers::timeout::FutureTimeout;
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::populate_catalog;
|
||||
|
||||
const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
|
||||
const TABLE_NAME: &str = "bananas";
|
||||
const NAMESPACE_NAME: &str = "platanos";
|
||||
|
||||
|
@ -119,9 +117,8 @@ mod tests {
|
|||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (_shard_id, ns_id, _table_id) =
|
||||
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
// Populate the catalog with the namespace / table
|
||||
let (ns_id, _table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
let fetcher = Arc::new(NamespaceNameResolver::new(
|
||||
Duration::from_secs(10),
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::{collections::VecDeque, sync::Arc};
|
|||
|
||||
use data_types::{
|
||||
sequence_number_set::SequenceNumberSet, NamespaceId, PartitionId, PartitionKey, SequenceNumber,
|
||||
ShardId, TableId,
|
||||
TableId,
|
||||
};
|
||||
use mutable_batch::MutableBatch;
|
||||
use observability_deps::tracing::*;
|
||||
|
@ -41,8 +41,7 @@ impl SortKeyState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Data of an IOx Partition of a given Table of a Namespace that belongs to a
|
||||
/// given Shard
|
||||
/// Data of an IOx Partition of a given Table of a Namespace
|
||||
#[derive(Debug)]
|
||||
pub struct PartitionData {
|
||||
/// The catalog ID of the partition this buffer is for.
|
||||
|
@ -92,8 +91,6 @@ pub struct PartitionData {
|
|||
/// The number of persist operations completed over the lifetime of this
|
||||
/// [`PartitionData`].
|
||||
completed_persistence_count: u64,
|
||||
|
||||
transition_shard_id: ShardId,
|
||||
}
|
||||
|
||||
impl PartitionData {
|
||||
|
@ -107,7 +104,6 @@ impl PartitionData {
|
|||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
sort_key: SortKeyState,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Self {
|
||||
Self {
|
||||
partition_id: id,
|
||||
|
@ -121,7 +117,6 @@ impl PartitionData {
|
|||
persisting: VecDeque::with_capacity(1),
|
||||
started_persistence_count: BatchIdent::default(),
|
||||
completed_persistence_count: 0,
|
||||
transition_shard_id,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -305,11 +300,6 @@ impl PartitionData {
|
|||
&self.partition_key
|
||||
}
|
||||
|
||||
/// Return the transition_shard_id for this partition.
|
||||
pub(crate) fn transition_shard_id(&self) -> ShardId {
|
||||
self.transition_shard_id
|
||||
}
|
||||
|
||||
/// Return the [`NamespaceId`] this partition is a part of.
|
||||
pub(crate) fn namespace_id(&self) -> NamespaceId {
|
||||
self.namespace_id
|
||||
|
@ -347,7 +337,6 @@ mod tests {
|
|||
use arrow_util::assert_batches_eq;
|
||||
use assert_matches::assert_matches;
|
||||
use backoff::BackoffConfig;
|
||||
use data_types::ShardIndex;
|
||||
use datafusion::{
|
||||
physical_expr::PhysicalSortExpr,
|
||||
physical_plan::{expressions::col, memory::MemoryExec, ExecutionPlan},
|
||||
|
@ -944,15 +933,14 @@ mod tests {
|
|||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (shard_id, _ns_id, table_id) =
|
||||
populate_catalog(&*catalog, ShardIndex::new(1), "bananas", "platanos").await;
|
||||
// Populate the catalog with the namespace / table
|
||||
let (_ns_id, table_id) = populate_catalog(&*catalog, "bananas", "platanos").await;
|
||||
|
||||
let partition_id = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get("test".into(), shard_id, table_id)
|
||||
.create_or_get("test".into(), table_id)
|
||||
.await
|
||||
.expect("should create")
|
||||
.id;
|
||||
|
|
|
@ -2,9 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
|
|||
|
||||
use async_trait::async_trait;
|
||||
use backoff::BackoffConfig;
|
||||
use data_types::{
|
||||
NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId,
|
||||
};
|
||||
use data_types::{NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, TableId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
|
@ -166,7 +164,6 @@ where
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
// Use the cached PartitionKey instead of the caller's partition_key,
|
||||
// instead preferring to reuse the already-shared Arc<str> in the cache.
|
||||
|
@ -196,7 +193,6 @@ where
|
|||
table_id,
|
||||
table_name,
|
||||
SortKeyState::Deferred(Arc::new(sort_key_resolver)),
|
||||
transition_shard_id,
|
||||
)));
|
||||
}
|
||||
|
||||
|
@ -210,7 +206,6 @@ where
|
|||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
transition_shard_id,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
@ -221,7 +216,6 @@ mod tests {
|
|||
// Harmless in tests - saves a bunch of extra vars.
|
||||
#![allow(clippy::await_holding_lock)]
|
||||
|
||||
use data_types::{ShardId, TRANSITION_SHARD_ID};
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
|
||||
use super::*;
|
||||
|
@ -264,7 +258,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -302,7 +295,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -354,7 +346,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -385,7 +376,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
other_table,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ use std::sync::Arc;
|
|||
|
||||
use async_trait::async_trait;
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::{NamespaceId, Partition, PartitionKey, ShardId, TableId};
|
||||
use data_types::{NamespaceId, Partition, PartitionKey, TableId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
|
@ -43,13 +43,12 @@ impl CatalogPartitionResolver {
|
|||
&self,
|
||||
partition_key: PartitionKey,
|
||||
table_id: TableId,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Result<Partition, iox_catalog::interface::Error> {
|
||||
self.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(partition_key, transition_shard_id, table_id)
|
||||
.create_or_get(partition_key, table_id)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
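Tying the catalog-side changes together: partition upserts are now keyed by `(partition_key, table_id)` alone. A hedged end-to-end sketch against the in-memory catalog, using only calls that appear elsewhere in this diff (the namespace/table names are illustrative, and error handling is shortened with `?`):

```rust
use std::sync::Arc;

use iox_catalog::{interface::Catalog, mem::MemCatalog};

/// Sketch only: exercises the two-argument partitions().create_or_get() shown above.
async fn example() -> Result<(), iox_catalog::interface::Error> {
    let metrics = Arc::new(metric::Registry::default());
    let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(metrics));
    let mut repos = catalog.repositories().await;

    let topic = repos.topics().create_or_get("iox-shared").await?;
    let pool = repos.query_pools().create_or_get("iox-shared").await?;
    let ns = repos
        .namespaces()
        .create("example_ns", None, topic.id, pool.id)
        .await?;
    let table = repos.tables().create_or_get("example_table", ns.id).await?;

    // No shard id any more: the partition is identified by its key and table.
    let partition = repos
        .partitions()
        .create_or_get("2022-07-13".into(), table.id)
        .await?;
    println!("created partition {}", partition.id);
    Ok(())
}
```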
@ -63,18 +62,16 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
debug!(
|
||||
%partition_key,
|
||||
%table_id,
|
||||
%table_name,
|
||||
%transition_shard_id,
|
||||
"upserting partition in catalog"
|
||||
);
|
||||
let p = Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("resolve partition", || {
|
||||
self.get(partition_key.clone(), table_id, transition_shard_id)
|
||||
self.get(partition_key.clone(), table_id)
|
||||
})
|
||||
.await
|
||||
.expect("retry forever");
|
||||
|
@ -90,7 +87,6 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
table_id,
|
||||
table_name,
|
||||
SortKeyState::Provided(p.sort_key()),
|
||||
transition_shard_id,
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
@ -103,7 +99,6 @@ mod tests {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::ShardIndex;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -117,7 +112,7 @@ mod tests {
|
|||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
let (shard_id, namespace_id, table_id) = {
|
||||
let (namespace_id, table_id) = {
|
||||
let mut repos = catalog.repositories().await;
|
||||
let t = repos.topics().create_or_get("platanos").await.unwrap();
|
||||
let q = repos.query_pools().create_or_get("platanos").await.unwrap();
|
||||
|
@ -127,19 +122,13 @@ mod tests {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&t, ShardIndex::new(0))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = repos
|
||||
.tables()
|
||||
.create_or_get(TABLE_NAME, ns.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
(shard.id, ns.id, table.id)
|
||||
(ns.id, table.id)
|
||||
};
|
||||
|
||||
let callers_partition_key = PartitionKey::from(PARTITION_KEY);
|
||||
|
@ -156,7 +145,6 @@ mod tests {
|
|||
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
|
||||
TableName::from(TABLE_NAME)
|
||||
})),
|
||||
shard_id,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ use std::{
|
|||
|
||||
use arrow::compute::kernels::partition;
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
|
||||
use data_types::{NamespaceId, PartitionKey, TableId};
|
||||
use futures::{future::Shared, FutureExt};
|
||||
use hashbrown::{hash_map::Entry, HashMap};
|
||||
use parking_lot::Mutex;
|
||||
|
@ -147,7 +147,6 @@ where
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let key = Key {
|
||||
namespace_id,
|
||||
|
@ -172,7 +171,6 @@ where
|
|||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
transition_shard_id,
|
||||
));
|
||||
|
||||
// Make the future poll-able by many callers, all of which
|
||||
|
@ -236,7 +234,6 @@ async fn do_fetch<T>(
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>>
|
||||
where
|
||||
T: PartitionProvider + 'static,
|
||||
|
@ -257,7 +254,6 @@ where
|
|||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
transition_shard_id,
|
||||
)
|
||||
.await
|
||||
})
|
||||
|
@ -275,7 +271,7 @@ mod tests {
|
|||
};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{PartitionId, TRANSITION_SHARD_ID};
|
||||
use data_types::PartitionId;
|
||||
use futures::Future;
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
|
@ -314,7 +310,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>()
|
||||
|
@ -349,7 +344,6 @@ mod tests {
|
|||
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
_table_id: TableId,
|
||||
_table_name: Arc<DeferredLoad<TableName>>,
|
||||
_transition_shard_id: ShardId,
|
||||
) -> core::pin::Pin<
|
||||
Box<
|
||||
dyn core::future::Future<Output = Arc<Mutex<PartitionData>>>
|
||||
|
@ -390,7 +384,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
let pa_2 = layer.get_partition(
|
||||
ARBITRARY_PARTITION_KEY.clone(),
|
||||
|
@ -398,7 +391,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
|
@ -419,7 +411,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.with_timeout_panic(Duration::from_secs(5))
|
||||
.await;
|
||||
|
@ -450,7 +441,6 @@ mod tests {
|
|||
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
_table_id: TableId,
|
||||
_table_name: Arc<DeferredLoad<TableName>>,
|
||||
_transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let waker = self.wait.notified();
|
||||
let permit = self.sem.acquire().await.unwrap();
|
||||
|
@ -491,7 +481,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
|
||||
use data_types::{NamespaceId, PartitionKey, TableId};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use super::r#trait::PartitionProvider;
|
||||
|
@ -54,7 +54,6 @@ impl PartitionProvider for MockPartitionProvider {
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
_transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let p = self
|
||||
.partitions
|
||||
|
|
|
@ -59,12 +59,9 @@ impl SortKeyResolver {
|
|||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::ShardIndex;
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::populate_catalog;
|
||||
|
||||
const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
|
||||
const TABLE_NAME: &str = "bananas";
|
||||
const NAMESPACE_NAME: &str = "platanos";
|
||||
const PARTITION_KEY: &str = "platanos";
|
||||
|
@ -76,15 +73,14 @@ mod tests {
|
|||
let catalog: Arc<dyn Catalog> =
|
||||
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Populate the catalog with the shard / namespace / table
|
||||
let (shard_id, _ns_id, table_id) =
|
||||
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
// Populate the catalog with the namespace / table
|
||||
let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
let partition_id = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(PARTITION_KEY.into(), shard_id, table_id)
|
||||
.create_or_get(PARTITION_KEY.into(), table_id)
|
||||
.await
|
||||
.expect("should create")
|
||||
.id;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
|
||||
use data_types::{NamespaceId, PartitionKey, TableId};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use crate::{
|
||||
|
@ -25,7 +25,6 @@ pub(crate) trait PartitionProvider: Send + Sync + Debug {
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>>;
|
||||
}
|
||||
|
||||
|
@ -41,7 +40,6 @@ where
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
(**self)
|
||||
.get_partition(
|
||||
|
@ -50,7 +48,6 @@ where
|
|||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
transition_shard_id,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
@ -60,8 +57,6 @@ where
|
|||
mod tests {
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use data_types::{PartitionId, ShardId, TRANSITION_SHARD_ID};
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
|
||||
|
@ -85,7 +80,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, ShardId, TableId};
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use dml::DmlOperation;
|
||||
use metric::U64Counter;
|
||||
use parking_lot::Mutex;
|
||||
|
@ -103,7 +103,6 @@ pub(crate) struct BufferTree<O> {
|
|||
namespace_count: U64Counter,
|
||||
|
||||
post_write_observer: Arc<O>,
|
||||
transition_shard_id: ShardId,
|
||||
}
|
||||
|
||||
impl<O> BufferTree<O>
|
||||
|
@ -117,7 +116,6 @@ where
|
|||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
post_write_observer: Arc<O>,
|
||||
metrics: Arc<metric::Registry>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Self {
|
||||
let namespace_count = metrics
|
||||
.register_metric::<U64Counter>(
|
||||
|
@ -134,7 +132,6 @@ where
|
|||
partition_provider,
|
||||
post_write_observer,
|
||||
namespace_count,
|
||||
transition_shard_id,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,7 +182,6 @@ where
|
|||
Arc::clone(&self.partition_provider),
|
||||
Arc::clone(&self.post_write_observer),
|
||||
&self.metrics,
|
||||
self.transition_shard_id,
|
||||
))
|
||||
});
|
||||
|
||||
|
@ -234,7 +230,7 @@ mod tests {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{PartitionId, PartitionKey, TRANSITION_SHARD_ID};
|
||||
use data_types::{PartitionId, PartitionKey};
|
||||
use datafusion::{assert_batches_eq, assert_batches_sorted_eq};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use metric::{Attributes, Metric};
|
||||
|
@ -274,7 +270,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
&metrics,
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Assert the namespace name was stored
|
||||
|
@ -351,7 +346,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Write the provided DmlWrites
|
||||
|
@ -628,7 +622,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::clone(&metrics),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Write data to partition p1, in the arbitrary table
|
||||
|
@ -725,7 +718,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::clone(&Arc::new(metric::Registry::default())),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
assert_eq!(buf.partitions().count(), 0);
|
||||
|
@ -808,7 +800,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Query the empty tree
|
||||
|
@ -894,7 +885,6 @@ mod tests {
|
|||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Write data to partition p1, in the arbitrary table
|
||||
|
|
|
@ -5,7 +5,7 @@ pub(crate) mod name_resolver;
|
|||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, SequenceNumber, ShardId, TableId};
|
||||
use data_types::{NamespaceId, PartitionKey, SequenceNumber, TableId};
|
||||
use datafusion_util::MemoryStream;
|
||||
use mutable_batch::MutableBatch;
|
||||
use parking_lot::Mutex;
|
||||
|
@ -66,7 +66,7 @@ impl PartialEq<str> for TableName {
|
|||
}
|
||||
}
|
||||
|
||||
/// Data of a Table in a given Namesapce that belongs to a given Shard
|
||||
/// Data of a Table in a given Namesapce
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct TableData<O> {
|
||||
table_id: TableId,
|
||||
|
@ -84,7 +84,6 @@ pub(crate) struct TableData<O> {
|
|||
partition_data: ArcMap<PartitionKey, Mutex<PartitionData>>,
|
||||
|
||||
post_write_observer: Arc<O>,
|
||||
transition_shard_id: ShardId,
|
||||
}
|
||||
|
||||
impl<O> TableData<O> {
|
||||
|
@ -100,7 +99,6 @@ impl<O> TableData<O> {
|
|||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
post_write_observer: Arc<O>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_id,
|
||||
|
@ -110,7 +108,6 @@ impl<O> TableData<O> {
|
|||
partition_data: Default::default(),
|
||||
partition_provider,
|
||||
post_write_observer,
|
||||
transition_shard_id,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -171,7 +168,6 @@ where
|
|||
Arc::clone(&self.namespace_name),
|
||||
self.table_id,
|
||||
Arc::clone(&self.table_name),
|
||||
self.transition_shard_id,
|
||||
)
|
||||
.await;
|
||||
// Add the partition to the map.
|
||||
|
@ -262,7 +258,6 @@ where
|
|||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::TRANSITION_SHARD_ID;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
|
||||
use super::*;
|
||||
|
@ -292,7 +287,6 @@ mod tests {
|
|||
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
let batch = lines_to_batches(
|
||||
|
|
|
@@ -103,13 +103,11 @@ pub(crate) mod mock {
mod tests {
use std::sync::Arc;

use data_types::ShardIndex;
use test_helpers::timeout::FutureTimeout;

use super::*;
use crate::test_util::populate_catalog;

const SHARD_INDEX: ShardIndex = ShardIndex::new(24);
const TABLE_NAME: &str = "bananas";
const NAMESPACE_NAME: &str = "platanos";

@@ -120,9 +118,8 @@ mod tests {
let catalog: Arc<dyn Catalog> =
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));

// Populate the catalog with the shard / namespace / table
let (_shard_id, _ns_id, table_id) =
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
// Populate the catalog with the namespace / table
let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await;

let fetcher = Arc::new(TableNameResolver::new(
Duration::from_secs(10),
@@ -46,7 +46,6 @@ use crate::{
server::grpc::GrpcDelegate,
timestamp_oracle::TimestampOracle,
wal::{rotate_task::periodic_rotation, wal_sink::WalSink},
TRANSITION_SHARD_INDEX,
};

use self::graceful_shutdown::graceful_shutdown_handler;

@@ -235,23 +234,6 @@ pub async fn new<F>(
where
F: Future<Output = CancellationToken> + Send + 'static,
{
// Create the transition shard.
let mut txn = catalog
.start_transaction()
.await
.expect("start transaction");
let topic = txn
.topics()
.create_or_get("iox-shared")
.await
.expect("get topic");
let transition_shard = txn
.shards()
.create_or_get(&topic, TRANSITION_SHARD_INDEX)
.await
.expect("create transition shard");
txn.commit().await.expect("commit transition shard");

// Initialise a random ID for this ingester instance.
let ingester_id = IngesterId::new();

@@ -336,7 +318,6 @@ where
partition_provider,
Arc::new(hot_partition_persister),
Arc::clone(&metrics),
transition_shard.id,
));

// Initialise the WAL
@@ -199,8 +199,6 @@
missing_docs
)]

use data_types::TRANSITION_SHARD_INDEX;

/// A macro to conditionally prepend `pub` to the inner tokens for benchmarking
/// purposes, should the `benches` feature be enabled.
///
@@ -1,6 +1,6 @@
use std::sync::Arc;

use data_types::{NamespaceId, PartitionId, PartitionKey, ShardId, TableId};
use data_types::{NamespaceId, PartitionId, PartitionKey, TableId};
use observability_deps::tracing::*;
use parking_lot::Mutex;
use schema::sort::SortKey;

@@ -87,8 +87,6 @@ pub(super) struct Context {
table_id: TableId,
partition_id: PartitionId,

transition_shard_id: ShardId,

// The partition key for this partition
partition_key: PartitionKey,

@@ -173,7 +171,6 @@ impl Context {
enqueued_at,
dequeued_at: Instant::now(),
permit,
transition_shard_id: guard.transition_shard_id(),
}
};

@@ -306,8 +303,4 @@ impl Context {
pub(super) fn table_name(&self) -> &DeferredLoad<TableName> {
self.table_name.as_ref()
}

pub(super) fn transition_shard_id(&self) -> ShardId {
self.transition_shard_id
}
}
@@ -475,7 +475,6 @@ mod tests {
use std::{sync::Arc, task::Poll, time::Duration};

use assert_matches::assert_matches;
use data_types::TRANSITION_SHARD_ID;
use dml::DmlOperation;
use futures::Future;
use iox_catalog::mem::MemCatalog;

@@ -526,7 +525,6 @@ mod tests {
),
Arc::new(MockPostWriteObserver::default()),
Default::default(),
TRANSITION_SHARD_ID,
);

buffer_tree
@ -15,7 +15,7 @@ mod tests {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{CompactionLevel, ParquetFile, SequenceNumber, TRANSITION_SHARD_ID};
|
||||
use data_types::{CompactionLevel, ParquetFile, SequenceNumber};
|
||||
use dml::DmlOperation;
|
||||
use futures::TryStreamExt;
|
||||
use iox_catalog::{
|
||||
|
@ -48,7 +48,6 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_NAME_PROVIDER, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_NAME_PROVIDER,
|
||||
},
|
||||
TRANSITION_SHARD_INDEX,
|
||||
};
|
||||
|
||||
use super::handle::PersistHandle;
|
||||
|
@ -62,13 +61,8 @@ mod tests {
|
|||
/// partition entry exists (by driving the buffer tree to create it).
|
||||
async fn partition_with_write(catalog: Arc<dyn Catalog>) -> Arc<Mutex<PartitionData>> {
|
||||
// Create the namespace in the catalog and it's the schema
|
||||
let (_shard_id, namespace_id, table_id) = populate_catalog(
|
||||
&*catalog,
|
||||
TRANSITION_SHARD_INDEX,
|
||||
&ARBITRARY_NAMESPACE_NAME,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
)
|
||||
.await;
|
||||
let (namespace_id, table_id) =
|
||||
populate_catalog(&*catalog, &ARBITRARY_NAMESPACE_NAME, &ARBITRARY_TABLE_NAME).await;
|
||||
|
||||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
|
@ -77,7 +71,6 @@ mod tests {
|
|||
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
let write = make_write_op(
|
||||
|
@ -448,13 +441,7 @@ mod tests {
|
|||
assert_eq!(files.len(), 2, "expected two uploaded files");
|
||||
|
||||
// Ensure the catalog record points at a valid file in object storage.
|
||||
let want_path = ParquetFilePath::new(
|
||||
namespace_id,
|
||||
table_id,
|
||||
TRANSITION_SHARD_ID,
|
||||
partition_id,
|
||||
object_store_id,
|
||||
)
|
||||
let want_path = ParquetFilePath::new(namespace_id, table_id, partition_id, object_store_id)
|
||||
.object_store_path();
|
||||
let file = files
|
||||
.into_iter()
|
||||
|
|
|
@@ -257,7 +257,6 @@ where
let iox_metadata = IoxMetadata {
object_store_id,
creation_timestamp: time_now,
shard_id: ctx.transition_shard_id(),
namespace_id: ctx.namespace_id(),
namespace_name: Arc::clone(&*ctx.namespace_name().get().await),
table_id: ctx.table_id(),
@@ -1,9 +1,6 @@
use std::{collections::BTreeMap, sync::Arc, time::Duration};

use data_types::{
NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, ShardId, ShardIndex,
TableId, TRANSITION_SHARD_ID,
};
use data_types::{NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, TableId};
use dml::{DmlMeta, DmlWrite};
use iox_catalog::interface::Catalog;
use lazy_static::lazy_static;

@@ -117,7 +114,6 @@ impl PartitionDataBuilder {
self.table_name
.unwrap_or_else(|| Arc::clone(&*DEFER_TABLE_NAME_1_SEC)),
self.sort_key.unwrap_or(SortKeyState::Provided(None)),
TRANSITION_SHARD_ID,
)
}
}

@@ -127,7 +123,6 @@ impl PartitionDataBuilder {
pub(crate) fn arbitrary_partition() -> Partition {
Partition {
id: ARBITRARY_PARTITION_ID,
shard_id: TRANSITION_SHARD_ID,
table_id: ARBITRARY_TABLE_ID,
partition_key: ARBITRARY_PARTITION_KEY.clone(),
sort_key: Default::default(),

@@ -285,10 +280,9 @@ pub(crate) fn make_write_op(

pub(crate) async fn populate_catalog(
catalog: &dyn Catalog,
shard_index: ShardIndex,
namespace: &str,
table: &str,
) -> (ShardId, NamespaceId, TableId) {
) -> (NamespaceId, TableId) {
let mut c = catalog.repositories().await;
let topic = c.topics().create_or_get("kafka-topic").await.unwrap();
let query_pool = c.query_pools().create_or_get("query-pool").await.unwrap();

@@ -299,14 +293,8 @@ pub(crate) async fn populate_catalog(
.unwrap()
.id;
let table_id = c.tables().create_or_get(table, ns_id).await.unwrap().id;
let shard_id = c
.shards()
.create_or_get(&topic, shard_index)
.await
.unwrap()
.id;

(shard_id, ns_id, table_id)
(ns_id, table_id)
}

/// Assert `a` and `b` have identical metadata, and that when converting
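Not part of the commit: a minimal sketch of how a test might call the shard-free `populate_catalog` helper above. It assumes the helper is reachable from the same test module, and that `MemCatalog` and `metric::Registry` are available as shown elsewhere in this diff; `example_setup` is a hypothetical name.

```rust
use std::sync::Arc;

use iox_catalog::{interface::Catalog, mem::MemCatalog};

// Hypothetical test set-up; `populate_catalog` is the crate-private helper above.
async fn example_setup() {
    let metrics = Arc::new(metric::Registry::default());
    let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(metrics));

    // The helper no longer takes a ShardIndex and no longer returns a ShardId.
    let (ns_id, table_id) = populate_catalog(&*catalog, "platanos", "bananas").await;

    println!("namespace={ns_id:?} table={table_id:?}");
}
```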
@ -4,8 +4,7 @@ use async_trait::async_trait;
|
|||
use data_types::{
|
||||
Column, ColumnSchema, ColumnType, CompactionLevel, Namespace, NamespaceId, NamespaceSchema,
|
||||
ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool,
|
||||
QueryPoolId, SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId,
|
||||
TableSchema, Timestamp, TopicId, TopicMetadata,
|
||||
QueryPoolId, SkippedCompaction, Table, TableId, TableSchema, Timestamp, TopicId, TopicMetadata,
|
||||
};
|
||||
use iox_time::TimeProvider;
|
||||
use snafu::{OptionExt, Snafu};
|
||||
|
@ -301,9 +300,6 @@ pub trait RepoCollection: Send + Sync + Debug {
|
|||
/// Repository for [columns](data_types::Column).
|
||||
fn columns(&mut self) -> &mut dyn ColumnRepo;
|
||||
|
||||
/// Repository for [shards](data_types::Shard).
|
||||
fn shards(&mut self) -> &mut dyn ShardRepo;
|
||||
|
||||
/// Repository for [partitions](data_types::Partition).
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo;
|
||||
|
||||
|
@@ -437,48 +433,12 @@ pub trait ColumnRepo: Send + Sync {
async fn list(&mut self) -> Result<Vec<Column>>;
}

/// Functions for working with shards in the catalog
#[async_trait]
pub trait ShardRepo: Send + Sync {
/// create a shard record for the topic and shard index or return the existing record
async fn create_or_get(
&mut self,
topic: &TopicMetadata,
shard_index: ShardIndex,
) -> Result<Shard>;

/// get the shard record by `TopicId` and `ShardIndex`
async fn get_by_topic_id_and_shard_index(
&mut self,
topic_id: TopicId,
shard_index: ShardIndex,
) -> Result<Option<Shard>>;

/// list all shards
async fn list(&mut self) -> Result<Vec<Shard>>;

/// list all shards for a given topic
async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result<Vec<Shard>>;

/// updates the `min_unpersisted_sequence_number` for a shard
async fn update_min_unpersisted_sequence_number(
&mut self,
shard: ShardId,
sequence_number: SequenceNumber,
) -> Result<()>;
}

/// Functions for working with IOx partitions in the catalog. Note that these are how IOx splits up
/// data within a namespace, which is different than Kafka partitions.
#[async_trait]
pub trait PartitionRepo: Send + Sync {
/// create or get a partition record for the given partition key, shard and table
async fn create_or_get(
&mut self,
key: PartitionKey,
shard_id: ShardId,
table_id: TableId,
) -> Result<Partition>;
async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>;

/// get partition by ID
async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
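For illustration only (not in the commit): a sketch of the narrower `PartitionRepo::create_or_get` call shape after this change. It assumes a `RepoCollection` obtained from a catalog as in the surrounding code; the function name and partition key value are made up.

```rust
// Partitions are now keyed by (partition_key, table_id); no shard argument.
async fn upsert_partition(
    repos: &mut dyn RepoCollection,
    table_id: TableId,
) -> Result<Partition> {
    repos
        .partitions()
        .create_or_get("2023-04-20".into(), table_id)
        .await
}
```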
@ -580,8 +540,8 @@ pub trait ParquetFileRepo: Send + Sync {
|
|||
///
|
||||
/// Returns the deleted IDs only.
|
||||
///
|
||||
/// This deletion is limited to a certain (backend-specific) number of files to avoid overlarge changes. The caller
|
||||
/// MAY call this method again if the result was NOT empty.
|
||||
/// This deletion is limited to a certain (backend-specific) number of files to avoid overlarge
|
||||
/// changes. The caller MAY call this method again if the result was NOT empty.
|
||||
async fn delete_old_ids_only(&mut self, older_than: Timestamp) -> Result<Vec<ParquetFileId>>;
|
||||
|
||||
/// List parquet files for a given partition that are NOT marked as
|
||||
|
@ -827,17 +787,12 @@ pub async fn list_schemas(
|
|||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_helpers {
|
||||
use crate::{
|
||||
validate_or_insert_schema, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES,
|
||||
SHARED_TOPIC_ID,
|
||||
};
|
||||
use crate::{validate_or_insert_schema, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES};
|
||||
|
||||
use super::*;
|
||||
use ::test_helpers::{assert_contains, tracing::TracingCapture};
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{
|
||||
ColumnId, ColumnSet, CompactionLevel, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX,
|
||||
};
|
||||
use data_types::{ColumnId, ColumnSet, CompactionLevel, SequenceNumber};
|
||||
use futures::Future;
|
||||
use metric::{Attributes, DurationHistogram, Metric};
|
||||
use std::{collections::BTreeSet, ops::DerefMut, sync::Arc, time::Duration};
|
||||
|
@ -891,23 +846,6 @@ pub(crate) mod test_helpers {
|
|||
async fn test_setup(catalog: Arc<dyn Catalog>) {
|
||||
catalog.setup().await.expect("first catalog setup");
|
||||
catalog.setup().await.expect("second catalog setup");
|
||||
|
||||
let transition_shard = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.shards()
|
||||
.get_by_topic_id_and_shard_index(SHARED_TOPIC_ID, TRANSITION_SHARD_INDEX)
|
||||
.await
|
||||
.expect("transition shard");
|
||||
|
||||
assert_matches!(
|
||||
transition_shard,
|
||||
Some(Shard {
|
||||
id,
|
||||
shard_index,
|
||||
..
|
||||
}) if id == TRANSITION_SHARD_ID && shard_index == TRANSITION_SHARD_INDEX
|
||||
);
|
||||
}
|
||||
|
||||
async fn test_topic(catalog: Arc<dyn Catalog>) {
|
||||
|
@ -1560,29 +1498,19 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("test_table", namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1))
|
||||
.await
|
||||
.unwrap();
|
||||
let other_shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(2))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut created = BTreeMap::new();
|
||||
for key in ["foo", "bar"] {
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard.id, table.id)
|
||||
.create_or_get(key.into(), table.id)
|
||||
.await
|
||||
.expect("failed to create partition");
|
||||
created.insert(partition.id, partition);
|
||||
}
|
||||
let other_partition = repos
|
||||
.partitions()
|
||||
.create_or_get("asdf".into(), other_shard.id, table.id)
|
||||
.create_or_get("asdf".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -1859,24 +1787,18 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("other", namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1))
|
||||
.await
|
||||
.unwrap();
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get("one".into(), shard.id, table.id)
|
||||
.create_or_get("one".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let other_partition = repos
|
||||
.partitions()
|
||||
.create_or_get("one".into(), shard.id, other_table.id)
|
||||
.create_or_get("one".into(), other_table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace.id,
|
||||
table_id: partition.table_id,
|
||||
partition_id: partition.id,
|
||||
|
@ -2049,7 +1971,7 @@ pub(crate) mod test_helpers {
|
|||
.unwrap();
|
||||
let partition2 = repos
|
||||
.partitions()
|
||||
.create_or_get("foo".into(), shard.id, table2.id)
|
||||
.create_or_get("foo".into(), table2.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let files = repos
|
||||
|
@ -2285,24 +2207,18 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("test_table", namespace_2.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1))
|
||||
.await
|
||||
.unwrap();
|
||||
let partition_1 = repos
|
||||
.partitions()
|
||||
.create_or_get("one".into(), shard.id, table_1.id)
|
||||
.create_or_get("one".into(), table_1.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let partition_2 = repos
|
||||
.partitions()
|
||||
.create_or_get("one".into(), shard.id, table_2.id)
|
||||
.create_or_get("one".into(), table_2.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let parquet_file_params_1 = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace_1.id,
|
||||
table_id: table_1.id,
|
||||
partition_id: partition_1.id,
|
||||
|
@ -2318,7 +2234,6 @@ pub(crate) mod test_helpers {
|
|||
max_l0_created_at: Timestamp::new(1),
|
||||
};
|
||||
let parquet_file_params_2 = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace_2.id,
|
||||
table_id: table_2.id,
|
||||
partition_id: partition_2.id,
|
||||
|
@ -2374,11 +2289,6 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("test_table_for_new_file_between", namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(101))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// param for the tests
|
||||
let time_now = Timestamp::from(catalog.time_provider().now());
|
||||
|
@ -2401,7 +2311,7 @@ pub(crate) mod test_helpers {
|
|||
// The DB has 1 partition but it does not have any file
|
||||
let partition1 = repos
|
||||
.partitions()
|
||||
.create_or_get("one".into(), shard.id, table.id)
|
||||
.create_or_get("one".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let partitions = repos
|
||||
|
@ -2413,7 +2323,6 @@ pub(crate) mod test_helpers {
|
|||
|
||||
// create files for partition one
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace.id,
|
||||
table_id: partition1.table_id,
|
||||
partition_id: partition1.id,
|
||||
|
@ -2504,7 +2413,7 @@ pub(crate) mod test_helpers {
|
|||
// Partition two without any file
|
||||
let partition2 = repos
|
||||
.partitions()
|
||||
.create_or_get("two".into(), shard.id, table.id)
|
||||
.create_or_get("two".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
// should return partition one only
|
||||
|
@ -2612,7 +2521,7 @@ pub(crate) mod test_helpers {
|
|||
// Partition three without any file
|
||||
let partition3 = repos
|
||||
.partitions()
|
||||
.create_or_get("three".into(), shard.id, table.id)
|
||||
.create_or_get("three".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
// should return partition one and two only
|
||||
|
@ -2754,28 +2663,15 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("test_table", namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(100))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(
|
||||
"test_list_by_partiton_not_to_delete_one".into(),
|
||||
shard.id,
|
||||
table.id,
|
||||
)
|
||||
.create_or_get("test_list_by_partiton_not_to_delete_one".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let partition2 = repos
|
||||
.partitions()
|
||||
.create_or_get(
|
||||
"test_list_by_partiton_not_to_delete_two".into(),
|
||||
shard.id,
|
||||
table.id,
|
||||
)
|
||||
.create_or_get("test_list_by_partiton_not_to_delete_two".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -2783,7 +2679,6 @@ pub(crate) mod test_helpers {
|
|||
let max_time = Timestamp::new(10);
|
||||
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace.id,
|
||||
table_id: partition.table_id,
|
||||
partition_id: partition.id,
|
||||
|
@ -2883,18 +2778,9 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("update_table", namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1000))
|
||||
.await
|
||||
.unwrap();
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(
|
||||
"test_update_to_compaction_level_1_one".into(),
|
||||
shard.id,
|
||||
table.id,
|
||||
)
|
||||
.create_or_get("test_update_to_compaction_level_1_one".into(), table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -2904,12 +2790,10 @@ pub(crate) mod test_helpers {
|
|||
|
||||
// Create a file with times entirely within the window
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
shard_id: shard.id,
|
||||
namespace_id: namespace.id,
|
||||
table_id: partition.table_id,
|
||||
partition_id: partition.id,
|
||||
object_store_id: Uuid::new_v4(),
|
||||
|
||||
max_sequence_number: SequenceNumber::new(140),
|
||||
min_time: query_min_time + 1,
|
||||
max_time: query_max_time - 1,
|
||||
|
@ -2988,21 +2872,15 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("column_test_1", table_1.id, ColumnType::Tag)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1))
|
||||
.await
|
||||
.unwrap();
|
||||
let partition_1 = repos
|
||||
.partitions()
|
||||
.create_or_get("test_delete_namespace_one".into(), shard.id, table_1.id)
|
||||
.create_or_get("test_delete_namespace_one".into(), table_1.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// parquet files
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
namespace_id: namespace_1.id,
|
||||
shard_id: shard.id,
|
||||
table_id: partition_1.table_id,
|
||||
partition_id: partition_1.id,
|
||||
object_store_id: Uuid::new_v4(),
|
||||
|
@ -3051,21 +2929,15 @@ pub(crate) mod test_helpers {
|
|||
.create_or_get("column_test_2", table_2.id, ColumnType::Tag)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&topic, ShardIndex::new(1))
|
||||
.await
|
||||
.unwrap();
|
||||
let partition_2 = repos
|
||||
.partitions()
|
||||
.create_or_get("test_delete_namespace_two".into(), shard.id, table_2.id)
|
||||
.create_or_get("test_delete_namespace_two".into(), table_2.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// parquet files
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
namespace_id: namespace_2.id,
|
||||
shard_id: shard.id,
|
||||
table_id: partition_2.table_id,
|
||||
partition_id: partition_2.id,
|
||||
object_store_id: Uuid::new_v4(),
@@ -0,0 +1,83 @@
use data_types::{SequenceNumber, TopicId};

/// Magic number to be used shard indices and shard ids in "kafkaless".
pub(crate) const TRANSITION_SHARD_NUMBER: i32 = 1234;
/// In kafkaless mode all new persisted data uses this shard id.
pub(crate) const TRANSITION_SHARD_ID: ShardId = ShardId::new(TRANSITION_SHARD_NUMBER as i64);
/// In kafkaless mode all new persisted data uses this shard index.
pub(crate) const TRANSITION_SHARD_INDEX: ShardIndex = ShardIndex::new(TRANSITION_SHARD_NUMBER);

/// Unique ID for a `Shard`, assigned by the catalog. Joins to other catalog tables to uniquely
/// identify shards independently of the underlying write buffer implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub(crate) struct ShardId(i64);

#[allow(missing_docs)]
impl ShardId {
pub(crate) const fn new(v: i64) -> Self {
Self(v)
}
}

impl std::fmt::Display for ShardId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}

/// The index of the shard in the set of shards. When Kafka is used as the write buffer, this is
/// the Kafka Partition ID. Used by the router and write buffer to shard requests to a particular
/// index in a set of shards.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub(crate) struct ShardIndex(i32);

#[allow(missing_docs)]
impl ShardIndex {
pub(crate) const fn new(v: i32) -> Self {
Self(v)
}
}

impl std::fmt::Display for ShardIndex {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}

impl std::str::FromStr for ShardIndex {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let v: i32 = s.parse()?;
Ok(Self(v))
}
}

/// Data object for a shard. Only one shard record can exist for a given topic and shard
/// index (enforced via uniqueness constraint).
#[derive(Debug, Copy, Clone, PartialEq, Eq, sqlx::FromRow)]
pub(crate) struct Shard {
/// the id of the shard, assigned by the catalog
pub(crate) id: ShardId,
/// the topic the shard is reading from
pub(crate) topic_id: TopicId,
/// the shard index of the shard the sequence numbers are coming from, sharded by the router
/// and write buffer
pub(crate) shard_index: ShardIndex,
/// The minimum unpersisted sequence number. Because different tables
/// can be persisted at different times, it is possible some data has been persisted
/// with a higher sequence number than this. However, all data with a sequence number
/// lower than this must have been persisted to Parquet.
pub(crate) min_unpersisted_sequence_number: SequenceNumber,
}

/// Shard index plus offset
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) struct Sequence {
/// The shard index
pub(crate) shard_index: ShardIndex,
/// The sequence number
pub(crate) sequence_number: SequenceNumber,
}
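Not part of the commit: a small sketch exercising the crate-private types above. It assumes a test module inside `kafkaless_transition.rs` (the types are `pub(crate)`); the module and test names are hypothetical.

```rust
#[cfg(test)]
mod transition_shard_sketch {
    use super::*;
    use std::str::FromStr;

    #[test]
    fn shard_index_round_trips_through_strings() {
        // FromStr parses the i32 payload; Display prints it back out.
        let idx = ShardIndex::from_str("1234").expect("valid i32");
        assert_eq!(idx, TRANSITION_SHARD_INDEX);
        assert_eq!(idx.to_string(), "1234");

        // The transition id and index share the same magic number.
        assert_eq!(TRANSITION_SHARD_ID.to_string(), "1234");
    }
}
```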
@@ -14,15 +14,9 @@
)]

use crate::interface::{ColumnTypeMismatchSnafu, Error, RepoCollection, Result, Transaction};
use data_types::{
ColumnType, NamespaceSchema, QueryPool, Shard, ShardId, ShardIndex, TableSchema, TopicId,
TopicMetadata,
};
use data_types::{ColumnType, NamespaceSchema, QueryPool, TableSchema, TopicId, TopicMetadata};
use mutable_batch::MutableBatch;
use std::{
borrow::Cow,
collections::{BTreeMap, HashMap},
};
use std::{borrow::Cow, collections::HashMap};
use thiserror::Error;

const SHARED_TOPIC_NAME: &str = "iox-shared";

@@ -38,6 +32,7 @@ pub const DEFAULT_MAX_COLUMNS_PER_TABLE: i32 = 200;
pub const DEFAULT_RETENTION_PERIOD: Option<i64> = None;

pub mod interface;
pub(crate) mod kafkaless_transition;
pub mod mem;
pub mod metrics;
pub mod postgres;

@@ -209,37 +204,28 @@ where
Ok(())
}

/// Creates or gets records in the catalog for the shared topic, query pool, and shards
/// for each of the partitions.
/// Creates or gets records in the catalog for the shared topic and query pool for each of the
/// partitions.
///
/// Used in tests and when creating an in-memory catalog.
pub async fn create_or_get_default_records(
shard_count: i32,
txn: &mut dyn Transaction,
) -> Result<(TopicMetadata, QueryPool, BTreeMap<ShardId, Shard>)> {
) -> Result<(TopicMetadata, QueryPool)> {
let topic = txn.topics().create_or_get(SHARED_TOPIC_NAME).await?;
let query_pool = txn.query_pools().create_or_get(SHARED_QUERY_POOL).await?;

let mut shards = BTreeMap::new();
// Start at 0 to match the one write buffer shard index used in all-in-one mode
for shard_index in 0..shard_count {
let shard = txn
.shards()
.create_or_get(&topic, ShardIndex::new(shard_index))
.await?;
shards.insert(shard.id, shard);
}

Ok((topic, query_pool, shards))
Ok((topic, query_pool))
}

#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::{collections::BTreeMap, sync::Arc};

use super::*;
use crate::interface::{get_schema_by_name, SoftDeletedRows};
use crate::mem::MemCatalog;
use crate::{
interface::{get_schema_by_name, SoftDeletedRows},
mem::MemCatalog,
};

// Generate a test that simulates multiple, sequential writes in `lp` and
// asserts the resulting schema.

@@ -265,8 +251,7 @@ mod tests {
let metrics = Arc::new(metric::Registry::default());
let repo = MemCatalog::new(metrics);
let mut txn = repo.start_transaction().await.unwrap();
let (topic, query_pool, _) = create_or_get_default_records(
2,
let (topic, query_pool) = create_or_get_default_records(
txn.deref_mut()
).await.unwrap();
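A hedged sketch (not in the diff) of initialising an in-memory catalog with the two-value return of `create_or_get_default_records`, assuming the transaction API shown in this PR; `init_defaults` is a made-up helper name and the printed fields rely on the types deriving `Debug`.

```rust
use std::{ops::DerefMut, sync::Arc};

// Hypothetical helper; the error type is the iox_catalog `Result` alias.
async fn init_defaults() -> Result<()> {
    let catalog = MemCatalog::new(Arc::new(metric::Registry::default()));

    let mut txn = catalog.start_transaction().await?;
    // The BTreeMap of shards is gone; only the topic and query pool come back.
    let (topic, query_pool) = create_or_get_default_records(txn.deref_mut()).await?;
    txn.commit().await?;

    println!("topic={topic:?} query_pool={query_pool:?}");
    Ok(())
}
```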
@ -5,9 +5,10 @@ use crate::{
|
|||
interface::{
|
||||
sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo, ColumnTypeMismatchSnafu,
|
||||
Error, NamespaceRepo, ParquetFileRepo, PartitionRepo, QueryPoolRepo, RepoCollection,
|
||||
Result, ShardRepo, SoftDeletedRows, TableRepo, TopicMetadataRepo, Transaction,
|
||||
Result, SoftDeletedRows, TableRepo, TopicMetadataRepo, Transaction,
|
||||
MAX_PARQUET_FILES_SELECTED_ONCE,
|
||||
},
|
||||
kafkaless_transition::{Shard, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX},
|
||||
metrics::MetricDecorator,
|
||||
DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME,
|
||||
};
|
||||
|
@ -15,8 +16,7 @@ use async_trait::async_trait;
|
|||
use data_types::{
|
||||
Column, ColumnId, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile,
|
||||
ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId,
|
||||
SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp,
|
||||
TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX,
|
||||
SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata,
|
||||
};
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use observability_deps::tracing::warn;
|
||||
|
@ -248,10 +248,6 @@ impl RepoCollection for MemTxn {
|
|||
self
|
||||
}
|
||||
|
||||
fn shards(&mut self) -> &mut dyn ShardRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo {
|
||||
self
|
||||
}
|
||||
|
@ -688,105 +684,20 @@ impl ColumnRepo for MemTxn {
|
|||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ShardRepo for MemTxn {
|
||||
async fn create_or_get(
|
||||
&mut self,
|
||||
topic: &TopicMetadata,
|
||||
_shard_index: ShardIndex,
|
||||
) -> Result<Shard> {
|
||||
let stage = self.stage();
|
||||
|
||||
// Temporary: only ever create the transition shard, no matter what is asked. Shards are
|
||||
// going away completely soon.
|
||||
let shard = match stage
|
||||
.shards
|
||||
.iter()
|
||||
.find(|s| s.topic_id == topic.id && s.shard_index == TRANSITION_SHARD_INDEX)
|
||||
{
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let shard = Shard {
|
||||
id: TRANSITION_SHARD_ID,
|
||||
topic_id: topic.id,
|
||||
shard_index: TRANSITION_SHARD_INDEX,
|
||||
min_unpersisted_sequence_number: SequenceNumber::new(0),
|
||||
};
|
||||
stage.shards.push(shard);
|
||||
stage.shards.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(*shard)
|
||||
}
|
||||
|
||||
async fn get_by_topic_id_and_shard_index(
|
||||
&mut self,
|
||||
topic_id: TopicId,
|
||||
shard_index: ShardIndex,
|
||||
) -> Result<Option<Shard>> {
|
||||
let stage = self.stage();
|
||||
|
||||
let shard = stage
|
||||
.shards
|
||||
.iter()
|
||||
.find(|s| s.topic_id == topic_id && s.shard_index == shard_index)
|
||||
.cloned();
|
||||
Ok(shard)
|
||||
}
|
||||
|
||||
async fn list(&mut self) -> Result<Vec<Shard>> {
|
||||
let stage = self.stage();
|
||||
|
||||
Ok(stage.shards.clone())
|
||||
}
|
||||
|
||||
async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result<Vec<Shard>> {
|
||||
let stage = self.stage();
|
||||
|
||||
let shards: Vec<_> = stage
|
||||
.shards
|
||||
.iter()
|
||||
.filter(|s| s.topic_id == topic.id)
|
||||
.cloned()
|
||||
.collect();
|
||||
Ok(shards)
|
||||
}
|
||||
|
||||
async fn update_min_unpersisted_sequence_number(
|
||||
&mut self,
|
||||
shard_id: ShardId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<()> {
|
||||
let stage = self.stage();
|
||||
|
||||
if let Some(s) = stage.shards.iter_mut().find(|s| s.id == shard_id) {
|
||||
s.min_unpersisted_sequence_number = sequence_number
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PartitionRepo for MemTxn {
|
||||
async fn create_or_get(
|
||||
&mut self,
|
||||
key: PartitionKey,
|
||||
shard_id: ShardId,
|
||||
table_id: TableId,
|
||||
) -> Result<Partition> {
|
||||
async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition> {
|
||||
let stage = self.stage();
|
||||
|
||||
let partition =
|
||||
match stage.partitions.iter().find(|p| {
|
||||
p.partition_key == key && p.shard_id == shard_id && p.table_id == table_id
|
||||
}) {
|
||||
let partition = match stage
|
||||
.partitions
|
||||
.iter()
|
||||
.find(|p| p.partition_key == key && p.table_id == table_id)
|
||||
{
|
||||
Some(p) => p,
|
||||
None => {
|
||||
let p = Partition {
|
||||
id: PartitionId::new(stage.partitions.len() as i64 + 1),
|
||||
shard_id,
|
||||
table_id,
|
||||
partition_key: key,
|
||||
sort_key: vec![],
|
||||
|
|
|
@ -2,15 +2,14 @@
|
|||
|
||||
use crate::interface::{
|
||||
sealed::TransactionFinalize, CasFailure, ColumnRepo, NamespaceRepo, ParquetFileRepo,
|
||||
PartitionRepo, QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo,
|
||||
PartitionRepo, QueryPoolRepo, RepoCollection, Result, SoftDeletedRows, TableRepo,
|
||||
TopicMetadataRepo,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use data_types::{
|
||||
Column, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId,
|
||||
ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId,
|
||||
SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp,
|
||||
TopicId, TopicMetadata,
|
||||
SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata,
|
||||
};
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use metric::{DurationHistogram, Metric};
|
||||
|
@ -48,7 +47,6 @@ where
|
|||
+ NamespaceRepo
|
||||
+ TableRepo
|
||||
+ ColumnRepo
|
||||
+ ShardRepo
|
||||
+ PartitionRepo
|
||||
+ ParquetFileRepo
|
||||
+ Debug,
|
||||
|
@ -74,10 +72,6 @@ where
|
|||
self
|
||||
}
|
||||
|
||||
fn shards(&mut self) -> &mut dyn ShardRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo {
|
||||
self
|
||||
}
|
||||
|
@ -215,21 +209,10 @@ decorate!(
|
|||
]
|
||||
);
|
||||
|
||||
decorate!(
|
||||
impl_trait = ShardRepo,
|
||||
methods = [
|
||||
"shard_create_or_get" = create_or_get(&mut self, topic: &TopicMetadata, shard_index: ShardIndex) -> Result<Shard>;
|
||||
"shard_get_by_topic_id_and_shard_index" = get_by_topic_id_and_shard_index(&mut self, topic_id: TopicId, shard_index: ShardIndex) -> Result<Option<Shard>>;
|
||||
"shard_list" = list(&mut self) -> Result<Vec<Shard>>;
|
||||
"shard_list_by_topic" = list_by_topic(&mut self, topic: &TopicMetadata) -> Result<Vec<Shard>>;
|
||||
"shard_update_min_unpersisted_sequence_number" = update_min_unpersisted_sequence_number(&mut self, shard_id: ShardId, sequence_number: SequenceNumber) -> Result<()>;
|
||||
]
|
||||
);
|
||||
|
||||
decorate!(
|
||||
impl_trait = PartitionRepo,
|
||||
methods = [
|
||||
"partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, shard_id: ShardId, table_id: TableId) -> Result<Partition>;
|
||||
"partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>;
|
||||
"partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
|
||||
"partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
|
||||
"partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
|
||||
|
|
|
@ -4,9 +4,10 @@ use crate::{
|
|||
interface::{
|
||||
self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo,
|
||||
ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo,
|
||||
QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo,
|
||||
TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE,
|
||||
QueryPoolRepo, RepoCollection, Result, SoftDeletedRows, TableRepo, TopicMetadataRepo,
|
||||
Transaction, MAX_PARQUET_FILES_SELECTED_ONCE,
|
||||
},
|
||||
kafkaless_transition::{TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX},
|
||||
metrics::MetricDecorator,
|
||||
DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME,
|
||||
};
|
||||
|
@ -14,8 +15,7 @@ use async_trait::async_trait;
|
|||
use data_types::{
|
||||
Column, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId,
|
||||
ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId,
|
||||
SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp,
|
||||
TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX,
|
||||
SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata,
|
||||
};
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use observability_deps::tracing::{debug, info, warn};
|
||||
|
@ -547,10 +547,6 @@ impl RepoCollection for PostgresTxn {
|
|||
self
|
||||
}
|
||||
|
||||
fn shards(&mut self) -> &mut dyn ShardRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo {
|
||||
self
|
||||
}
|
||||
|
@ -1086,109 +1082,9 @@ RETURNING *;
|
|||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ShardRepo for PostgresTxn {
|
||||
async fn create_or_get(
|
||||
&mut self,
|
||||
topic: &TopicMetadata,
|
||||
shard_index: ShardIndex,
|
||||
) -> Result<Shard> {
|
||||
sqlx::query_as::<_, Shard>(
|
||||
r#"
|
||||
INSERT INTO shard
|
||||
( topic_id, shard_index, min_unpersisted_sequence_number )
|
||||
VALUES
|
||||
( $1, $2, 0 )
|
||||
ON CONFLICT ON CONSTRAINT shard_unique
|
||||
DO UPDATE SET topic_id = shard.topic_id
|
||||
RETURNING *;;
|
||||
"#,
|
||||
)
|
||||
.bind(topic.id) // $1
|
||||
.bind(shard_index) // $2
|
||||
.fetch_one(&mut self.inner)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
Error::ForeignKeyViolation { source: e }
|
||||
} else {
|
||||
Error::SqlxError { source: e }
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
async fn get_by_topic_id_and_shard_index(
|
||||
&mut self,
|
||||
topic_id: TopicId,
|
||||
shard_index: ShardIndex,
|
||||
) -> Result<Option<Shard>> {
|
||||
let rec = sqlx::query_as::<_, Shard>(
|
||||
r#"
|
||||
SELECT *
|
||||
FROM shard
|
||||
WHERE topic_id = $1
|
||||
AND shard_index = $2;
|
||||
"#,
|
||||
)
|
||||
.bind(topic_id) // $1
|
||||
.bind(shard_index) // $2
|
||||
.fetch_one(&mut self.inner)
|
||||
.await;
|
||||
|
||||
if let Err(sqlx::Error::RowNotFound) = rec {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let shard = rec.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(Some(shard))
|
||||
}
|
||||
|
||||
async fn list(&mut self) -> Result<Vec<Shard>> {
|
||||
sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard;"#)
|
||||
.fetch_all(&mut self.inner)
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result<Vec<Shard>> {
|
||||
sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard WHERE topic_id = $1;"#)
|
||||
.bind(topic.id) // $1
|
||||
.fetch_all(&mut self.inner)
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn update_min_unpersisted_sequence_number(
|
||||
&mut self,
|
||||
shard_id: ShardId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<()> {
|
||||
let _ = sqlx::query(
|
||||
r#"
|
||||
UPDATE shard
|
||||
SET min_unpersisted_sequence_number = $1
|
||||
WHERE id = $2;
|
||||
"#,
|
||||
)
|
||||
.bind(sequence_number.get()) // $1
|
||||
.bind(shard_id) // $2
|
||||
.execute(&mut self.inner)
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PartitionRepo for PostgresTxn {
|
||||
async fn create_or_get(
|
||||
&mut self,
|
||||
key: PartitionKey,
|
||||
shard_id: ShardId,
|
||||
table_id: TableId,
|
||||
) -> Result<Partition> {
|
||||
async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition> {
|
||||
// Note: since sort_key is now an array, we must explicitly insert '{}' which is an empty
|
||||
// array rather than NULL which sqlx will throw `UnexpectedNullError` while is is doing
|
||||
// `ColumnDecode`
|
||||
|
@ -1201,11 +1097,11 @@ VALUES
|
|||
( $1, $2, $3, '{}')
|
||||
ON CONFLICT ON CONSTRAINT partition_key_unique
|
||||
DO UPDATE SET partition_key = partition.partition_key
|
||||
RETURNING *;
|
||||
RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at;
|
||||
"#,
|
||||
)
|
||||
.bind(key) // $1
|
||||
.bind(shard_id) // $2
|
||||
.bind(TRANSITION_SHARD_ID) // $2
|
||||
.bind(table_id) // $3
|
||||
.fetch_one(&mut self.inner)
|
||||
.await
|
||||
|
@ -1217,20 +1113,17 @@ RETURNING *;
|
|||
}
|
||||
})?;
|
||||
|
||||
// If the partition_key_unique constraint was hit because there was an
|
||||
// existing record for (table_id, partition_key) ensure the partition
|
||||
// key in the DB is mapped to the same shard_id the caller
|
||||
// requested.
|
||||
assert_eq!(
|
||||
v.shard_id, shard_id,
|
||||
"attempted to overwrite partition with different shard ID"
|
||||
);
|
||||
|
||||
Ok(v)
|
||||
}
|
||||
|
||||
async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>> {
|
||||
let rec = sqlx::query_as::<_, Partition>(r#"SELECT * FROM partition WHERE id = $1;"#)
|
||||
let rec = sqlx::query_as::<_, Partition>(
|
||||
r#"
|
||||
SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at
|
||||
FROM partition
|
||||
WHERE id = $1;
|
||||
"#,
|
||||
)
|
||||
.bind(partition_id) // $1
|
||||
.fetch_one(&mut self.inner)
|
||||
.await;
|
||||
|
@ -1247,7 +1140,7 @@ RETURNING *;
|
|||
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
|
||||
sqlx::query_as::<_, Partition>(
|
||||
r#"
|
||||
SELECT *
|
||||
SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at
|
||||
FROM partition
|
||||
WHERE table_id = $1;
|
||||
"#,
|
||||
|
@ -1288,7 +1181,7 @@ WHERE table_id = $1;
|
|||
UPDATE partition
|
||||
SET sort_key = $1
|
||||
WHERE id = $2 AND sort_key = $3
|
||||
RETURNING *;
|
||||
RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at;
|
||||
"#,
|
||||
)
|
||||
.bind(new_sort_key) // $1
|
||||
|
@ -1461,7 +1354,6 @@ RETURNING *
|
|||
impl ParquetFileRepo for PostgresTxn {
|
||||
async fn create(&mut self, parquet_file_params: ParquetFileParams) -> Result<ParquetFile> {
|
||||
let ParquetFileParams {
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
|
@ -1484,10 +1376,13 @@ INSERT INTO parquet_file (
|
|||
max_sequence_number, min_time, max_time, file_size_bytes,
|
||||
row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at )
|
||||
VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 )
|
||||
RETURNING *;
|
||||
RETURNING
|
||||
id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at;
|
||||
"#,
|
||||
)
|
||||
.bind(shard_id) // $1
|
||||
.bind(TRANSITION_SHARD_ID) // $1
|
||||
.bind(table_id) // $2
|
||||
.bind(partition_id) // $3
|
||||
.bind(object_store_id) // $4
|
||||
|
@ -1563,16 +1458,14 @@ RETURNING id;
|
|||
&mut self,
|
||||
namespace_id: NamespaceId,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
sqlx::query_as::<_, ParquetFile>(
|
||||
r#"
|
||||
SELECT parquet_file.id, parquet_file.shard_id, parquet_file.namespace_id,
|
||||
SELECT parquet_file.id, parquet_file.namespace_id,
|
||||
parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id,
|
||||
parquet_file.max_sequence_number, parquet_file.min_time,
|
||||
parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes,
|
||||
parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set,
|
||||
parquet_file.max_l0_created_at
|
||||
parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at,
|
||||
parquet_file.column_set, parquet_file.max_l0_created_at
|
||||
FROM parquet_file
|
||||
INNER JOIN table_name on table_name.id = parquet_file.table_id
|
||||
WHERE table_name.namespace_id = $1
|
||||
|
@ -1586,11 +1479,9 @@ WHERE table_name.namespace_id = $1
|
|||
}
|
||||
|
||||
async fn list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result<Vec<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
sqlx::query_as::<_, ParquetFile>(
|
||||
r#"
|
||||
SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id,
|
||||
SELECT id, namespace_id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
|
@ -1650,11 +1541,9 @@ RETURNING id;
|
|||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
sqlx::query_as::<_, ParquetFile>(
|
||||
r#"
|
||||
SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id,
|
||||
SELECT id, namespace_id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
|
@ -1720,11 +1609,9 @@ RETURNING id;
|
|||
&mut self,
|
||||
object_store_id: Uuid,
|
||||
) -> Result<Option<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
let rec = sqlx::query_as::<_, ParquetFile>(
|
||||
r#"
|
||||
SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id,
|
||||
SELECT id, namespace_id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
|
@ -1783,7 +1670,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::create_or_get_default_records;
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{ColumnId, ColumnSet};
|
||||
use data_types::{ColumnId, ColumnSet, SequenceNumber};
|
||||
use metric::{Attributes, DurationHistogram, Metric};
|
||||
use rand::Rng;
|
||||
use sqlx::migrate::MigrateDatabase;
|
||||
|
@ -1911,9 +1798,6 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_catalog() {
|
||||
// If running an integration test on your laptop, this requires that you have Postgres
|
||||
// running and that you've done the sqlx migrations. See the README in this crate for
|
||||
// info to set it up.
|
||||
maybe_skip_integration!();
|
||||
|
||||
let postgres = setup_db().await;
|
||||
|
@ -1964,23 +1848,13 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_partition_create_or_get_idempotent() {
|
||||
// If running an integration test on your laptop, this requires that you have Postgres running
|
||||
//
|
||||
// This is a command to run this test on your laptop
|
||||
// TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgres:postgres://$USER@localhost/iox_shared RUST_BACKTRACE=1 cargo test --package iox_catalog --lib -- postgres::tests::test_partition_create_or_get_idempotent --exact --nocapture
|
||||
//
|
||||
// If you do not have Postgres's iox_shared db, here are commands to install Postgres (on mac) and create iox_shared db
|
||||
// brew install postgresql
|
||||
// initdb pg
|
||||
// createdb iox_shared
|
||||
|
||||
maybe_skip_integration!();
|
||||
|
||||
let postgres = setup_db().await;
|
||||
|
||||
let postgres: Arc<dyn Catalog> = Arc::new(postgres);
|
||||
let mut txn = postgres.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
let (kafka, query) = create_or_get_default_records(txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
@ -2003,100 +1877,27 @@ mod tests {
|
|||
.id;
|
||||
|
||||
let key = "bananas";
|
||||
let shard_id = *shards.keys().next().expect("no shard");
|
||||
|
||||
let a = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard_id, table_id)
|
||||
.create_or_get(key.into(), table_id)
|
||||
.await
|
||||
.expect("should create OK");
|
||||
|
||||
// Call create_or_get for the same (key, table_id, shard_id)
|
||||
// triplet, setting the same shard ID to ensure the write is
|
||||
// idempotent.
|
||||
// Call create_or_get for the same (key, table_id) pair, to ensure the write is idempotent.
|
||||
let b = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard_id, table_id)
|
||||
.create_or_get(key.into(), table_id)
|
||||
.await
|
||||
.expect("idempotent write should succeed");
|
||||
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic = "attempted to overwrite partition"]
|
||||
async fn test_partition_create_or_get_no_overwrite() {
|
||||
// If running an integration test on your laptop, this requires that you have Postgres
|
||||
// running and that you've done the sqlx migrations. See the README in this crate for
|
||||
// info to set it up.
|
||||
maybe_skip_integration!("attempted to overwrite partition");
|
||||
|
||||
let postgres = setup_db().await;
|
||||
|
||||
let postgres: Arc<dyn Catalog> = Arc::new(postgres);
|
||||
let mut txn = postgres.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, _) = create_or_get_default_records(2, txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
||||
let namespace_id = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.namespaces()
|
||||
.create("ns3", None, kafka.id, query.id)
|
||||
.await
|
||||
.expect("namespace create failed")
|
||||
.id;
|
||||
let table_id = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.tables()
|
||||
.create_or_get("table", namespace_id)
|
||||
.await
|
||||
.expect("create table failed")
|
||||
.id;
|
||||
|
||||
let key = "bananas";
|
||||
|
||||
let shards = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.shards()
|
||||
.list()
|
||||
.await
|
||||
.expect("failed to list shards");
|
||||
assert!(
|
||||
shards.len() > 1,
|
||||
"expected more shards to be created, got {}",
|
||||
shards.len()
|
||||
);
|
||||
|
||||
let a = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shards[0].id, table_id)
|
||||
.await
|
||||
.expect("should create OK");
|
||||
|
||||
// Call create_or_get for the same (key, table_id) tuple, setting a
|
||||
// different shard ID
|
||||
let b = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shards[1].id, table_id)
|
||||
.await
|
||||
.expect("result should not be evaluated");
|
||||
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_dsn_file() {
|
||||
assert_eq!(
|
||||
|
@ -2190,9 +1991,6 @@ mod tests {
|
|||
paste::paste! {
|
||||
#[tokio::test]
|
||||
async fn [<test_column_create_or_get_many_unchecked_ $name>]() {
|
||||
// If running an integration test on your laptop, this requires that you have
|
||||
// Postgres running and that you've done the sqlx migrations. See the README in
|
||||
// this crate for info to set it up.
|
||||
maybe_skip_integration!();
|
||||
|
||||
let postgres = setup_db().await;
|
||||
|
@ -2200,7 +1998,7 @@ mod tests {
|
|||
|
||||
let postgres: Arc<dyn Catalog> = Arc::new(postgres);
|
||||
let mut txn = postgres.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, _shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
let (kafka, query) = create_or_get_default_records(txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
@ -2362,19 +2160,6 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_billing_summary_on_parqet_file_creation() {
|
||||
// If running an integration test on your laptop, this requires that you have Postgres running
|
||||
//
|
||||
// This is a command to run this test on your laptop
|
||||
// TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgres:postgres://$USER@localhost/iox_shared RUST_BACKTRACE=1 cargo test --package iox_catalog --lib -- postgres::tests::test_billing_summary_on_parqet_file_creation --exact --nocapture
|
||||
//
|
||||
// If you do not have Postgres's iox_shared db, here are commands to install Postgres (on macOS) and create the iox_shared db
|
||||
// brew install postgresql
|
||||
// initdb pg
|
||||
// createdb iox_shared
|
||||
//
|
||||
// Or if you're on Linux or otherwise don't mind using Docker:
|
||||
// ./scripts/docker_catalog.sh
|
||||
|
||||
maybe_skip_integration!();
|
||||
|
||||
let postgres = setup_db().await;
|
||||
|
@ -2382,7 +2167,7 @@ mod tests {
|
|||
|
||||
let postgres: Arc<dyn Catalog> = Arc::new(postgres);
|
||||
let mut txn = postgres.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
let (kafka, query) = create_or_get_default_records(txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
@ -2405,13 +2190,12 @@ mod tests {
|
|||
.id;
|
||||
|
||||
let key = "bananas";
|
||||
let shard_id = *shards.keys().next().expect("no shard");
|
||||
|
||||
let partition_id = postgres
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard_id, table_id)
|
||||
.create_or_get(key.into(), table_id)
|
||||
.await
|
||||
.expect("should create OK")
|
||||
.id;
|
||||
|
@ -2421,7 +2205,6 @@ mod tests {
|
|||
let time_provider = Arc::new(SystemProvider::new());
|
||||
let time_now = Timestamp::from(time_provider.now());
|
||||
let mut p1 = ParquetFileParams {
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
|
|
|
@ -4,9 +4,10 @@ use crate::{
|
|||
interface::{
|
||||
self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo,
|
||||
ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo,
|
||||
QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo,
|
||||
TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE,
|
||||
QueryPoolRepo, RepoCollection, Result, SoftDeletedRows, TableRepo, TopicMetadataRepo,
|
||||
Transaction, MAX_PARQUET_FILES_SELECTED_ONCE,
|
||||
},
|
||||
kafkaless_transition::{TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX},
|
||||
metrics::MetricDecorator,
|
||||
DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME,
|
||||
};
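The transition constants previously pulled from `data_types` now live in a private `kafkaless_transition` module inside the catalog crate, so the sentinel shard stops being part of any public API. Purely as an illustration of the shape such a module could take (the newtype below is a toy stand-in and the numeric value is made up, neither is taken from this diff):

```
/// Toy stand-in for the real ShardId newtype, only here so the sketch compiles.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct ShardId(i64);

impl ShardId {
    pub(crate) const fn new(v: i64) -> Self {
        Self(v)
    }
}

/// Placeholder value: the real sentinel number is an internal detail of the crate.
pub(crate) const TRANSITION_SHARD_NUMBER: i64 = 1;
pub(crate) const TRANSITION_SHARD_ID: ShardId = ShardId::new(TRANSITION_SHARD_NUMBER);
```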
|
||||
|
@ -14,8 +15,7 @@ use async_trait::async_trait;
|
|||
use data_types::{
|
||||
Column, ColumnId, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile,
|
||||
ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId,
|
||||
SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp,
|
||||
TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX,
|
||||
SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ops::Deref;
|
||||
|
@ -318,10 +318,6 @@ impl RepoCollection for SqliteTxn {
|
|||
self
|
||||
}
|
||||
|
||||
fn shards(&mut self) -> &mut dyn ShardRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo {
|
||||
self
|
||||
}
|
||||
|
@ -866,108 +862,12 @@ RETURNING *;
|
|||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ShardRepo for SqliteTxn {
|
||||
async fn create_or_get(
|
||||
&mut self,
|
||||
topic: &TopicMetadata,
|
||||
shard_index: ShardIndex,
|
||||
) -> Result<Shard> {
|
||||
sqlx::query_as::<_, Shard>(
|
||||
r#"
|
||||
INSERT INTO shard
|
||||
( topic_id, shard_index, min_unpersisted_sequence_number )
|
||||
VALUES
|
||||
( $1, $2, 0 )
|
||||
ON CONFLICT (topic_id, shard_index)
|
||||
DO UPDATE SET topic_id = shard.topic_id
|
||||
RETURNING *;
|
||||
"#,
|
||||
)
|
||||
.bind(topic.id) // $1
|
||||
.bind(shard_index) // $2
|
||||
.fetch_one(self.inner.get_mut())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
Error::ForeignKeyViolation { source: e }
|
||||
} else {
|
||||
Error::SqlxError { source: e }
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
async fn get_by_topic_id_and_shard_index(
|
||||
&mut self,
|
||||
topic_id: TopicId,
|
||||
shard_index: ShardIndex,
|
||||
) -> Result<Option<Shard>> {
|
||||
let rec = sqlx::query_as::<_, Shard>(
|
||||
r#"
|
||||
SELECT *
|
||||
FROM shard
|
||||
WHERE topic_id = $1
|
||||
AND shard_index = $2;
|
||||
"#,
|
||||
)
|
||||
.bind(topic_id) // $1
|
||||
.bind(shard_index) // $2
|
||||
.fetch_one(self.inner.get_mut())
|
||||
.await;
|
||||
|
||||
if let Err(sqlx::Error::RowNotFound) = rec {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let shard = rec.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(Some(shard))
|
||||
}
|
||||
|
||||
async fn list(&mut self) -> Result<Vec<Shard>> {
|
||||
sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard;"#)
|
||||
.fetch_all(self.inner.get_mut())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result<Vec<Shard>> {
|
||||
sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard WHERE topic_id = $1;"#)
|
||||
.bind(topic.id) // $1
|
||||
.fetch_all(self.inner.get_mut())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn update_min_unpersisted_sequence_number(
|
||||
&mut self,
|
||||
shard_id: ShardId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<()> {
|
||||
let _ = sqlx::query(
|
||||
r#"
|
||||
UPDATE shard
|
||||
SET min_unpersisted_sequence_number = $1
|
||||
WHERE id = $2;
|
||||
"#,
|
||||
)
|
||||
.bind(sequence_number.get()) // $1
|
||||
.bind(shard_id) // $2
|
||||
.execute(self.inner.get_mut())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// We can't use [`Partition`] here, as it uses Vec<String>, which the SQLite
|
||||
// driver cannot serialise
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)]
|
||||
struct PartitionPod {
|
||||
id: PartitionId,
|
||||
shard_id: ShardId,
|
||||
table_id: TableId,
|
||||
partition_key: PartitionKey,
|
||||
sort_key: Json<Vec<String>>,
|
||||
|
@ -979,7 +879,6 @@ impl From<PartitionPod> for Partition {
|
|||
fn from(value: PartitionPod) -> Self {
|
||||
Self {
|
||||
id: value.id,
|
||||
shard_id: value.shard_id,
|
||||
table_id: value.table_id,
|
||||
partition_key: value.partition_key,
|
||||
sort_key: value.sort_key.0,
|
||||
|
@ -991,12 +890,7 @@ impl From<PartitionPod> for Partition {
|
|||
|
||||
#[async_trait]
|
||||
impl PartitionRepo for SqliteTxn {
|
||||
async fn create_or_get(
|
||||
&mut self,
|
||||
key: PartitionKey,
|
||||
shard_id: ShardId,
|
||||
table_id: TableId,
|
||||
) -> Result<Partition> {
|
||||
async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition> {
|
||||
// Note: since sort_key is now an array, we must explicitly insert '[]', which is an empty
|
||||
// array, rather than NULL, for which sqlx would throw `UnexpectedNullError` while doing
|
||||
// `ColumnDecode`
|
||||
|
@ -1009,11 +903,11 @@ VALUES
|
|||
( $1, $2, $3, '[]')
|
||||
ON CONFLICT (table_id, partition_key)
|
||||
DO UPDATE SET partition_key = partition.partition_key
|
||||
RETURNING *;
|
||||
RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at;
|
||||
"#,
|
||||
)
|
||||
.bind(key) // $1
|
||||
.bind(shard_id) // $2
|
||||
.bind(TRANSITION_SHARD_ID) // $2
|
||||
.bind(table_id) // $3
|
||||
.fetch_one(self.inner.get_mut())
|
||||
.await
|
||||
|
@ -1025,20 +919,17 @@ RETURNING *;
|
|||
}
|
||||
})?;
|
||||
|
||||
// If the partition_key_unique constraint was hit because there was an
|
||||
// existing record for (table_id, partition_key) ensure the partition
|
||||
// key in the DB is mapped to the same shard_id the caller
|
||||
// requested.
|
||||
assert_eq!(
|
||||
v.shard_id, shard_id,
|
||||
"attempted to overwrite partition with different shard ID"
|
||||
);
|
||||
|
||||
Ok(v.into())
|
||||
}
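The idempotency of the new single-key `create_or_get` rests entirely on the upsert: the no-op `ON CONFLICT ... DO UPDATE` forces the statement down the `RETURNING` path even when the row already exists, so the caller always gets a row back in one round trip (the real statement additionally pins the still-present `shard_id` column to the transition sentinel). A stripped-down sketch of the same pattern against a simplified table; the table shape and bindings here are illustrative, not this crate's schema:

```
use sqlx::SqlitePool;

#[derive(Debug, sqlx::FromRow)]
struct PartitionRow {
    id: i64,
    table_id: i64,
    partition_key: String,
}

/// Insert-or-fetch in one statement: the no-op DO UPDATE makes RETURNING fire
/// for pre-existing rows as well, so both paths yield the stored row.
async fn create_or_get(pool: &SqlitePool, table_id: i64, key: &str) -> sqlx::Result<PartitionRow> {
    sqlx::query_as::<_, PartitionRow>(
        r#"
INSERT INTO partition ( partition_key, table_id, sort_key )
VALUES ( $1, $2, '[]' )
ON CONFLICT (table_id, partition_key)
DO UPDATE SET partition_key = partition.partition_key
RETURNING id, table_id, partition_key;
"#,
    )
    .bind(key) // $1
    .bind(table_id) // $2
    .fetch_one(pool)
    .await
}
```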
|
||||
|
||||
async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>> {
|
||||
let rec = sqlx::query_as::<_, PartitionPod>(r#"SELECT * FROM partition WHERE id = $1;"#)
|
||||
let rec = sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at
|
||||
FROM partition
|
||||
WHERE id = $1;
|
||||
"#,
|
||||
)
|
||||
.bind(partition_id) // $1
|
||||
.fetch_one(self.inner.get_mut())
|
||||
.await;
|
||||
|
@ -1055,7 +946,7 @@ RETURNING *;
|
|||
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
|
||||
Ok(sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
SELECT *
|
||||
SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at
|
||||
FROM partition
|
||||
WHERE table_id = $1;
|
||||
"#,
|
||||
|
@ -1099,7 +990,7 @@ WHERE table_id = $1;
|
|||
UPDATE partition
|
||||
SET sort_key = $1
|
||||
WHERE id = $2 AND sort_key = $3
|
||||
RETURNING *;
|
||||
RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at;
|
||||
"#,
|
||||
)
|
||||
.bind(Json(new_sort_key)) // $1
|
||||
|
@ -1237,7 +1128,12 @@ RETURNING *
|
|||
|
||||
async fn most_recent_n(&mut self, n: usize) -> Result<Vec<Partition>> {
|
||||
Ok(sqlx::query_as::<_, PartitionPod>(
|
||||
r#"SELECT * FROM partition ORDER BY id DESC LIMIT $1;"#,
|
||||
r#"
|
||||
SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at
|
||||
FROM partition
|
||||
ORDER BY id DESC
|
||||
LIMIT $1;
|
||||
"#,
|
||||
)
|
||||
.bind(n as i64) // $1
|
||||
.fetch_all(self.inner.get_mut())
|
||||
|
@ -1285,7 +1181,6 @@ fn to_column_set(v: &Json<Vec<i64>>) -> ColumnSet {
|
|||
#[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)]
|
||||
struct ParquetFilePod {
|
||||
id: ParquetFileId,
|
||||
shard_id: ShardId,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
|
@ -1306,7 +1201,6 @@ impl From<ParquetFilePod> for ParquetFile {
|
|||
fn from(value: ParquetFilePod) -> Self {
|
||||
Self {
|
||||
id: value.id,
|
||||
shard_id: value.shard_id,
|
||||
namespace_id: value.namespace_id,
|
||||
table_id: value.table_id,
|
||||
partition_id: value.partition_id,
|
||||
|
@ -1329,7 +1223,6 @@ impl From<ParquetFilePod> for ParquetFile {
|
|||
impl ParquetFileRepo for SqliteTxn {
|
||||
async fn create(&mut self, parquet_file_params: ParquetFileParams) -> Result<ParquetFile> {
|
||||
let ParquetFileParams {
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
|
@ -1352,10 +1245,13 @@ INSERT INTO parquet_file (
|
|||
max_sequence_number, min_time, max_time, file_size_bytes,
|
||||
row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at )
|
||||
VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 )
|
||||
RETURNING *;
|
||||
RETURNING
|
||||
id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at;
|
||||
"#,
|
||||
)
|
||||
.bind(shard_id) // $1
|
||||
.bind(TRANSITION_SHARD_ID) // $1
|
||||
.bind(table_id) // $2
|
||||
.bind(partition_id) // $3
|
||||
.bind(object_store_id) // $4
|
||||
|
@ -1435,12 +1331,11 @@ RETURNING id;
|
|||
// `parquet_metadata` column!!
|
||||
Ok(sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT parquet_file.id, parquet_file.shard_id, parquet_file.namespace_id,
|
||||
parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id,
|
||||
parquet_file.max_sequence_number, parquet_file.min_time,
|
||||
parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes,
|
||||
parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set,
|
||||
parquet_file.max_l0_created_at
|
||||
SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id,
|
||||
parquet_file.partition_id, parquet_file.object_store_id, parquet_file.max_sequence_number,
|
||||
parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete,
|
||||
parquet_file.file_size_bytes, parquet_file.row_count, parquet_file.compaction_level,
|
||||
parquet_file.created_at, parquet_file.column_set, parquet_file.max_l0_created_at
|
||||
FROM parquet_file
|
||||
INNER JOIN table_name on table_name.id = parquet_file.table_id
|
||||
WHERE table_name.namespace_id = $1
|
||||
|
@ -1457,11 +1352,9 @@ WHERE table_name.namespace_id = $1
|
|||
}
|
||||
|
||||
async fn list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result<Vec<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
Ok(sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id,
|
||||
SELECT id, namespace_id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
|
@ -1527,11 +1420,9 @@ RETURNING id;
|
|||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
Ok(sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id,
|
||||
SELECT id, namespace_id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
|
@ -1600,11 +1491,9 @@ RETURNING id;
|
|||
&mut self,
|
||||
object_store_id: Uuid,
|
||||
) -> Result<Option<ParquetFile>> {
|
||||
// Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large
|
||||
// `parquet_metadata` column!!
|
||||
let rec = sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id,
|
||||
SELECT id, namespace_id, table_id, partition_id, object_store_id,
|
||||
max_sequence_number, min_time, max_time, to_delete, file_size_bytes,
|
||||
row_count, compaction_level, created_at, column_set, max_l0_created_at
|
||||
FROM parquet_file
|
||||
|
@ -1707,7 +1596,7 @@ mod tests {
|
|||
|
||||
let sqlite: Arc<dyn Catalog> = Arc::new(sqlite);
|
||||
let mut txn = sqlite.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
let (kafka, query) = create_or_get_default_records(txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
@ -1730,95 +1619,27 @@ mod tests {
|
|||
.id;
|
||||
|
||||
let key = "bananas";
|
||||
let shard_id = *shards.keys().next().expect("no shard");
|
||||
|
||||
let a = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard_id, table_id)
|
||||
.create_or_get(key.into(), table_id)
|
||||
.await
|
||||
.expect("should create OK");
|
||||
|
||||
// Call create_or_get for the same (key, table_id, shard_id)
|
||||
// triplet, setting the same shard ID to ensure the write is
|
||||
// idempotent.
|
||||
// Call create_or_get for the same (key, table_id) pair, to ensure the write is idempotent.
|
||||
let b = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard_id, table_id)
|
||||
.create_or_get(key.into(), table_id)
|
||||
.await
|
||||
.expect("idempotent write should succeed");
|
||||
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic = "attempted to overwrite partition"]
|
||||
async fn test_partition_create_or_get_no_overwrite() {
|
||||
let sqlite = setup_db().await;
|
||||
|
||||
let sqlite: Arc<dyn Catalog> = Arc::new(sqlite);
|
||||
let mut txn = sqlite.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, _) = create_or_get_default_records(2, txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
||||
let namespace_id = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.namespaces()
|
||||
.create("ns3", None, kafka.id, query.id)
|
||||
.await
|
||||
.expect("namespace create failed")
|
||||
.id;
|
||||
let table_id = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.tables()
|
||||
.create_or_get("table", namespace_id)
|
||||
.await
|
||||
.expect("create table failed")
|
||||
.id;
|
||||
|
||||
let key = "bananas";
|
||||
|
||||
let shards = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.shards()
|
||||
.list()
|
||||
.await
|
||||
.expect("failed to list shards");
|
||||
assert!(
|
||||
shards.len() > 1,
|
||||
"expected more shards to be created, got {}",
|
||||
shards.len()
|
||||
);
|
||||
|
||||
let a = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shards[0].id, table_id)
|
||||
.await
|
||||
.expect("should create OK");
|
||||
|
||||
// Call create_or_get for the same (key, table_id) tuple, setting a
|
||||
// different shard ID
|
||||
let b = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shards[1].id, table_id)
|
||||
.await
|
||||
.expect("result should not be evaluated");
|
||||
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
macro_rules! test_column_create_or_get_many_unchecked {
|
||||
(
|
||||
$name:ident,
|
||||
|
@ -1833,7 +1654,7 @@ mod tests {
|
|||
|
||||
let sqlite: Arc<dyn Catalog> = Arc::new(sqlite);
|
||||
let mut txn = sqlite.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, _shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
let (kafka, query) = create_or_get_default_records(txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
@ -2000,7 +1821,7 @@ mod tests {
|
|||
|
||||
let sqlite: Arc<dyn Catalog> = Arc::new(sqlite);
|
||||
let mut txn = sqlite.start_transaction().await.expect("txn start");
|
||||
let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
let (kafka, query) = create_or_get_default_records(txn.deref_mut())
|
||||
.await
|
||||
.expect("db init failed");
|
||||
txn.commit().await.expect("txn commit");
|
||||
|
@ -2023,13 +1844,12 @@ mod tests {
|
|||
.id;
|
||||
|
||||
let key = "bananas";
|
||||
let shard_id = *shards.keys().next().expect("no shard");
|
||||
|
||||
let partition_id = sqlite
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(key.into(), shard_id, table_id)
|
||||
.create_or_get(key.into(), table_id)
|
||||
.await
|
||||
.expect("should create OK")
|
||||
.id;
|
||||
|
@ -2039,7 +1859,6 @@ mod tests {
|
|||
let time_provider = Arc::new(SystemProvider::new());
|
||||
let time_now = Timestamp::from(time_provider.now());
|
||||
let mut p1 = ParquetFileParams {
|
||||
shard_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use data_types::{
|
||||
ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition, PartitionId,
|
||||
PartitionKey, SequenceNumber, ShardId, SkippedCompaction, Table, TableId, Timestamp,
|
||||
PartitionKey, SequenceNumber, SkippedCompaction, Table, TableId, Timestamp,
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -17,7 +17,6 @@ impl ParquetFileBuilder {
|
|||
Self {
|
||||
file: ParquetFile {
|
||||
id: ParquetFileId::new(id),
|
||||
shard_id: ShardId::new(0),
|
||||
namespace_id: NamespaceId::new(0),
|
||||
table_id: TableId::new(0),
|
||||
partition_id: PartitionId::new(0),
|
||||
|
@ -155,7 +154,6 @@ impl PartitionBuilder {
|
|||
Self {
|
||||
partition: Partition {
|
||||
id: PartitionId::new(id),
|
||||
shard_id: ShardId::new(0),
|
||||
table_id: TableId::new(0),
|
||||
partition_key: PartitionKey::from("key"),
|
||||
sort_key: vec![],
|
||||
|
|
|
@ -6,8 +6,8 @@ use arrow::{
|
|||
};
|
||||
use data_types::{
|
||||
Column, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceSchema, ParquetFile,
|
||||
ParquetFileParams, Partition, PartitionId, QueryPool, SequenceNumber, Shard, ShardIndex, Table,
|
||||
TableId, TableSchema, Timestamp, TopicMetadata,
|
||||
ParquetFileParams, Partition, PartitionId, QueryPool, SequenceNumber, Table, TableId,
|
||||
TableSchema, Timestamp, TopicMetadata,
|
||||
};
|
||||
use datafusion::physical_plan::metrics::Count;
|
||||
use datafusion_util::MemoryStream;
|
||||
|
@ -137,21 +137,6 @@ impl TestCatalog {
|
|||
Arc::clone(&self.exec)
|
||||
}
|
||||
|
||||
/// Create a shard in the catalog
|
||||
pub async fn create_shard(self: &Arc<Self>, shard_index: i32) -> Arc<Shard> {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
|
||||
let topic = repos.topics().create_or_get("topic").await.unwrap();
|
||||
let shard_index = ShardIndex::new(shard_index);
|
||||
Arc::new(
|
||||
repos
|
||||
.shards()
|
||||
.create_or_get(&topic, shard_index)
|
||||
.await
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create namespace with specified retention
|
||||
pub async fn create_namespace_with_retention(
|
||||
self: &Arc<Self>,
|
||||
|
@ -254,23 +239,6 @@ impl TestNamespace {
|
|||
})
|
||||
}
|
||||
|
||||
/// Create a shard for this namespace
|
||||
pub async fn create_shard(self: &Arc<Self>, shard_index: i32) -> Arc<TestShard> {
|
||||
let mut repos = self.catalog.catalog.repositories().await;
|
||||
|
||||
let shard = repos
|
||||
.shards()
|
||||
.create_or_get(&self.topic, ShardIndex::new(shard_index))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Arc::new(TestShard {
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(self),
|
||||
shard,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get namespace schema for this namespace.
|
||||
pub async fn schema(&self) -> NamespaceSchema {
|
||||
let mut repos = self.catalog.catalog.repositories().await;
|
||||
|
@ -304,15 +272,6 @@ impl TestNamespace {
|
|||
}
|
||||
}
|
||||
|
||||
/// A test shard with its namespace in the catalog
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct TestShard {
|
||||
pub catalog: Arc<TestCatalog>,
|
||||
pub namespace: Arc<TestNamespace>,
|
||||
pub shard: Shard,
|
||||
}
|
||||
|
||||
/// A test table of a namespace in the catalog
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug)]
|
||||
|
@ -323,16 +282,49 @@ pub struct TestTable {
|
|||
}
|
||||
|
||||
impl TestTable {
|
||||
/// Attach a shard to the table
|
||||
pub fn with_shard(self: &Arc<Self>, shard: &Arc<TestShard>) -> Arc<TestTableBoundShard> {
|
||||
assert!(Arc::ptr_eq(&self.catalog, &shard.catalog));
|
||||
assert!(Arc::ptr_eq(&self.namespace, &shard.namespace));
|
||||
/// Create a partition for the table
|
||||
pub async fn create_partition(self: &Arc<Self>, key: &str) -> Arc<TestPartition> {
|
||||
let mut repos = self.catalog.catalog.repositories().await;
|
||||
|
||||
Arc::new(TestTableBoundShard {
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(key.into(), self.table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Arc::new(TestPartition {
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(&self.namespace),
|
||||
table: Arc::clone(self),
|
||||
shard: Arc::clone(shard),
|
||||
partition,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a partition with a specified sort key for the table
|
||||
pub async fn create_partition_with_sort_key(
|
||||
self: &Arc<Self>,
|
||||
key: &str,
|
||||
sort_key: &[&str],
|
||||
) -> Arc<TestPartition> {
|
||||
let mut repos = self.catalog.catalog.repositories().await;
|
||||
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(key.into(), self.table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.cas_sort_key(partition.id, None, sort_key)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Arc::new(TestPartition {
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(&self.namespace),
|
||||
table: Arc::clone(self),
|
||||
partition,
|
||||
})
|
||||
}
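With `TestTableBoundShard` gone, test setup goes straight from table to partition. A short usage sketch of the new helpers (namespace, table, column, and key names are illustrative):

```
use data_types::ColumnType;
use iox_tests::TestCatalog;

#[tokio::test]
async fn partition_setup_without_shards() {
    let catalog = TestCatalog::new();
    let ns = catalog.create_namespace_1hr_retention("ns").await;
    let table = ns.create_table("table").await;
    table.create_column("tag", ColumnType::Tag).await;
    table.create_column("time", ColumnType::Time).await;

    // No shard-binding step any more: partitions hang directly off the table.
    let _partition = table.create_partition("k").await;
    let _sorted = table
        .create_partition_with_sort_key("k_sorted", &["tag", "time"])
        .await;
}
```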
|
||||
|
||||
|
@ -407,73 +399,13 @@ pub struct TestColumn {
|
|||
pub column: Column,
|
||||
}
|
||||
|
||||
/// A test catalog with specified namespace, shard, and table
|
||||
#[allow(missing_docs)]
|
||||
pub struct TestTableBoundShard {
|
||||
pub catalog: Arc<TestCatalog>,
|
||||
pub namespace: Arc<TestNamespace>,
|
||||
pub table: Arc<TestTable>,
|
||||
pub shard: Arc<TestShard>,
|
||||
}
|
||||
|
||||
impl TestTableBoundShard {
|
||||
/// Create a partition for the table
|
||||
pub async fn create_partition(self: &Arc<Self>, key: &str) -> Arc<TestPartition> {
|
||||
let mut repos = self.catalog.catalog.repositories().await;
|
||||
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(key.into(), self.shard.shard.id, self.table.table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Arc::new(TestPartition {
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(&self.namespace),
|
||||
table: Arc::clone(&self.table),
|
||||
shard: Arc::clone(&self.shard),
|
||||
partition,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a partition with a specified sort key for the table
|
||||
pub async fn create_partition_with_sort_key(
|
||||
self: &Arc<Self>,
|
||||
key: &str,
|
||||
sort_key: &[&str],
|
||||
) -> Arc<TestPartition> {
|
||||
let mut repos = self.catalog.catalog.repositories().await;
|
||||
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.create_or_get(key.into(), self.shard.shard.id, self.table.table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let partition = repos
|
||||
.partitions()
|
||||
.cas_sort_key(partition.id, None, sort_key)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Arc::new(TestPartition {
|
||||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(&self.namespace),
|
||||
table: Arc::clone(&self.table),
|
||||
shard: Arc::clone(&self.shard),
|
||||
partition,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A test catalog with specified namespace, shard, table, partition
|
||||
/// A test catalog with specified namespace, table, partition
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug)]
|
||||
pub struct TestPartition {
|
||||
pub catalog: Arc<TestCatalog>,
|
||||
pub namespace: Arc<TestNamespace>,
|
||||
pub table: Arc<TestTable>,
|
||||
pub shard: Arc<TestShard>,
|
||||
pub partition: Partition,
|
||||
}
|
||||
|
||||
|
@ -510,7 +442,6 @@ impl TestPartition {
|
|||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(&self.namespace),
|
||||
table: Arc::clone(&self.table),
|
||||
shard: Arc::clone(&self.shard),
|
||||
partition,
|
||||
})
|
||||
}
|
||||
|
@ -562,7 +493,6 @@ impl TestPartition {
|
|||
creation_timestamp: now(),
|
||||
namespace_id: self.namespace.namespace.id,
|
||||
namespace_name: self.namespace.namespace.name.clone().into(),
|
||||
shard_id: self.shard.shard.id,
|
||||
table_id: self.table.table.id,
|
||||
table_name: self.table.table.name.clone().into(),
|
||||
partition_id: self.partition.id,
|
||||
|
@ -651,7 +581,6 @@ impl TestPartition {
|
|||
};
|
||||
|
||||
let parquet_file_params = ParquetFileParams {
|
||||
shard_id: self.shard.shard.id,
|
||||
namespace_id: self.namespace.namespace.id,
|
||||
table_id: self.table.table.id,
|
||||
partition_id: self.partition.id,
|
||||
|
@ -686,7 +615,6 @@ impl TestPartition {
|
|||
catalog: Arc::clone(&self.catalog),
|
||||
namespace: Arc::clone(&self.namespace),
|
||||
table: Arc::clone(&self.table),
|
||||
shard: Arc::clone(&self.shard),
|
||||
partition: Arc::clone(self),
|
||||
parquet_file,
|
||||
size_override,
|
||||
|
@ -895,7 +823,6 @@ pub struct TestParquetFile {
|
|||
pub catalog: Arc<TestCatalog>,
|
||||
pub namespace: Arc<TestNamespace>,
|
||||
pub table: Arc<TestTable>,
|
||||
pub shard: Arc<TestShard>,
|
||||
pub partition: Arc<TestPartition>,
|
||||
pub parquet_file: ParquetFile,
|
||||
pub size_override: Option<i64>,
|
||||
|
|
|
@ -14,8 +14,7 @@
|
|||
|
||||
mod catalog;
|
||||
pub use catalog::{
|
||||
TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestShard,
|
||||
TestTable,
|
||||
TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestTable,
|
||||
};
|
||||
|
||||
mod builders;
|
||||
|
|
|
@ -5,7 +5,7 @@ use compactor2::{
|
|||
compactor::Compactor2,
|
||||
config::{Config, PartitionsSourceConfig, ShardConfig},
|
||||
};
|
||||
use data_types::{PartitionId, TRANSITION_SHARD_NUMBER};
|
||||
use data_types::PartitionId;
|
||||
use hyper::{Body, Request, Response};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::exec::Executor;
|
||||
|
@ -28,10 +28,6 @@ use std::{
|
|||
use tokio_util::sync::CancellationToken;
|
||||
use trace::TraceCollector;
|
||||
|
||||
// There is only one shard with index 1
|
||||
const TOPIC: &str = "iox-shared";
|
||||
const TRANSITION_SHARD_INDEX: i32 = TRANSITION_SHARD_NUMBER;
|
||||
|
||||
pub struct Compactor2ServerType {
|
||||
compactor: Compactor2,
|
||||
metric_registry: Arc<Registry>,
|
||||
|
@ -174,16 +170,8 @@ pub async fn create_compactor2_server_type(
|
|||
CompactionType::Cold => compactor2::config::CompactionType::Cold,
|
||||
};
|
||||
|
||||
let shard_id = Config::fetch_shard_id(
|
||||
Arc::clone(&catalog),
|
||||
backoff_config.clone(),
|
||||
TOPIC.to_string(),
|
||||
TRANSITION_SHARD_INDEX,
|
||||
)
|
||||
.await;
|
||||
let compactor = Compactor2::start(Config {
|
||||
compaction_type,
|
||||
shard_id,
|
||||
metric_registry: Arc::clone(&metric_registry),
|
||||
catalog,
|
||||
parquet_store_real,
|
||||
|
|
|
@ -118,9 +118,6 @@ mod tests {
|
|||
async fn test_get_namespaces_empty() {
|
||||
let catalog = TestCatalog::new();
|
||||
|
||||
// QuerierDatabase::new returns an error if there are no shards in the catalog
|
||||
catalog.create_shard(0).await;
|
||||
|
||||
let catalog_cache = Arc::new(QuerierCatalogCache::new_testing(
|
||||
catalog.catalog(),
|
||||
catalog.time_provider(),
|
||||
|
@ -154,9 +151,6 @@ mod tests {
|
|||
async fn test_get_namespaces() {
|
||||
let catalog = TestCatalog::new();
|
||||
|
||||
// QuerierDatabase::new returns an error if there are no shards in the catalog
|
||||
catalog.create_shard(0).await;
|
||||
|
||||
let catalog_cache = Arc::new(QuerierCatalogCache::new_testing(
|
||||
catalog.catalog(),
|
||||
catalog.time_provider(),
|
||||
|
|
|
@ -20,9 +20,7 @@ pub mod metadata;
|
|||
pub mod serialize;
|
||||
pub mod storage;
|
||||
|
||||
use data_types::{
|
||||
NamespaceId, ParquetFile, ParquetFileParams, PartitionId, ShardId, TableId, TRANSITION_SHARD_ID,
|
||||
};
|
||||
use data_types::{NamespaceId, ParquetFile, ParquetFileParams, PartitionId, TableId};
|
||||
use object_store::path::Path;
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -32,7 +30,6 @@ use uuid::Uuid;
|
|||
pub struct ParquetFilePath {
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
shard_id: ShardId,
|
||||
partition_id: PartitionId,
|
||||
object_store_id: Uuid,
|
||||
}
|
||||
|
@ -42,14 +39,12 @@ impl ParquetFilePath {
|
|||
pub fn new(
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
shard_id: ShardId,
|
||||
partition_id: PartitionId,
|
||||
object_store_id: Uuid,
|
||||
) -> Self {
|
||||
Self {
|
||||
namespace_id,
|
||||
table_id,
|
||||
shard_id,
|
||||
partition_id,
|
||||
object_store_id,
|
||||
}
|
||||
|
@ -60,26 +55,15 @@ impl ParquetFilePath {
|
|||
let Self {
|
||||
namespace_id,
|
||||
table_id,
|
||||
shard_id,
|
||||
partition_id,
|
||||
object_store_id,
|
||||
} = self;
|
||||
if shard_id == &TRANSITION_SHARD_ID {
|
||||
Path::from_iter([
|
||||
namespace_id.to_string().as_str(),
|
||||
table_id.to_string().as_str(),
|
||||
partition_id.to_string().as_str(),
|
||||
&format!("{object_store_id}.parquet"),
|
||||
])
|
||||
} else {
|
||||
Path::from_iter([
|
||||
namespace_id.to_string().as_str(),
|
||||
table_id.to_string().as_str(),
|
||||
shard_id.to_string().as_str(),
|
||||
partition_id.to_string().as_str(),
|
||||
&format!("{object_store_id}.parquet"),
|
||||
])
|
||||
}
|
||||
}
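With the branch removed, every new file is addressed by the four-segment `namespace/table/partition/<uuid>.parquet` layout that the transition arm above already produced. A minimal self-contained sketch of that flattened construction (plain `i64`s stand in for the ID newtypes):

```
use object_store::path::Path;
use uuid::Uuid;

fn flat_parquet_path(
    namespace_id: i64,
    table_id: i64,
    partition_id: i64,
    object_store_id: Uuid,
) -> Path {
    // namespace/table/partition/<uuid>.parquet — no shard segment any more
    Path::from_iter([
        namespace_id.to_string().as_str(),
        table_id.to_string().as_str(),
        partition_id.to_string().as_str(),
        &format!("{object_store_id}.parquet"),
    ])
}
```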
|
||||
|
||||
/// Get object store ID.
|
||||
|
@ -107,7 +91,6 @@ impl From<&crate::metadata::IoxMetadata> for ParquetFilePath {
|
|||
Self {
|
||||
namespace_id: m.namespace_id,
|
||||
table_id: m.table_id,
|
||||
shard_id: m.shard_id,
|
||||
partition_id: m.partition_id,
|
||||
object_store_id: m.object_store_id,
|
||||
}
|
||||
|
@ -119,7 +102,6 @@ impl From<&ParquetFile> for ParquetFilePath {
|
|||
Self {
|
||||
namespace_id: f.namespace_id,
|
||||
table_id: f.table_id,
|
||||
shard_id: f.shard_id,
|
||||
partition_id: f.partition_id,
|
||||
object_store_id: f.object_store_id,
|
||||
}
|
||||
|
@ -131,7 +113,6 @@ impl From<&ParquetFileParams> for ParquetFilePath {
|
|||
Self {
|
||||
namespace_id: f.namespace_id,
|
||||
table_id: f.table_id,
|
||||
shard_id: f.shard_id,
|
||||
partition_id: f.partition_id,
|
||||
object_store_id: f.object_store_id,
|
||||
}
|
||||
|
@ -147,23 +128,6 @@ mod tests {
|
|||
let pfp = ParquetFilePath::new(
|
||||
NamespaceId::new(1),
|
||||
TableId::new(2),
|
||||
ShardId::new(3),
|
||||
PartitionId::new(4),
|
||||
Uuid::nil(),
|
||||
);
|
||||
let path = pfp.object_store_path();
|
||||
assert_eq!(
|
||||
path.to_string(),
|
||||
"1/2/3/4/00000000-0000-0000-0000-000000000000.parquet".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parquet_file_without_shard_id() {
|
||||
let pfp = ParquetFilePath::new(
|
||||
NamespaceId::new(1),
|
||||
TableId::new(2),
|
||||
TRANSITION_SHARD_ID,
|
||||
PartitionId::new(4),
|
||||
Uuid::nil(),
|
||||
);
|
||||
|
|
|
@ -90,8 +90,8 @@ use base64::{prelude::BASE64_STANDARD, Engine};
|
|||
use bytes::Bytes;
|
||||
use data_types::{
|
||||
ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId,
|
||||
ParquetFileParams, PartitionId, PartitionKey, SequenceNumber, ShardId, StatValues, Statistics,
|
||||
TableId, Timestamp,
|
||||
ParquetFileParams, PartitionId, PartitionKey, SequenceNumber, StatValues, Statistics, TableId,
|
||||
Timestamp,
|
||||
};
|
||||
use generated_types::influxdata::iox::ingester::v1 as proto;
|
||||
use iox_time::Time;
|
||||
|
@ -262,9 +262,6 @@ pub struct IoxMetadata {
|
|||
/// namespace name of the data
|
||||
pub namespace_name: Arc<str>,
|
||||
|
||||
/// shard id of the data
|
||||
pub shard_id: ShardId,
|
||||
|
||||
/// table id of the data
|
||||
pub table_id: TableId,
|
||||
|
||||
|
@ -339,7 +336,6 @@ impl IoxMetadata {
|
|||
creation_timestamp: Some(self.creation_timestamp.date_time().into()),
|
||||
namespace_id: self.namespace_id.get(),
|
||||
namespace_name: self.namespace_name.to_string(),
|
||||
shard_id: self.shard_id.get(),
|
||||
table_id: self.table_id.get(),
|
||||
table_name: self.table_name.to_string(),
|
||||
partition_id: self.partition_id.get(),
|
||||
|
@ -392,7 +388,6 @@ impl IoxMetadata {
|
|||
creation_timestamp,
|
||||
namespace_id: NamespaceId::new(proto_msg.namespace_id),
|
||||
namespace_name,
|
||||
shard_id: ShardId::new(proto_msg.shard_id),
|
||||
table_id: TableId::new(proto_msg.table_id),
|
||||
table_name,
|
||||
partition_id: PartitionId::new(proto_msg.partition_id),
|
||||
|
@ -418,7 +413,6 @@ impl IoxMetadata {
|
|||
creation_timestamp: Time::from_timestamp_nanos(creation_timestamp_ns),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "external".into(),
|
||||
shard_id: ShardId::new(1),
|
||||
table_id: TableId::new(1),
|
||||
table_name: table_name.into(),
|
||||
partition_id: PartitionId::new(1),
|
||||
|
@ -501,7 +495,6 @@ impl IoxMetadata {
|
|||
};
|
||||
|
||||
ParquetFileParams {
|
||||
shard_id: self.shard_id,
|
||||
namespace_id: self.namespace_id,
|
||||
table_id: self.table_id,
|
||||
partition_id: self.partition_id,
|
||||
|
@ -1020,7 +1013,6 @@ mod tests {
|
|||
creation_timestamp: create_time,
|
||||
namespace_id: NamespaceId::new(2),
|
||||
namespace_name: Arc::from("hi"),
|
||||
shard_id: ShardId::new(1),
|
||||
table_id: TableId::new(3),
|
||||
table_name: Arc::from("weather"),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
@ -1045,7 +1037,6 @@ mod tests {
|
|||
creation_timestamp: Time::from_timestamp_nanos(42),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
|
|
@ -197,7 +197,7 @@ mod tests {
|
|||
record_batch::RecordBatch,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, ShardId, TableId};
|
||||
use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId};
|
||||
use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
|
||||
use datafusion_util::MemoryStream;
|
||||
use iox_time::Time;
|
||||
|
@ -210,7 +210,6 @@ mod tests {
|
|||
creation_timestamp: Time::from_timestamp_nanos(42),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
|
|
@ -323,7 +323,7 @@ mod tests {
|
|||
array::{ArrayRef, Int64Array, StringArray},
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, ShardId, TableId};
|
||||
use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId};
|
||||
use datafusion::common::DataFusionError;
|
||||
use datafusion_util::MemoryStream;
|
||||
use iox_time::Time;
|
||||
|
@ -575,7 +575,6 @@ mod tests {
|
|||
creation_timestamp: Time::from_timestamp_nanos(42),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
|
|
@ -5,8 +5,7 @@ use arrow::{
|
|||
record_batch::RecordBatch,
|
||||
};
|
||||
use data_types::{
|
||||
ColumnId, CompactionLevel, NamespaceId, PartitionId, SequenceNumber, ShardId, TableId,
|
||||
Timestamp,
|
||||
ColumnId, CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId, Timestamp,
|
||||
};
|
||||
use datafusion_util::MemoryStream;
|
||||
use iox_time::Time;
|
||||
|
@ -54,7 +53,6 @@ async fn test_decoded_iox_metadata() {
|
|||
creation_timestamp: Time::from_timestamp_nanos(42),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
@ -196,7 +194,6 @@ async fn test_empty_parquet_file_panic() {
|
|||
creation_timestamp: Time::from_timestamp_nanos(42),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
@ -291,7 +288,6 @@ async fn test_decoded_many_columns_with_null_cols_iox_metadata() {
|
|||
creation_timestamp: Time::from_timestamp_nanos(42),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id: PartitionId::new(4),
|
||||
|
@ -380,7 +376,6 @@ async fn test_derive_parquet_file_params() {
|
|||
creation_timestamp: Time::from_timestamp_nanos(1234),
|
||||
namespace_id: NamespaceId::new(1),
|
||||
namespace_name: "bananas".into(),
|
||||
shard_id: ShardId::new(2),
|
||||
table_id: TableId::new(3),
|
||||
table_name: "platanos".into(),
|
||||
partition_id,
|
||||
|
@ -425,7 +420,6 @@ async fn test_derive_parquet_file_params() {
|
|||
//
|
||||
// NOTE: thrift-encoded metadata not checked
|
||||
// TODO: check thrift-encoded metadata which may be the issue of bug 4695
|
||||
assert_eq!(catalog_data.shard_id, meta.shard_id);
|
||||
assert_eq!(catalog_data.namespace_id, meta.namespace_id);
|
||||
assert_eq!(catalog_data.table_id, meta.table_id);
|
||||
assert_eq!(catalog_data.partition_id, meta.partition_id);
|
||||
|
|
|
@ -347,8 +347,8 @@ mod tests {
|
|||
partition.create_parquet_file(builder).await;
|
||||
let table_id = table.table.id;
|
||||
|
||||
let single_file_size = 232;
|
||||
let two_file_size = 424;
|
||||
let single_file_size = 224;
|
||||
let two_file_size = 408;
|
||||
assert!(single_file_size < two_file_size);
|
||||
|
||||
let cache = make_cache(&catalog);
|
||||
|
@ -444,9 +444,8 @@ mod tests {
|
|||
let table = ns.create_table(table_name).await;
|
||||
table.create_column("foo", ColumnType::F64).await;
|
||||
table.create_column("time", ColumnType::Time).await;
|
||||
let shard1 = ns.create_shard(1).await;
|
||||
|
||||
let partition = table.with_shard(&shard1).create_partition("k").await;
|
||||
let partition = table.create_partition("k").await;
|
||||
|
||||
(table, partition)
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ use cache_system::{
|
|||
loader::{metrics::MetricsLoader, FunctionLoader},
|
||||
resource_consumption::FunctionEstimator,
|
||||
};
|
||||
use data_types::{ColumnId, PartitionId, ShardId};
|
||||
use data_types::{ColumnId, PartitionId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_time::TimeProvider;
|
||||
use schema::sort::SortKey;
|
||||
|
@ -74,10 +74,7 @@ impl PartitionCache {
|
|||
Arc::new(PartitionSortKey::new(sort_key, &extra.column_id_map_rev))
|
||||
});
|
||||
|
||||
Some(CachedPartition {
|
||||
shard_id: partition.shard_id,
|
||||
sort_key,
|
||||
})
|
||||
Some(CachedPartition { sort_key })
|
||||
}
|
||||
});
|
||||
let loader = Arc::new(MetricsLoader::new(
|
||||
|
@ -118,19 +115,6 @@ impl PartitionCache {
|
|||
}
|
||||
}
|
||||
|
||||
/// Get shard ID.
|
||||
pub async fn shard_id(
|
||||
&self,
|
||||
cached_table: Arc<CachedTable>,
|
||||
partition_id: PartitionId,
|
||||
span: Option<Span>,
|
||||
) -> Option<ShardId> {
|
||||
self.cache
|
||||
.get(partition_id, (cached_table, span))
|
||||
.await
|
||||
.map(|p| p.shard_id)
|
||||
}
|
||||
|
||||
/// Get sort key
|
||||
///
|
||||
/// Expire partition if the cached sort key does NOT cover the given set of columns.
|
||||
|
@ -164,7 +148,6 @@ impl PartitionCache {
|
|||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CachedPartition {
|
||||
shard_id: ShardId,
|
||||
sort_key: Option<Arc<PartitionSortKey>>,
|
||||
}
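After the field removal, a cache entry is nothing but the optional sort key, and `sort_key` is the only read path left on the cache. A usage sketch mirroring the tests below (`cache`, `cached_table`, and `partition_id` are assumed to exist as in those tests):

```
// The lookup also expires the entry if the cached key does not cover the
// requested columns (see the doc comment on `sort_key` above).
let _sort_key = cache
    .sort_key(Arc::clone(&cached_table), partition_id, &Vec::new(), None)
    .await;
```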
|
||||
|
||||
|
@ -227,74 +210,6 @@ mod tests {
|
|||
use iox_tests::TestCatalog;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shard_id() {
|
||||
let catalog = TestCatalog::new();
|
||||
|
||||
let ns = catalog.create_namespace_1hr_retention("ns").await;
|
||||
let t = ns.create_table("table").await;
|
||||
let s1 = ns.create_shard(1).await;
|
||||
let s2 = ns.create_shard(2).await;
|
||||
let p1 = t
|
||||
.with_shard(&s1)
|
||||
.create_partition("k1")
|
||||
.await
|
||||
.partition
|
||||
.clone();
|
||||
let p2 = t
|
||||
.with_shard(&s2)
|
||||
.create_partition("k2")
|
||||
.await
|
||||
.partition
|
||||
.clone();
|
||||
let cached_table = Arc::new(CachedTable {
|
||||
id: t.table.id,
|
||||
schema: schema(),
|
||||
column_id_map: HashMap::default(),
|
||||
column_id_map_rev: HashMap::default(),
|
||||
primary_key_column_ids: vec![],
|
||||
});
|
||||
|
||||
let cache = PartitionCache::new(
|
||||
catalog.catalog(),
|
||||
BackoffConfig::default(),
|
||||
catalog.time_provider(),
|
||||
&catalog.metric_registry(),
|
||||
test_ram_pool(),
|
||||
true,
|
||||
);
|
||||
|
||||
let id1 = cache
|
||||
.shard_id(Arc::clone(&cached_table), p1.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(id1, s1.shard.id);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
|
||||
|
||||
let id2 = cache
|
||||
.shard_id(Arc::clone(&cached_table), p2.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(id2, s2.shard.id);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
|
||||
|
||||
let id1 = cache
|
||||
.shard_id(Arc::clone(&cached_table), p1.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(id1, s1.shard.id);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
|
||||
|
||||
// non-existing partition
|
||||
for _ in 0..2 {
|
||||
let res = cache
|
||||
.shard_id(Arc::clone(&cached_table), PartitionId::new(i64::MAX), None)
|
||||
.await;
|
||||
assert_eq!(res, None);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_sort_key() {
|
||||
let catalog = TestCatalog::new();
|
||||
|
@ -303,16 +218,12 @@ mod tests {
|
|||
let t = ns.create_table("table").await;
|
||||
let c1 = t.create_column("tag", ColumnType::Tag).await;
|
||||
let c2 = t.create_column("time", ColumnType::Time).await;
|
||||
let s1 = ns.create_shard(1).await;
|
||||
let s2 = ns.create_shard(2).await;
|
||||
let p1 = t
|
||||
.with_shard(&s1)
|
||||
.create_partition_with_sort_key("k1", &["tag", "time"])
|
||||
.await
|
||||
.partition
|
||||
.clone();
|
||||
let p2 = t
|
||||
.with_shard(&s2)
|
||||
.create_partition("k2") // no sort key
|
||||
.await
|
||||
.partition
|
||||
|
@ -391,26 +302,13 @@ mod tests {
|
|||
let t = ns.create_table("table").await;
|
||||
let c1 = t.create_column("tag", ColumnType::Tag).await;
|
||||
let c2 = t.create_column("time", ColumnType::Time).await;
|
||||
let s1 = ns.create_shard(1).await;
|
||||
let s2 = ns.create_shard(2).await;
|
||||
let p1 = t
|
||||
.with_shard(&s1)
|
||||
.create_partition_with_sort_key("k1", &["tag", "time"])
|
||||
.await
|
||||
.partition
|
||||
.clone();
|
||||
let p2 = t
|
||||
.with_shard(&s2)
|
||||
.create_partition("k2")
|
||||
.await
|
||||
.partition
|
||||
.clone();
|
||||
let p3 = t
|
||||
.with_shard(&s2)
|
||||
.create_partition("k3")
|
||||
.await
|
||||
.partition
|
||||
.clone();
|
||||
let p2 = t.create_partition("k2").await.partition.clone();
|
||||
let p3 = t.create_partition("k3").await.partition.clone();
|
||||
let cached_table = Arc::new(CachedTable {
|
||||
id: t.table.id,
|
||||
schema: schema(),
|
||||
|
@ -434,22 +332,19 @@ mod tests {
|
|||
true,
|
||||
);
|
||||
|
||||
cache.shard_id(Arc::clone(&cached_table), p2.id, None).await;
|
||||
cache
|
||||
.sort_key(Arc::clone(&cached_table), p3.id, &Vec::new(), None)
|
||||
.await;
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
|
||||
|
||||
cache.shard_id(Arc::clone(&cached_table), p1.id, None).await;
|
||||
cache
|
||||
.sort_key(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
|
||||
.await;
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
|
||||
|
||||
cache
|
||||
.sort_key(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
|
||||
.await;
|
||||
cache.shard_id(Arc::clone(&cached_table), p2.id, None).await;
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
|
||||
}
|
||||
|
||||
|
@ -461,8 +356,7 @@ mod tests {
|
|||
let t = ns.create_table("table").await;
|
||||
let c1 = t.create_column("foo", ColumnType::Tag).await;
|
||||
let c2 = t.create_column("time", ColumnType::Time).await;
|
||||
let s = ns.create_shard(1).await;
|
||||
let p = t.with_shard(&s).create_partition("k1").await;
|
||||
let p = t.create_partition("k1").await;
|
||||
let p_id = p.partition.id;
|
||||
let p_sort_key = p.partition.sort_key();
|
||||
let cached_table = Arc::new(CachedTable {
|
||||
|
|
|
@ -234,8 +234,6 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_namespace() {
|
||||
let catalog = TestCatalog::new();
|
||||
// QuerierDatabase::new returns an error if there are no shards in the catalog
|
||||
catalog.create_shard(0).await;
|
||||
|
||||
let catalog_cache = Arc::new(CatalogCache::new_testing(
|
||||
catalog.catalog(),
|
||||
|
@ -264,8 +262,6 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_namespaces() {
|
||||
let catalog = TestCatalog::new();
|
||||
// QuerierDatabase::new returns an error if there are no shards in the catalog
|
||||
catalog.create_shard(0).await;
|
||||
|
||||
let catalog_cache = Arc::new(CatalogCache::new_testing(
|
||||
catalog.catalog(),
|
||||
|
|
|
@ -160,7 +160,6 @@ impl Drop for QuerierHandlerImpl {
|
|||
mod tests {
|
||||
use super::*;
|
||||
use crate::{cache::CatalogCache, create_ingester_connection_for_testing};
|
||||
use data_types::ShardIndex;
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
use iox_query::exec::Executor;
|
||||
use iox_time::{MockProvider, Time};
|
||||
|
@ -204,18 +203,6 @@ mod tests {
|
|||
Arc::clone(&object_store),
|
||||
&Handle::current(),
|
||||
));
|
||||
// QuerierDatabase::new returns an error if there are no shards in the catalog
|
||||
{
|
||||
let mut repos = catalog.repositories().await;
|
||||
|
||||
let topic = repos.topics().create_or_get("topic").await.unwrap();
|
||||
let shard_index = ShardIndex::new(0);
|
||||
repos
|
||||
.shards()
|
||||
.create_or_get(&topic, shard_index)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let database = Arc::new(
|
||||
QuerierDatabase::new(
|
||||
|
|
|
@ -13,8 +13,8 @@ use async_trait::async_trait;
|
|||
use backoff::{Backoff, BackoffConfig, BackoffError};
|
||||
use client_util::connection;
|
||||
use data_types::{
|
||||
ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, SequenceNumber, ShardId,
|
||||
ShardIndex, TableSummary, TimestampMinMax,
|
||||
ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, SequenceNumber, TableSummary,
|
||||
TimestampMinMax,
|
||||
};
|
||||
use datafusion::error::DataFusionError;
|
||||
use futures::{stream::FuturesUnordered, TryStreamExt};
|
||||
|
@ -127,16 +127,6 @@ pub enum Error {
|
|||
ingester_address: String,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"No ingester found in shard to ingester mapping for shard index {shard_index}"
|
||||
))]
|
||||
NoIngesterFoundForShard { shard_index: ShardIndex },
|
||||
|
||||
#[snafu(display(
|
||||
"Shard index {shard_index} was neither mapped to an ingester nor marked ignore"
|
||||
))]
|
||||
ShardNotMapped { shard_index: ShardIndex },
|
||||
|
||||
#[snafu(display("Could not parse `{ingester_uuid}` as a UUID: {source}"))]
|
||||
IngesterUuid {
|
||||
ingester_uuid: String,
|
||||
|
@ -498,46 +488,13 @@ async fn execute(
|
|||
decoder.finalize().await
|
||||
}
|
||||
|
||||
/// Current partition used while decoding the ingester response stream.
|
||||
#[derive(Debug)]
|
||||
enum CurrentPartition {
|
||||
/// There exists a partition.
|
||||
Some(IngesterPartition),
|
||||
|
||||
/// There is no existing partition.
|
||||
None,
|
||||
|
||||
/// Skip the current partition (e.g. because it is gone from the catalog).
|
||||
Skip,
|
||||
}
|
||||
|
||||
impl CurrentPartition {
|
||||
fn take(&mut self) -> Option<IngesterPartition> {
|
||||
let mut tmp = Self::None;
|
||||
std::mem::swap(&mut tmp, self);
|
||||
|
||||
match tmp {
|
||||
Self::None | Self::Skip => None,
|
||||
Self::Some(p) => Some(p),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_skip(&self) -> bool {
|
||||
matches!(self, Self::Skip)
|
||||
}
|
||||
|
||||
fn is_some(&self) -> bool {
|
||||
matches!(self, Self::Some(_))
|
||||
}
|
||||
}
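Both surviving states map directly onto `Option`: `take()` and `is_none()`/`is_some()` come for free, and the `Skip` arm disappears together with the shard lookup that was the only way to enter it. A tiny sketch of that simplification (the flush handling here is illustrative, not the decoder's exact logic):

```
let mut current_partition: Option<IngesterPartition> = None;

// ... a new partition header arrives on the stream:
current_partition = Some(partition);

// Flushing: Option::take hands back the value (if any) and resets the slot to
// None, which is exactly what the removed CurrentPartition::take did.
if let Some(p) = current_partition.take() {
    finished_partitions.insert(p.partition_id(), p);
}
```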
|
||||
|
||||
/// Helper to disassemble the data from the ingester Apache Flight arrow stream.
|
||||
///
|
||||
/// This should be used AFTER the stream was drained because we will perform some catalog IO and
|
||||
/// this should likely not block the ingester.
|
||||
struct IngesterStreamDecoder {
|
||||
finished_partitions: HashMap<PartitionId, IngesterPartition>,
|
||||
current_partition: CurrentPartition,
|
||||
current_partition: Option<IngesterPartition>,
|
||||
current_chunk: Option<(Schema, Vec<RecordBatch>)>,
|
||||
ingester_address: Arc<str>,
|
||||
catalog_cache: Arc<CatalogCache>,
|
||||
|
@ -555,7 +512,7 @@ impl IngesterStreamDecoder {
|
|||
) -> Self {
|
||||
Self {
|
||||
finished_partitions: HashMap::new(),
|
||||
current_partition: CurrentPartition::None,
|
||||
current_partition: None,
|
||||
current_chunk: None,
|
||||
ingester_address,
|
||||
catalog_cache,
|
||||
|
@ -571,11 +528,8 @@ impl IngesterStreamDecoder {
|
|||
.current_partition
|
||||
.take()
|
||||
.expect("Partition should have been checked before chunk creation");
|
||||
self.current_partition = CurrentPartition::Some(current_partition.try_add_chunk(
|
||||
ChunkId::new(),
|
||||
schema,
|
||||
batches,
|
||||
)?);
|
||||
self.current_partition =
|
||||
Some(current_partition.try_add_chunk(ChunkId::new(), schema, batches)?);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -648,21 +602,6 @@ impl IngesterStreamDecoder {
|
|||
ingester_address: self.ingester_address.as_ref()
|
||||
},
|
||||
);
|
||||
let shard_id = self
|
||||
.catalog_cache
|
||||
.partition()
|
||||
.shard_id(
|
||||
Arc::clone(&self.cached_table),
|
||||
partition_id,
|
||||
self.span_recorder
|
||||
.child_span("cache GET partition shard ID"),
|
||||
)
|
||||
.await;
|
||||
|
||||
let Some(shard_id) = shard_id else {
|
||||
self.current_partition = CurrentPartition::Skip;
|
||||
return Ok(())
|
||||
};
|
||||
|
||||
// Use a temporary empty partition sort key. We are going to fetch this AFTER we
|
||||
// know all chunks because then we are able to detect all relevant primary key
|
||||
|
@ -683,18 +622,13 @@ impl IngesterStreamDecoder {
let partition = IngesterPartition::new(
    ingester_uuid,
    partition_id,
    shard_id,
    md.completed_persistence_count,
    status.parquet_max_sequence_number.map(SequenceNumber::new),
    partition_sort_key,
);
self.current_partition = CurrentPartition::Some(partition);
self.current_partition = Some(partition);
}
DecodedPayload::Schema(schema) => {
if self.current_partition.is_skip() {
    return Ok(());
}

self.flush_chunk()?;
ensure!(
    self.current_partition.is_some(),

@ -716,10 +650,6 @@ impl IngesterStreamDecoder {
self.current_chunk = Some((schema, vec![]));
}
DecodedPayload::RecordBatch(batch) => {
if self.current_partition.is_skip() {
    return Ok(());
}

let current_chunk =
    self.current_chunk
        .as_mut()

@ -771,7 +701,7 @@ fn encode_predicate_as_base64(predicate: &Predicate) -> String {

#[async_trait]
impl IngesterConnection for IngesterConnectionImpl {
/// Retrieve chunks from the ingester for the particular table, shard, and predicate
/// Retrieve chunks from the ingester for the particular table and predicate
async fn partitions(
    &self,
    namespace_id: NamespaceId,

@ -871,12 +801,11 @@ impl IngesterConnection for IngesterConnectionImpl {
/// Given the catalog hierarchy:
///
/// ```text
/// (Catalog) Shard -> (Catalog) Table --> (Catalog) Partition
/// (Catalog) Table --> (Catalog) Partition
/// ```
///
/// An IngesterPartition contains the unpersisted data for a catalog
/// partition from a shard. Thus, there can be more than one
/// IngesterPartition for each table the ingester knows about.
/// An IngesterPartition contains the unpersisted data for a catalog partition. Thus, there can be
/// more than one IngesterPartition for each table the ingester knows about.
#[derive(Debug, Clone)]
pub struct IngesterPartition {
/// If using ingester2/rpc write path, the ingester UUID will be present and will identify

@ -887,7 +816,6 @@ pub struct IngesterPartition {
ingester_uuid: Option<Uuid>,

partition_id: PartitionId,
shard_id: ShardId,

/// If using ingester2/rpc write path, this will be the number of Parquet files this ingester
/// UUID has persisted for this partition.

@ -910,7 +838,6 @@ impl IngesterPartition {
pub fn new(
    ingester_uuid: Option<Uuid>,
    partition_id: PartitionId,
    shard_id: ShardId,
    completed_persistence_count: u64,
    parquet_max_sequence_number: Option<SequenceNumber>,
    partition_sort_key: Option<Arc<SortKey>>,

@ -918,7 +845,6 @@ impl IngesterPartition {
Self {
    ingester_uuid,
    partition_id,
    shard_id,
    completed_persistence_count,
    parquet_max_sequence_number,
    partition_sort_key,

@ -996,10 +922,6 @@ impl IngesterPartition {
self.partition_id
}

pub(crate) fn shard_id(&self) -> ShardId {
    self.shard_id
}

pub(crate) fn completed_persistence_count(&self) -> u64 {
    self.completed_persistence_count
}
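Because the added/removed markers were lost in this rendering, here is a minimal sketch of the resulting `IngesterPartition` once the `shard_id` field, constructor argument, and accessor are gone. It is reconstructed only from the hunks above; the import paths and any fields or methods not visible in this diff are assumptions, not the crate's authoritative layout.

```rust
// Sketch only: shape of IngesterPartition after this change, pieced together
// from the hunks above. Anything not shown in the diff is assumed.
use std::sync::Arc;

use data_types::{PartitionId, SequenceNumber};
use schema::sort::SortKey;
use uuid::Uuid;

#[derive(Debug, Clone)]
pub struct IngesterPartition {
    /// If using the ingester2/rpc write path, identifies the ingester process.
    ingester_uuid: Option<Uuid>,
    partition_id: PartitionId,
    /// Number of Parquet files this ingester UUID has persisted for this partition.
    completed_persistence_count: u64,
    parquet_max_sequence_number: Option<SequenceNumber>,
    partition_sort_key: Option<Arc<SortKey>>,
}

impl IngesterPartition {
    pub fn new(
        ingester_uuid: Option<Uuid>,
        partition_id: PartitionId,
        completed_persistence_count: u64,
        parquet_max_sequence_number: Option<SequenceNumber>,
        partition_sort_key: Option<Arc<SortKey>>,
    ) -> Self {
        Self {
            ingester_uuid,
            partition_id,
            completed_persistence_count,
            parquet_max_sequence_number,
            partition_sort_key,
        }
    }

    pub(crate) fn partition_id(&self) -> PartitionId {
        self.partition_id
    }

    pub(crate) fn completed_persistence_count(&self) -> u64 {
        self.completed_persistence_count
    }
}
```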
@ -1322,64 +1244,6 @@ mod tests {
assert!(partitions.is_empty());
}

#[tokio::test]
async fn test_flight_unknown_partitions() {
let ingester_uuid = Uuid::new_v4();
let record_batch = lp_to_record_batch("table foo=1 1");

let schema = record_batch.schema();

let mock_flight_client = Arc::new(
    MockFlightClient::new([(
        "addr1",
        Ok(MockQueryData {
            results: vec![
                metadata(
                    1000,
                    Some(PartitionStatus {
                        parquet_max_sequence_number: Some(11),
                    }),
                    ingester_uuid.to_string(),
                    3,
                ),
                metadata(
                    1001,
                    Some(PartitionStatus {
                        parquet_max_sequence_number: Some(11),
                    }),
                    ingester_uuid.to_string(),
                    4,
                ),
                Ok((
                    DecodedPayload::Schema(Arc::clone(&schema)),
                    IngesterQueryResponseMetadata::default(),
                )),
                metadata(
                    1002,
                    Some(PartitionStatus {
                        parquet_max_sequence_number: Some(11),
                    }),
                    ingester_uuid.to_string(),
                    5,
                ),
                Ok((
                    DecodedPayload::Schema(Arc::clone(&schema)),
                    IngesterQueryResponseMetadata::default(),
                )),
                Ok((
                    DecodedPayload::RecordBatch(record_batch),
                    IngesterQueryResponseMetadata::default(),
                )),
            ],
        }),
    )])
    .await,
);
let ingester_conn = mock_flight_client.ingester_conn().await;
let partitions = get_partitions(&ingester_conn).await.unwrap();
assert!(partitions.is_empty());
}

#[tokio::test]
async fn test_flight_no_batches() {
let ingester_uuid = Uuid::new_v4();

@ -1515,7 +1379,7 @@ mod tests {
}

#[tokio::test]
async fn test_flight_many_batches_no_shard() {
async fn test_flight_many_batches() {
let ingester_uuid1 = Uuid::new_v4();
let ingester_uuid2 = Uuid::new_v4();

@ -1958,12 +1822,9 @@ mod tests {
let ns = catalog.create_namespace_1hr_retention("namespace").await;
let table = ns.create_table("table").await;

let s0 = ns.create_shard(0).await;
let s1 = ns.create_shard(1).await;

table.with_shard(&s0).create_partition("k1").await;
table.with_shard(&s0).create_partition("k2").await;
table.with_shard(&s1).create_partition("k3").await;
table.create_partition("k1").await;
table.create_partition("k2").await;
table.create_partition("k3").await;

Self {
    catalog,

@ -2038,7 +1899,6 @@ mod tests {
let ingester_partition = IngesterPartition::new(
    Some(ingester_uuid),
    PartitionId::new(1),
    ShardId::new(1),
    0,
    parquet_max_sequence_number,
    None,

@ -2068,7 +1928,6 @@ mod tests {
let err = IngesterPartition::new(
    Some(ingester_uuid),
    PartitionId::new(1),
    ShardId::new(1),
    0,
    parquet_max_sequence_number,
    None,
@ -224,9 +224,6 @@ mod tests {
// namespace with infinite retention policy
let ns = catalog.create_namespace_with_retention("ns", None).await;

let shard1 = ns.create_shard(1).await;
let shard2 = ns.create_shard(2).await;

let table_cpu = ns.create_table("cpu").await;
let table_mem = ns.create_table("mem").await;

@ -238,11 +235,11 @@ mod tests {
table_mem.create_column("time", ColumnType::Time).await;
table_mem.create_column("perc", ColumnType::F64).await;

let partition_cpu_a_1 = table_cpu.with_shard(&shard1).create_partition("a").await;
let partition_cpu_a_2 = table_cpu.with_shard(&shard2).create_partition("a").await;
let partition_cpu_b_1 = table_cpu.with_shard(&shard1).create_partition("b").await;
let partition_mem_c_1 = table_mem.with_shard(&shard1).create_partition("c").await;
let partition_mem_c_2 = table_mem.with_shard(&shard2).create_partition("c").await;
let partition_cpu_a_1 = table_cpu.create_partition("a").await;
let partition_cpu_a_2 = table_cpu.create_partition("a").await;
let partition_cpu_b_1 = table_cpu.create_partition("b").await;
let partition_mem_c_1 = table_mem.create_partition("c").await;
let partition_mem_c_2 = table_mem.create_partition("c").await;

let builder = TestParquetFileBuilder::default()
    .with_max_l0_created_at(Time::from_timestamp_nanos(1))

@ -322,8 +319,6 @@ mod tests {
.flag_for_delete()
.await;

table_mem.with_shard(&shard1);

let querier_namespace = Arc::new(querier_namespace(&ns).await);

let traces = Arc::new(RingBufferTraceCollector::new(100));
@ -233,7 +233,6 @@ pub mod tests {
]
.join("\n");
let ns = catalog.create_namespace_1hr_retention("ns").await;
let shard = ns.create_shard(1).await;
let table = ns.create_table("table").await;
table.create_column("tag1", ColumnType::Tag).await;
table.create_column("tag2", ColumnType::Tag).await;

@ -243,7 +242,6 @@ pub mod tests {
table.create_column("field_float", ColumnType::F64).await;
table.create_column("time", ColumnType::Time).await;
let partition = table
    .with_shard(&shard)
    .create_partition("part")
    .await
    .update_sort_key(SortKey::from_columns(["tag1", "tag2", "tag4", "time"]))
@ -491,14 +491,13 @@ mod tests {
let outside_retention =
    inside_retention - Duration::from_secs(2 * 60 * 60).as_nanos() as i64; // 2 hours ago

let shard = ns.create_shard(1).await;
let table = ns.create_table("cpu").await;

table.create_column("host", ColumnType::Tag).await;
table.create_column("time", ColumnType::Time).await;
table.create_column("load", ColumnType::F64).await;

let partition = table.with_shard(&shard).create_partition("a").await;
let partition = table.create_partition("a").await;

let querier_table = TestQuerierTable::new(&catalog, &table).await;

@ -577,12 +576,9 @@ mod tests {
let table1 = ns.create_table("table1").await;
let table2 = ns.create_table("table2").await;

let shard1 = ns.create_shard(1).await;
let shard2 = ns.create_shard(2).await;

let partition11 = table1.with_shard(&shard1).create_partition("k").await;
let partition12 = table1.with_shard(&shard2).create_partition("k").await;
let partition21 = table2.with_shard(&shard1).create_partition("k").await;
let partition11 = table1.create_partition("k").await;
let partition12 = table1.create_partition("k").await;
let partition21 = table2.create_partition("k").await;

table1.create_column("time", ColumnType::Time).await;
table1.create_column("foo", ColumnType::F64).await;

@ -704,12 +700,11 @@ mod tests {
let catalog = TestCatalog::new();
let ns = catalog.create_namespace_1hr_retention("ns").await;
let table = ns.create_table("table").await;
let shard = ns.create_shard(1).await;
let partition = table.with_shard(&shard).create_partition("k").await;
let partition = table.create_partition("k").await;
let schema = make_schema_two_fields_two_tags(&table).await;

// let add a partion from the ingester
let builder = IngesterPartitionBuilder::new(schema, &shard, &partition)
let builder = IngesterPartitionBuilder::new(schema, &partition)
    .with_lp(["table,tag1=val1,tag2=val2 foo=3,bar=4 11"]);

let ingester_partition =

@ -773,12 +768,10 @@ mod tests {
let catalog = TestCatalog::new();
let ns = catalog.create_namespace_1hr_retention("ns").await;
let table = ns.create_table("table1").await;
let shard = ns.create_shard(1).await;
let partition = table.with_shard(&shard).create_partition("k").await;
let partition = table.create_partition("k").await;
let schema = make_schema(&table).await;

let builder =
    IngesterPartitionBuilder::new(schema, &shard, &partition).with_lp(["table foo=1 1"]);
let builder = IngesterPartitionBuilder::new(schema, &partition).with_lp(["table foo=1 1"]);

// Parquet file between with max sequence number 2
let pf_builder = TestParquetFileBuilder::default()
@ -220,12 +220,11 @@ mod tests {
interface::{IngesterPartitionInfo, ParquetFileInfo},
*,
};
use data_types::{CompactionLevel, SequenceNumber, ShardId};
use data_types::{CompactionLevel, SequenceNumber};

#[derive(Debug)]
struct MockIngesterPartitionInfo {
    partition_id: PartitionId,
    shard_id: ShardId,
    parquet_max_sequence_number: Option<SequenceNumber>,
}

@ -234,10 +233,6 @@ mod tests {
self.partition_id
}

fn shard_id(&self) -> ShardId {
    self.shard_id
}

fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
    self.parquet_max_sequence_number
}
@ -1,7 +1,7 @@
//! Interface for reconciling Ingester and catalog state

use crate::{ingester::IngesterPartition, parquet::QuerierParquetChunk};
use data_types::{CompactionLevel, ParquetFile, PartitionId, SequenceNumber, ShardId};
use data_types::{CompactionLevel, ParquetFile, PartitionId, SequenceNumber};
use std::{ops::Deref, sync::Arc};

/// Information about an ingester partition.

@ -9,7 +9,6 @@ use std::{ops::Deref, sync::Arc};
/// This is mostly the same as [`IngesterPartition`] but allows easier mocking.
pub trait IngesterPartitionInfo {
    fn partition_id(&self) -> PartitionId;
    fn shard_id(&self) -> ShardId;
    fn parquet_max_sequence_number(&self) -> Option<SequenceNumber>;
}

@ -18,10 +17,6 @@ impl IngesterPartitionInfo for IngesterPartition {
self.deref().partition_id()
}

fn shard_id(&self) -> ShardId {
    self.deref().shard_id()
}

fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
    self.deref().parquet_max_sequence_number()
}

@ -35,10 +30,6 @@ where
self.deref().partition_id()
}

fn shard_id(&self) -> ShardId {
    self.deref().shard_id()
}

fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
    self.deref().parquet_max_sequence_number()
}
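The net effect on the reconcile interface is easier to see spelled out. Below is a sketch of the trait without `shard_id()`, plus a mock in the spirit of the test's `MockIngesterPartitionInfo` from the hunks above; only the methods visible in this diff are included, and anything else in the real trait is out of scope here.

```rust
// Sketch of the reconcile-interface trait after shard removal, with a minimal
// mock implementation like the one the tests use.
use data_types::{PartitionId, SequenceNumber};

pub trait IngesterPartitionInfo {
    fn partition_id(&self) -> PartitionId;
    fn parquet_max_sequence_number(&self) -> Option<SequenceNumber>;
}

#[derive(Debug)]
struct MockIngesterPartitionInfo {
    partition_id: PartitionId,
    parquet_max_sequence_number: Option<SequenceNumber>,
}

impl IngesterPartitionInfo for MockIngesterPartitionInfo {
    fn partition_id(&self) -> PartitionId {
        self.partition_id
    }

    fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
        self.parquet_max_sequence_number
    }
}
```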
@ -6,7 +6,7 @@ use crate::{
use arrow::record_batch::RecordBatch;
use data_types::{ChunkId, SequenceNumber};
use iox_catalog::interface::{get_schema_by_name, SoftDeletedRows};
use iox_tests::{TestCatalog, TestPartition, TestShard, TestTable};
use iox_tests::{TestCatalog, TestPartition, TestTable};
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use schema::{sort::SortKey, Projection, Schema};
use std::{sync::Arc, time::Duration};

@ -64,7 +64,6 @@ pub(crate) fn lp_to_record_batch(lp: &str) -> RecordBatch {
#[derive(Debug, Clone)]
pub(crate) struct IngesterPartitionBuilder {
    schema: Schema,
    shard: Arc<TestShard>,
    partition: Arc<TestPartition>,
    ingester_chunk_id: u128,

@ -75,14 +74,9 @@ pub(crate) struct IngesterPartitionBuilder {
}

impl IngesterPartitionBuilder {
pub(crate) fn new(
    schema: Schema,
    shard: &Arc<TestShard>,
    partition: &Arc<TestPartition>,
) -> Self {
pub(crate) fn new(schema: Schema, partition: &Arc<TestPartition>) -> Self {
Self {
    schema,
    shard: Arc::clone(shard),
    partition: Arc::clone(partition),
    partition_sort_key: None,
    ingester_chunk_id: 1,

@ -115,7 +109,6 @@ impl IngesterPartitionBuilder {
IngesterPartition::new(
    Some(Uuid::new_v4()),
    self.partition.partition.id,
    self.shard.shard.id,
    0,
    parquet_max_sequence_number,
    self.partition_sort_key.clone(),
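At test call sites the builder change reads as below; a hedged before/after sketch, with the line-protocol string taken from the hunks above and the surrounding test context (`schema`, `partition`) assumed rather than defined here.

```rust
// Old: the builder needed a TestShard as well as the partition.
// let builder = IngesterPartitionBuilder::new(schema, &shard, &partition)
//     .with_lp(["table foo=1 1"]);

// New: the shard argument is gone; only the partition is needed.
let builder = IngesterPartitionBuilder::new(schema, &partition).with_lp(["table foo=1 1"]);
```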
@ -200,7 +200,6 @@ mod tests {
use super::*;
use data_types::{
    ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp,
    TRANSITION_SHARD_INDEX,
};
use generated_types::influxdata::iox::catalog::v1::catalog_service_server::CatalogService;
use iox_catalog::mem::MemCatalog;

@ -222,11 +221,6 @@ mod tests {
.create_or_get("iox-shared")
.await
.unwrap();
let shard = repos
    .shards()
    .create_or_get(&topic, TRANSITION_SHARD_INDEX)
    .await
    .unwrap();
let namespace = repos
    .namespaces()
    .create("catalog_partition_test", None, topic.id, pool.id)

@ -239,11 +233,10 @@ mod tests {
.unwrap();
let partition = repos
    .partitions()
    .create_or_get("foo".into(), shard.id, table.id)
    .create_or_get("foo".into(), table.id)
    .await
    .unwrap();
let p1params = ParquetFileParams {
    shard_id: shard.id,
    namespace_id: namespace.id,
    table_id: table.id,
    partition_id: partition.id,

@ -299,11 +292,6 @@ mod tests {
.create_or_get("iox-shared")
.await
.unwrap();
let shard = repos
    .shards()
    .create_or_get(&topic, TRANSITION_SHARD_INDEX)
    .await
    .unwrap();
let namespace = repos
    .namespaces()
    .create("catalog_partition_test", None, topic.id, pool.id)

@ -316,12 +304,12 @@ mod tests {
.unwrap();
partition1 = repos
    .partitions()
    .create_or_get("foo".into(), shard.id, table.id)
    .create_or_get("foo".into(), table.id)
    .await
    .unwrap();
partition2 = repos
    .partitions()
    .create_or_get("bar".into(), shard.id, table.id)
    .create_or_get("bar".into(), table.id)
    .await
    .unwrap();
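The catalog-side pattern repeated in these test hunks is that partitions are now created against a table alone, with no shard id in the key. A sketch of the new call shape, assuming the `repos` and `table` handles from the surrounding test setup; the partition key `"foo"` is just the example value used above.

```rust
// Old: .create_or_get("foo".into(), shard.id, table.id)
// New: the shard id is dropped from partition creation.
let partition = repos
    .partitions()
    .create_or_get("foo".into(), table.id)
    .await
    .unwrap();
```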
@ -70,7 +70,6 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService {
let path = ParquetFilePath::new(
    parquet_file.namespace_id,
    parquet_file.table_id,
    parquet_file.shard_id,
    parquet_file.partition_id,
    parquet_file.object_store_id,
);

@ -98,8 +97,7 @@ mod tests {
use super::*;
use bytes::Bytes;
use data_types::{
    ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, ShardIndex,
    Timestamp,
    ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp,
};
use generated_types::influxdata::iox::object_store::v1::object_store_service_server::ObjectStoreService;
use iox_catalog::mem::MemCatalog;

@ -120,11 +118,6 @@ mod tests {
.create_or_get("iox-shared")
.await
.unwrap();
let shard = repos
    .shards()
    .create_or_get(&topic, ShardIndex::new(1))
    .await
    .unwrap();
let namespace = repos
    .namespaces()
    .create("catalog_partition_test", None, topic.id, pool.id)

@ -137,11 +130,10 @@ mod tests {
.unwrap();
let partition = repos
    .partitions()
    .create_or_get("foo".into(), shard.id, table.id)
    .create_or_get("foo".into(), table.id)
    .await
    .unwrap();
let p1params = ParquetFileParams {
    shard_id: shard.id,
    namespace_id: namespace.id,
    table_id: table.id,
    partition_id: partition.id,

@ -166,7 +158,6 @@ mod tests {
let path = ParquetFilePath::new(
    p1.namespace_id,
    p1.table_id,
    p1.shard_id,
    p1.partition_id,
    p1.object_store_id,
);
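Finally, the object-store path no longer encodes a shard. A sketch of the resulting `ParquetFilePath::new` call, using the same field names as the hunk above; the `parquet_file` record is assumed to be whatever catalog entry the surrounding handler already looked up.

```rust
// Parquet object-store path after the change: namespace, table, partition,
// and object store UUID only — no shard component.
let path = ParquetFilePath::new(
    parquet_file.namespace_id,
    parquet_file.table_id,
    parquet_file.partition_id,
    parquet_file.object_store_id,
);
```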