diff --git a/README.md b/README.md index 0ea4aa1944..c27227f035 100644 --- a/README.md +++ b/README.md @@ -325,7 +325,6 @@ influxdata.iox.ingester.v1.WriteService influxdata.iox.namespace.v1.NamespaceService influxdata.iox.object_store.v1.ObjectStoreService influxdata.iox.schema.v1.SchemaService -influxdata.iox.sharder.v1.ShardService influxdata.platform.storage.IOxTesting influxdata.platform.storage.Storage ``` diff --git a/clap_blocks/src/catalog_dsn.rs b/clap_blocks/src/catalog_dsn.rs index 3993668f78..efc1fac7be 100644 --- a/clap_blocks/src/catalog_dsn.rs +++ b/clap_blocks/src/catalog_dsn.rs @@ -213,7 +213,7 @@ impl CatalogDsnConfig { let mem = MemCatalog::new(metrics); let mut txn = mem.start_transaction().await.context(CatalogSnafu)?; - create_or_get_default_records(1, txn.deref_mut()) + create_or_get_default_records(txn.deref_mut()) .await .context(CatalogSnafu)?; txn.commit().await.context(CatalogSnafu)?; diff --git a/compactor2/src/components/hardcoded.rs b/compactor2/src/components/hardcoded.rs index b161acec96..4a6c04f17b 100644 --- a/compactor2/src/components/hardcoded.rs +++ b/compactor2/src/components/hardcoded.rs @@ -385,7 +385,6 @@ fn make_parquet_files_sink(config: &Config) -> Arc { let parquet_file_sink = Arc::new(LoggingParquetFileSinkWrapper::new( DedicatedExecParquetFileSinkWrapper::new( ObjectStoreParquetFileSink::new( - config.shard_id, config.parquet_store_scratchpad.clone(), Arc::clone(&config.time_provider), ), diff --git a/compactor2/src/components/parquet_file_sink/mock.rs b/compactor2/src/components/parquet_file_sink/mock.rs index 3cb90c8f65..f920270573 100644 --- a/compactor2/src/components/parquet_file_sink/mock.rs +++ b/compactor2/src/components/parquet_file_sink/mock.rs @@ -4,9 +4,7 @@ use std::{ }; use async_trait::async_trait; -use data_types::{ - ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, ShardId, Timestamp, -}; +use data_types::{ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp}; use datafusion::{ arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, error::DataFusionError, @@ -70,7 +68,6 @@ impl ParquetFileSink for MockParquetFileSink { let row_count = batches.iter().map(|b| b.num_rows()).sum::(); let mut guard = self.records.lock().expect("not poisoned"); let out = ((row_count > 0) || !self.filter_empty_files).then(|| ParquetFileParams { - shard_id: ShardId::new(1), namespace_id: partition.namespace_id, table_id: partition.table.id, partition_id: partition.partition_id, @@ -167,7 +164,6 @@ mod tests { .await .unwrap(), Some(ParquetFileParams { - shard_id: ShardId::new(1), namespace_id: NamespaceId::new(2), table_id: TableId::new(3), partition_id: PartitionId::new(1), @@ -231,7 +227,6 @@ mod tests { .await .unwrap(), Some(ParquetFileParams { - shard_id: ShardId::new(1), namespace_id: NamespaceId::new(2), table_id: TableId::new(3), partition_id: PartitionId::new(1), diff --git a/compactor2/src/components/parquet_file_sink/object_store.rs b/compactor2/src/components/parquet_file_sink/object_store.rs index e3f602a1a9..c6102f25e0 100644 --- a/compactor2/src/components/parquet_file_sink/object_store.rs +++ b/compactor2/src/components/parquet_file_sink/object_store.rs @@ -1,7 +1,7 @@ use std::{fmt::Display, sync::Arc}; use async_trait::async_trait; -use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber, ShardId}; +use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber}; use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream}; use iox_time::{Time, 
TimeProvider}; use parquet_file::{ @@ -20,19 +20,13 @@ const MAX_SEQUENCE_NUMBER: i64 = 0; #[derive(Debug)] pub struct ObjectStoreParquetFileSink { - shared_id: ShardId, store: ParquetStorage, time_provider: Arc, } impl ObjectStoreParquetFileSink { - pub fn new( - shared_id: ShardId, - store: ParquetStorage, - time_provider: Arc, - ) -> Self { + pub fn new(store: ParquetStorage, time_provider: Arc) -> Self { Self { - shared_id, store, time_provider, } @@ -57,7 +51,6 @@ impl ParquetFileSink for ObjectStoreParquetFileSink { let meta = IoxMetadata { object_store_id: Uuid::new_v4(), creation_timestamp: self.time_provider.now(), - shard_id: self.shared_id, namespace_id: partition.namespace_id, namespace_name: partition.namespace_name.clone().into(), table_id: partition.table.id, diff --git a/compactor2/src/components/report.rs b/compactor2/src/components/report.rs index 83be540b09..d9510f5968 100644 --- a/compactor2/src/components/report.rs +++ b/compactor2/src/components/report.rs @@ -11,7 +11,6 @@ pub fn log_config(config: &Config) { // use struct unpack so we don't forget any members let Config { compaction_type, - shard_id, // no need to print the internal state of the registry metric_registry: _, catalog, @@ -59,7 +58,6 @@ pub fn log_config(config: &Config) { info!( ?compaction_type, - shard_id=shard_id.get(), %catalog, %parquet_store_real, %parquet_store_scratchpad, diff --git a/compactor2/src/components/scratchpad/test_util.rs b/compactor2/src/components/scratchpad/test_util.rs index 108f59c04b..736af60108 100644 --- a/compactor2/src/components/scratchpad/test_util.rs +++ b/compactor2/src/components/scratchpad/test_util.rs @@ -1,6 +1,6 @@ use std::{collections::HashSet, sync::Arc}; -use data_types::{NamespaceId, PartitionId, ShardId, TableId}; +use data_types::{NamespaceId, PartitionId, TableId}; use object_store::{memory::InMemory, DynObjectStore}; use parquet_file::ParquetFilePath; use uuid::Uuid; @@ -23,7 +23,6 @@ pub fn file_path(i: u128) -> ParquetFilePath { ParquetFilePath::new( NamespaceId::new(1), TableId::new(1), - ShardId::new(1), PartitionId::new(1), Uuid::from_u128(i), ) diff --git a/compactor2/src/config.rs b/compactor2/src/config.rs index cd87b5baba..1188681bef 100644 --- a/compactor2/src/config.rs +++ b/compactor2/src/config.rs @@ -1,8 +1,8 @@ //! Config-related stuff. use std::{collections::HashSet, fmt::Display, num::NonZeroUsize, sync::Arc, time::Duration}; -use backoff::{Backoff, BackoffConfig}; -use data_types::{PartitionId, ShardId, ShardIndex}; +use backoff::BackoffConfig; +use data_types::PartitionId; use iox_catalog::interface::Catalog; use iox_query::exec::Executor; use iox_time::TimeProvider; @@ -22,9 +22,6 @@ pub struct Config { /// Compaction type. pub compaction_type: CompactionType, - /// Shard Id - pub shard_id: ShardId, - /// Metric registry. pub metric_registry: Arc, @@ -146,55 +143,6 @@ impl Config { pub fn max_compact_size_bytes(&self) -> usize { self.max_desired_file_size_bytes as usize * MIN_COMPACT_SIZE_MULTIPLE } - - /// Fetch shard ID. - /// - /// This is likely required to construct a [`Config`] object. 
- pub async fn fetch_shard_id( - catalog: Arc, - backoff_config: BackoffConfig, - topic_name: String, - shard_index: i32, - ) -> ShardId { - // Get shardId from topic and shard_index - // Fetch topic - let topic = Backoff::new(&backoff_config) - .retry_all_errors("topic_of_given_name", || async { - catalog - .repositories() - .await - .topics() - .get_by_name(topic_name.as_str()) - .await - }) - .await - .expect("retry forever"); - - if topic.is_none() { - panic!("Topic {topic_name} not found"); - } - let topic = topic.unwrap(); - - // Fetch shard - let shard = Backoff::new(&backoff_config) - .retry_all_errors("sahrd_of_given_index", || async { - catalog - .repositories() - .await - .shards() - .get_by_topic_id_and_shard_index(topic.id, ShardIndex::new(shard_index)) - .await - }) - .await - .expect("retry forever"); - - match shard { - Some(shard) => shard.id, - None => { - panic!("Topic {topic_name} and Shard Index {shard_index} not found") - } - } - } } /// Shard config. diff --git a/compactor2/tests/integration.rs b/compactor2/tests/integration.rs index fb164872e1..7c11a8b66e 100644 --- a/compactor2/tests/integration.rs +++ b/compactor2/tests/integration.rs @@ -212,12 +212,12 @@ async fn test_compact_large_overlapes() { --- - initial - "L1 " - - "L1.4[6000,68000] 240s 2.66kb|------------------L1.4------------------| " - - "L1.5[136000,136000] 300s 2.17kb |L1.5|" + - "L1.4[6000,68000] 240s 2.65kb|------------------L1.4------------------| " + - "L1.5[136000,136000] 300s 2.16kb |L1.5|" - "L2 " - - "L2.1[8000,12000] 60s 1.8kb |L2.1| " + - "L2.1[8000,12000] 60s 1.79kb |L2.1| " - "L2.2[20000,30000] 120s 2.61kb |L2.2| " - - "L2.3[36000,36000] 180s 2.17kb |L2.3| " + - "L2.3[36000,36000] 180s 2.16kb |L2.3| " "### ); @@ -233,7 +233,7 @@ async fn test_compact_large_overlapes() { - "L2 " - "L2.6[6000,36000] 300s 2.71kb|-------L2.6-------| " - "L2.7[68000,68000] 300s 2.51kb |L2.7| " - - "L2.8[136000,136000] 300s 2.55kb |L2.8|" + - "L2.8[136000,136000] 300s 2.54kb |L2.8|" "### ); @@ -323,11 +323,11 @@ async fn test_compact_large_overlape_2() { - initial - "L1 " - "L1.4[6000,25000] 240s 1.8kb|---L1.4----| " - - "L1.5[28000,136000] 300s 2.65kb |----------------------------------L1.5----------------------------------| " + - "L1.5[28000,136000] 300s 2.64kb |----------------------------------L1.5----------------------------------| " - "L2 " - - "L2.1[8000,12000] 60s 1.8kb |L2.1| " + - "L2.1[8000,12000] 60s 1.79kb |L2.1| " - "L2.2[20000,30000] 120s 2.61kb |L2.2| " - - "L2.3[36000,36000] 180s 2.17kb |L2.3| " + - "L2.3[36000,36000] 180s 2.16kb |L2.3| " "### ); @@ -343,7 +343,7 @@ async fn test_compact_large_overlape_2() { - "L2 " - "L2.6[6000,36000] 300s 2.71kb|-------L2.6-------| " - "L2.7[68000,68000] 300s 2.51kb |L2.7| " - - "L2.8[136000,136000] 300s 2.55kb |L2.8|" + - "L2.8[136000,136000] 300s 2.54kb |L2.8|" "### ); diff --git a/compactor2/tests/layouts/backfill.rs b/compactor2/tests/layouts/backfill.rs index 86b2f2c526..6e6f55fa21 100644 --- a/compactor2/tests/layouts/backfill.rs +++ b/compactor2/tests/layouts/backfill.rs @@ -759,13 +759,13 @@ async fn random_backfill_empty_partition() { - "L0 " - "L0.?[76,329] 1.04us 2.96mb|-------------------------------------L0.?--------------------------------------| " - "L0.?[330,356] 1.04us 322.99kb |-L0.?-| " - - "**** Simulation run 71, type=split(ReduceOverlap)(split_times=[658]). 
1 Input Files, 3.32mb total:" - - "L0, all files 3.32mb " - - "L0.166[357,670] 1.04us |-----------------------------------------L0.166-----------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 3.32mb total:" + - "**** Simulation run 71, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:" + - "L0, all files 3.66mb " + - "L0.163[357,670] 1.04us |-----------------------------------------L0.163-----------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:" - "L0 " - - "L0.?[357,658] 1.04us 3.19mb|----------------------------------------L0.?----------------------------------------| " - - "L0.?[659,670] 1.04us 130.17kb |L0.?|" + - "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| " + - "L0.?[659,670] 1.04us 143.55kb |L0.?|" - "**** Simulation run 72, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:" - "L0, all files 2.36mb " - "L0.168[173,356] 1.04us |-----------------------------------------L0.168-----------------------------------------|" @@ -815,13 +815,13 @@ async fn random_backfill_empty_partition() { - "L0 " - "L0.?[357,658] 1.05us 3.19mb|----------------------------------------L0.?----------------------------------------| " - "L0.?[659,670] 1.05us 130.17kb |L0.?|" - - "**** Simulation run 79, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:" - - "L0, all files 3.66mb " - - "L0.163[357,670] 1.04us |-----------------------------------------L0.163-----------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:" + - "**** Simulation run 79, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:" + - "L0, all files 2.36mb " + - "L0.179[173,356] 1.05us |-----------------------------------------L0.179-----------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 2.36mb total:" - "L0 " - - "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| " - - "L0.?[659,670] 1.04us 143.55kb |L0.?|" + - "L0.?[173,329] 1.05us 2.01mb|-----------------------------------L0.?-----------------------------------| " + - "L0.?[330,356] 1.05us 355.83kb |---L0.?---| " - "**** Simulation run 80, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.33mb total:" - "L0, all files 3.33mb " - "L0.165[42,356] 1.04us |-----------------------------------------L0.165-----------------------------------------|" @@ -829,13 +829,13 @@ async fn random_backfill_empty_partition() { - "L0 " - "L0.?[42,329] 1.04us 3.04mb|--------------------------------------L0.?--------------------------------------| " - "L0.?[330,356] 1.04us 292.88kb |L0.?-| " - - "**** Simulation run 81, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:" - - "L0, all files 2.36mb " - - "L0.179[173,356] 1.05us |-----------------------------------------L0.179-----------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 2.36mb total:" + - "**** Simulation run 81, type=split(ReduceOverlap)(split_times=[658]). 
1 Input Files, 3.32mb total:" + - "L0, all files 3.32mb " + - "L0.166[357,670] 1.04us |-----------------------------------------L0.166-----------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 3.32mb total:" - "L0 " - - "L0.?[173,329] 1.05us 2.01mb|-----------------------------------L0.?-----------------------------------| " - - "L0.?[330,356] 1.05us 355.83kb |---L0.?---| " + - "L0.?[357,658] 1.04us 3.19mb|----------------------------------------L0.?----------------------------------------| " + - "L0.?[659,670] 1.04us 130.17kb |L0.?|" - "**** Simulation run 82, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 4.03mb total:" - "L0, all files 4.03mb " - "L0.180[357,670] 1.05us |-----------------------------------------L0.180-----------------------------------------|" @@ -963,14 +963,14 @@ async fn random_backfill_empty_partition() { - "L0.?[649,658] 1.04us 131.79kb |L0.?|" - "**** Simulation run 98, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.52mb total:" - "L0, all files 3.52mb " - - "L0.249[357,658] 1.04us |-----------------------------------------L0.249-----------------------------------------|" + - "L0.233[357,658] 1.04us |-----------------------------------------L0.233-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 3.52mb total:" - "L0 " - "L0.?[357,648] 1.04us 3.4mb|----------------------------------------L0.?-----------------------------------------| " - "L0.?[649,658] 1.04us 119.63kb |L0.?|" - "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.19mb total:" - "L0, all files 3.19mb " - - "L0.233[357,658] 1.04us |-----------------------------------------L0.233-----------------------------------------|" + - "L0.253[357,658] 1.04us |-----------------------------------------L0.253-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 3.19mb total:" - "L0 " - "L0.?[357,648] 1.04us 3.08mb|----------------------------------------L0.?-----------------------------------------| " @@ -1039,7 +1039,7 @@ async fn random_backfill_empty_partition() { - "L0.?[671,966] 1.05us 3.14mb|---------------------------------------L0.?---------------------------------------| " - "L0.?[967,986] 1.05us 218.33kb |L0.?|" - "Committing partition 1:" - - " Soft Deleting 20 files: L0.145, L0.156, L0.167, L0.178, L0.189, L0.199, L0.205, L0.209, L0.213, L0.219, L0.223, L0.227, L0.233, L0.237, L0.243, L0.247, L0.249, L0.255, L0.261, L0.265" + - " Soft Deleting 20 files: L0.145, L0.156, L0.167, L0.178, L0.189, L0.199, L0.205, L0.209, L0.213, L0.219, L0.223, L0.227, L0.233, L0.237, L0.243, L0.247, L0.253, L0.255, L0.261, L0.265" - " Creating 40 files" - "**** Simulation run 109, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[263]). 2 Input Files, 103.14mb total:" - "L0 " @@ -1167,7 +1167,7 @@ async fn random_backfill_empty_partition() { - "L0.?[264,329] 1.05us 715.93kb |-------L0.?-------| " - "**** Simulation run 126, type=split(ReduceOverlap)(split_times=[263]). 
1 Input Files, 2.01mb total:" - "L0, all files 2.01mb " - - "L0.253[173,329] 1.05us |-----------------------------------------L0.253-----------------------------------------|" + - "L0.249[173,329] 1.05us |-----------------------------------------L0.249-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 2.01mb total:" - "L0 " - "L0.?[173,263] 1.05us 1.16mb|----------------------L0.?-----------------------| " @@ -1194,7 +1194,7 @@ async fn random_backfill_empty_partition() { - "L0.?[42,263] 1.05us 2.34mb|-------------------------------L0.?--------------------------------| " - "L0.?[264,329] 1.05us 715.93kb |-------L0.?-------| " - "Committing partition 1:" - - " Soft Deleting 20 files: L0.197, L0.201, L0.203, L0.207, L0.211, L0.215, L0.217, L0.221, L0.225, L0.229, L0.231, L0.235, L0.239, L0.241, L0.245, L0.251, L0.253, L0.257, L0.259, L0.263" + - " Soft Deleting 20 files: L0.197, L0.201, L0.203, L0.207, L0.211, L0.215, L0.217, L0.221, L0.225, L0.229, L0.231, L0.235, L0.239, L0.241, L0.245, L0.249, L0.251, L0.257, L0.259, L0.263" - " Creating 40 files" - "**** Simulation run 130, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[570, 876]). 9 Input Files, 229.77mb total:" - "L0 " @@ -2050,7 +2050,7 @@ async fn random_backfill_empty_partition() { - "L0.522[584,590] 1.04us 84.83kb |L0.522| " - "L0.455[591,648] 1.04us 702.84kb |L0.455| " - "L0.289[649,658] 1.04us 119.63kb |L0.289| " - - "L0.250[659,670] 1.04us 143.55kb |L0.250| " + - "L0.234[659,670] 1.04us 143.55kb |L0.234| " - "L0.523[671,870] 1.04us 2.34mb |-----L0.523-----| " - "L0.524[871,876] 1.04us 72.33kb |L0.524| " - "L0.388[877,932] 1.04us 675.04kb |L0.388| " @@ -2063,7 +2063,7 @@ async fn random_backfill_empty_partition() { - "L0.526[584,590] 1.04us 76.92kb |L0.526| " - "L0.459[591,648] 1.04us 637.32kb |L0.459| " - "L0.291[649,658] 1.04us 108.47kb |L0.291| " - - "L0.234[659,670] 1.04us 130.17kb |L0.234| " + - "L0.254[659,670] 1.04us 130.17kb |L0.254| " - "L0.527[671,870] 1.04us 2.12mb |-----L0.527-----| " - "L0.528[871,876] 1.04us 65.5kb |L0.528| " - "L0.392[877,966] 1.04us 982.47kb |L0.392| " @@ -2119,7 +2119,7 @@ async fn random_backfill_empty_partition() { - "L0.344[173,263] 1.05us 1.16mb |L0.344| " - "L0.480[264,295] 1.05us 414.83kb |L0.480| " - "L0.481[296,329] 1.05us 454.98kb |L0.481| " - - "L0.254[330,356] 1.05us 355.83kb |L0.254| " + - "L0.250[330,356] 1.05us 355.83kb |L0.250| " - "L0.407[357,570] 1.05us 2.74mb |------L0.407------| " - "L0.543[571,583] 1.05us 160.2kb |L0.543| " - "L0.544[584,590] 1.05us 93.45kb |L0.544| " @@ -3639,13 +3639,13 @@ async fn random_backfill_over_l2s() { - "L0 " - "L0.?[592,626] 1.03us 374.74kb|----------------L0.?-----------------| " - "L0.?[627,670] 1.03us 484.96kb |---------------------L0.?----------------------| " - - "**** Simulation run 142, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:" - - "L0, all files 817.09kb " - - "L0.281[295,356] 1.03us |-----------------------------------------L0.281-----------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:" + - "**** Simulation run 142, type=split(ReduceOverlap)(split_times=[334]). 
1 Input Files, 672.54kb total:" + - "L0, all files 672.54kb " + - "L0.279[295,356] 1.03us |-----------------------------------------L0.279-----------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:" - "L0 " - - "L0.?[295,334] 1.03us 522.4kb|-------------------------L0.?--------------------------| " - - "L0.?[335,356] 1.03us 294.69kb |------------L0.?------------| " + - "L0.?[295,334] 1.03us 429.99kb|-------------------------L0.?--------------------------| " + - "L0.?[335,356] 1.03us 242.56kb |------------L0.?------------| " - "**** Simulation run 143, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 677.02kb total:" - "L0, all files 677.02kb " - "L0.328[592,629] 1.03us |-----------------------------------------L0.328-----------------------------------------|" @@ -3695,13 +3695,13 @@ async fn random_backfill_over_l2s() { - "L0 " - "L0.?[592,626] 1.04us 455.28kb|----------------L0.?-----------------| " - "L0.?[627,670] 1.04us 589.19kb |---------------------L0.?----------------------| " - - "**** Simulation run 150, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:" - - "L0, all files 672.54kb " - - "L0.279[295,356] 1.03us |-----------------------------------------L0.279-----------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:" + - "**** Simulation run 150, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:" + - "L0, all files 817.09kb " + - "L0.289[295,356] 1.04us |-----------------------------------------L0.289-----------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:" - "L0 " - - "L0.?[295,334] 1.03us 429.99kb|-------------------------L0.?--------------------------| " - - "L0.?[335,356] 1.03us 242.56kb |------------L0.?------------| " + - "L0.?[295,334] 1.04us 522.4kb|-------------------------L0.?--------------------------| " + - "L0.?[335,356] 1.04us 294.69kb |------------L0.?------------| " - "**** Simulation run 151, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 1.02mb total:" - "L0, all files 1.02mb " - "L0.324[592,670] 1.03us |-----------------------------------------L0.324-----------------------------------------|" @@ -3711,11 +3711,11 @@ async fn random_backfill_over_l2s() { - "L0.?[627,670] 1.03us 589.19kb |---------------------L0.?----------------------| " - "**** Simulation run 152, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:" - "L0, all files 817.09kb " - - "L0.289[295,356] 1.04us |-----------------------------------------L0.289-----------------------------------------|" + - "L0.281[295,356] 1.03us |-----------------------------------------L0.281-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:" - "L0 " - - "L0.?[295,334] 1.04us 522.4kb|-------------------------L0.?--------------------------| " - - "L0.?[335,356] 1.04us 294.69kb |------------L0.?------------| " + - "L0.?[295,334] 1.03us 522.4kb|-------------------------L0.?--------------------------| " + - "L0.?[335,356] 1.03us 294.69kb |------------L0.?------------| " - "**** Simulation run 153, type=split(ReduceOverlap)(split_times=[626]). 
1 Input Files, 677.02kb total:" - "L0, all files 677.02kb " - "L0.342[592,629] 1.04us |-----------------------------------------L0.342-----------------------------------------|" @@ -4236,8 +4236,8 @@ async fn random_backfill_over_l2s() { - " Creating 32 files" - "**** Simulation run 223, type=split(CompactAndSplitOutput(ManySmallFiles))(split_times=[610]). 200 Input Files, 166.42mb total:" - "L0 " - - "L0.402[295,334] 1.03us 429.99kb |L0.402| " - - "L0.403[335,356] 1.03us 242.56kb |L0.403| " + - "L0.386[295,334] 1.03us 429.99kb |L0.386| " + - "L0.387[335,356] 1.03us 242.56kb |L0.387| " - "L0.319[358,591] 1.03us 2.48mb |-------L0.319-------| " - "L0.455[592,619] 1.03us 297.59kb |L0.455| " - "L0.456[620,626] 1.03us 77.15kb |L0.456| " @@ -4247,8 +4247,8 @@ async fn random_backfill_over_l2s() { - "L0.458[904,986] 1.03us 918.23kb |L0.458|" - "L0.517[173,275] 1.03us 1.31mb |L0.517-| " - "L0.518[276,294] 1.03us 250.4kb |L0.518| " - - "L0.386[295,334] 1.03us 522.4kb |L0.386| " - - "L0.387[335,356] 1.03us 294.69kb |L0.387| " + - "L0.406[295,334] 1.03us 522.4kb |L0.406| " + - "L0.407[335,356] 1.03us 294.69kb |L0.407| " - "L0.216[357,357] 1.03us 0b |L0.216| " - "L0.323[358,591] 1.03us 3.01mb |-------L0.323-------| " - "L0.459[592,619] 1.03us 361.55kb |L0.459| " @@ -4295,8 +4295,8 @@ async fn random_backfill_over_l2s() { - "L0.472[904,986] 1.04us 918.23kb |L0.472|" - "L0.525[173,275] 1.04us 1.31mb |L0.525-| " - "L0.526[276,294] 1.04us 250.4kb |L0.526| " - - "L0.406[295,334] 1.04us 522.4kb |L0.406| " - - "L0.407[335,356] 1.04us 294.69kb |L0.407| " + - "L0.402[295,334] 1.04us 522.4kb |L0.402| " + - "L0.403[335,356] 1.04us 294.69kb |L0.403| " - "L0.230[357,357] 1.04us 0b |L0.230| " - "L0.337[358,591] 1.04us 3.01mb |-------L0.337-------| " - "L0.473[592,619] 1.04us 361.55kb |L0.473| " diff --git a/compactor2/tests/layouts/knobs.rs b/compactor2/tests/layouts/knobs.rs index 3dbd177639..ccc7c57334 100644 --- a/compactor2/tests/layouts/knobs.rs +++ b/compactor2/tests/layouts/knobs.rs @@ -858,11 +858,11 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.?[171444,200000] 5ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "**** Simulation run 51, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|" + - "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - - "L0.?[171443,171443] 8ns 0b|L0.?| " - - "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " + - "L0.?[171443,171443] 6ns 0b|L0.?| " + - "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 
1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - "L0.73[171443,200000] 9ns |-----------------------------------------L0.73------------------------------------------|" @@ -879,18 +879,18 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.?[171444,200000] 10ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - - "L0 " - - "L0.?[171443,171443] 6ns 0b|L0.?| " - - "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - - "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" - - "L0, all files 1.29mb " - "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171443,171443] 7ns 0b|L0.?| " - "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " + - "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" + - "L0, all files 1.29mb " + - "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" + - "L0 " + - "L0.?[171443,171443] 8ns 0b|L0.?| " + - "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "Committing partition 1:" - " Soft Deleting 27 files: L0.42, L0.44, L0.45, L0.49, L0.51, L0.52, L0.56, L0.58, L0.59, L0.63, L0.65, L0.66, L0.70, L0.72, L0.73, L0.77, L0.79, L0.80, L0.99, L0.103, L0.107, L0.111, L0.115, L0.119, L1.121, L1.122, L1.123" - " Creating 55 files" @@ -1213,7 +1213,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.?[156351,160867] 6ns 208.25kb |--------L0.?--------| " - "**** Simulation run 95, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.176[171444,200000] 6ns|-----------------------------------------L0.176-----------------------------------------|" + - "L0.170[171444,200000] 6ns|-----------------------------------------L0.170-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171444,198370] 6ns 1.21mb|---------------------------------------L0.?---------------------------------------| " @@ -1227,7 +1227,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.?[156351,160867] 7ns 208.25kb |--------L0.?--------| " - "**** Simulation run 97, type=split(ReduceOverlap)(split_times=[198370]). 
1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.178[171444,200000] 7ns|-----------------------------------------L0.178-----------------------------------------|" + - "L0.176[171444,200000] 7ns|-----------------------------------------L0.176-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171444,198370] 7ns 1.21mb|---------------------------------------L0.?---------------------------------------| " @@ -1241,7 +1241,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.?[156351,160867] 8ns 208.25kb |--------L0.?--------| " - "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.170[171444,200000] 8ns|-----------------------------------------L0.170-----------------------------------------|" + - "L0.178[171444,200000] 8ns|-----------------------------------------L0.178-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171444,198370] 8ns 1.21mb|---------------------------------------L0.?---------------------------------------| " @@ -1389,7 +1389,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.257[142887,156350] 6ns 620.71kb |---L0.257---| " - "L0.258[156351,160867] 6ns 208.25kb |L0.258| " - "L0.186[160868,171442] 6ns 487.56kb |-L0.186--| " - - "L0.175[171443,171443] 6ns 0b |L0.175| " + - "L0.169[171443,171443] 6ns 0b |L0.169| " - "L0.259[171444,198370] 6ns 1.21mb |----------L0.259----------| " - "L0.260[198371,200000] 6ns 75.17kb |L0.260|" - "L1 " @@ -1404,7 +1404,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L1.?[149666,185000] 6ns 10mb |---------------L1.?----------------| " - "L1.?[185001,200000] 6ns 4.25mb |----L1.?-----| " - "Committing partition 1:" - - " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.175, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260" + - " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.169, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260" - " Creating 3 files" - "**** Simulation run 116, type=split(HighL0OverlapTotalBacklog)(split_times=[142886]). 1 Input Files, 10mb total:" - "L1, all files 10mb " @@ -1743,7 +1743,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "**** Simulation run 156, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[170977]). 8 Input Files, 19.54mb total:" - "L0 " - "L0.190[160868,171442] 7ns 487.56kb |----L0.190----| " - - "L0.177[171443,171443] 7ns 0b |L0.177| " + - "L0.175[171443,171443] 7ns 0b |L0.175| " - "L0.309[171444,185000] 7ns 625.13kb |------L0.309------| " - "L0.310[185001,198370] 7ns 616.55kb |------L0.310------| " - "L0.264[198371,200000] 7ns 75.17kb |L0.264|" @@ -1756,7 +1756,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L1.?[140564,170977] 7ns 10mb|--------------------L1.?--------------------| " - "L1.?[170978,200000] 7ns 9.54mb |------------------L1.?-------------------| " - "Committing partition 1:" - - " Soft Deleting 8 files: L0.177, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356" + - " Soft Deleting 8 files: L0.175, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356" - " Creating 2 files" - "**** Simulation run 157, type=split(ReduceOverlap)(split_times=[170977]). 
1 Input Files, 487.56kb total:" - "L0, all files 487.56kb " @@ -1924,7 +1924,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L0.266[156351,160867] 8ns 208.25kb |L0.266| " - "L0.387[160868,170977] 8ns 466.12kb |---L0.387----| " - "L0.388[170978,171442] 8ns 21.44kb |L0.388| " - - "L0.169[171443,171443] 8ns 0b |L0.169| " + - "L0.177[171443,171443] 8ns 0b |L0.177| " - "L0.313[171444,185000] 8ns 625.13kb |------L0.313------| " - "L0.314[185001,198370] 8ns 616.55kb |------L0.314------| " - "L0.268[198371,200000] 8ns 75.17kb |L0.268|" @@ -1937,7 +1937,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() { - "L1.?[167315,194064] 8ns 10mb |-----------------L1.?-----------------| " - "L1.?[194065,200000] 8ns 2.22mb |-L1.?-| " - "Committing partition 1:" - - " Soft Deleting 13 files: L0.159, L0.169, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388" + - " Soft Deleting 13 files: L0.159, L0.177, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388" - " Creating 3 files" - "**** Simulation run 173, type=split(ReduceOverlap)(split_times=[167314]). 1 Input Files, 466.12kb total:" - "L0, all files 466.12kb " @@ -2812,11 +2812,11 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.?[171444,200000] 5ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "**** Simulation run 51, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|" + - "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - - "L0.?[171443,171443] 8ns 0b|L0.?| " - - "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " + - "L0.?[171443,171443] 6ns 0b|L0.?| " + - "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - "L0.73[171443,200000] 9ns |-----------------------------------------L0.73------------------------------------------|" @@ -2833,18 +2833,18 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.?[171444,200000] 10ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|" - - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - - "L0 " - - "L0.?[171443,171443] 6ns 0b|L0.?| " - - "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - - "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 
1 Input Files, 1.29mb total:" - - "L0, all files 1.29mb " - "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171443,171443] 7ns 0b|L0.?| " - "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " + - "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:" + - "L0, all files 1.29mb " + - "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|" + - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" + - "L0 " + - "L0.?[171443,171443] 8ns 0b|L0.?| " + - "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| " - "Committing partition 1:" - " Soft Deleting 27 files: L0.42, L0.44, L0.45, L0.49, L0.51, L0.52, L0.56, L0.58, L0.59, L0.63, L0.65, L0.66, L0.70, L0.72, L0.73, L0.77, L0.79, L0.80, L0.99, L0.103, L0.107, L0.111, L0.115, L0.119, L1.121, L1.122, L1.123" - " Creating 55 files" @@ -3167,7 +3167,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.?[156351,160867] 6ns 208.25kb |--------L0.?--------| " - "**** Simulation run 95, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.176[171444,200000] 6ns|-----------------------------------------L0.176-----------------------------------------|" + - "L0.170[171444,200000] 6ns|-----------------------------------------L0.170-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171444,198370] 6ns 1.21mb|---------------------------------------L0.?---------------------------------------| " @@ -3181,7 +3181,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.?[156351,160867] 7ns 208.25kb |--------L0.?--------| " - "**** Simulation run 97, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.178[171444,200000] 7ns|-----------------------------------------L0.178-----------------------------------------|" + - "L0.176[171444,200000] 7ns|-----------------------------------------L0.176-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171444,198370] 7ns 1.21mb|---------------------------------------L0.?---------------------------------------| " @@ -3195,7 +3195,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.?[156351,160867] 8ns 208.25kb |--------L0.?--------| " - "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[198370]). 
1 Input Files, 1.29mb total:" - "L0, all files 1.29mb " - - "L0.170[171444,200000] 8ns|-----------------------------------------L0.170-----------------------------------------|" + - "L0.178[171444,200000] 8ns|-----------------------------------------L0.178-----------------------------------------|" - "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:" - "L0 " - "L0.?[171444,198370] 8ns 1.21mb|---------------------------------------L0.?---------------------------------------| " @@ -3343,7 +3343,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.257[142887,156350] 6ns 620.71kb |---L0.257---| " - "L0.258[156351,160867] 6ns 208.25kb |L0.258| " - "L0.186[160868,171442] 6ns 487.56kb |-L0.186--| " - - "L0.175[171443,171443] 6ns 0b |L0.175| " + - "L0.169[171443,171443] 6ns 0b |L0.169| " - "L0.259[171444,198370] 6ns 1.21mb |----------L0.259----------| " - "L0.260[198371,200000] 6ns 75.17kb |L0.260|" - "L1 " @@ -3358,7 +3358,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L1.?[149666,185000] 6ns 10mb |---------------L1.?----------------| " - "L1.?[185001,200000] 6ns 4.25mb |----L1.?-----| " - "Committing partition 1:" - - " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.175, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260" + - " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.169, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260" - " Creating 3 files" - "**** Simulation run 116, type=split(HighL0OverlapTotalBacklog)(split_times=[142886]). 1 Input Files, 10mb total:" - "L1, all files 10mb " @@ -3697,7 +3697,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "**** Simulation run 156, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[170977]). 8 Input Files, 19.54mb total:" - "L0 " - "L0.190[160868,171442] 7ns 487.56kb |----L0.190----| " - - "L0.177[171443,171443] 7ns 0b |L0.177| " + - "L0.175[171443,171443] 7ns 0b |L0.175| " - "L0.309[171444,185000] 7ns 625.13kb |------L0.309------| " - "L0.310[185001,198370] 7ns 616.55kb |------L0.310------| " - "L0.264[198371,200000] 7ns 75.17kb |L0.264|" @@ -3710,7 +3710,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L1.?[140564,170977] 7ns 10mb|--------------------L1.?--------------------| " - "L1.?[170978,200000] 7ns 9.54mb |------------------L1.?-------------------| " - "Committing partition 1:" - - " Soft Deleting 8 files: L0.177, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356" + - " Soft Deleting 8 files: L0.175, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356" - " Creating 2 files" - "**** Simulation run 157, type=split(ReduceOverlap)(split_times=[170977]). 
1 Input Files, 487.56kb total:" - "L0, all files 487.56kb " @@ -3878,7 +3878,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L0.266[156351,160867] 8ns 208.25kb |L0.266| " - "L0.387[160868,170977] 8ns 466.12kb |---L0.387----| " - "L0.388[170978,171442] 8ns 21.44kb |L0.388| " - - "L0.169[171443,171443] 8ns 0b |L0.169| " + - "L0.177[171443,171443] 8ns 0b |L0.177| " - "L0.313[171444,185000] 8ns 625.13kb |------L0.313------| " - "L0.314[185001,198370] 8ns 616.55kb |------L0.314------| " - "L0.268[198371,200000] 8ns 75.17kb |L0.268|" @@ -3891,7 +3891,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file - "L1.?[167315,194064] 8ns 10mb |-----------------L1.?-----------------| " - "L1.?[194065,200000] 8ns 2.22mb |-L1.?-| " - "Committing partition 1:" - - " Soft Deleting 13 files: L0.159, L0.169, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388" + - " Soft Deleting 13 files: L0.159, L0.177, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388" - " Creating 3 files" - "**** Simulation run 173, type=split(ReduceOverlap)(split_times=[167314]). 1 Input Files, 466.12kb total:" - "L0, all files 466.12kb " diff --git a/compactor2/tests/layouts/many_files.rs b/compactor2/tests/layouts/many_files.rs index dca6ba846a..3319949495 100644 --- a/compactor2/tests/layouts/many_files.rs +++ b/compactor2/tests/layouts/many_files.rs @@ -73,43 +73,43 @@ async fn many_l0_files_different_created_order() { @r###" --- - "**** Input Files " - - "L0, all files 2.55kb " + - "L0, all files 2.54kb " - "L0.1[10,22] 1ns |---------L0.1----------| " - "L0.2[30,42] 2ns |---------L0.2----------| " - "L0.3[20,32] 3ns |---------L0.3----------| " - "L0.4[40,52] 4ns |---------L0.4----------| " - - "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:" - - "L0, all files 2.55kb " + - "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:" + - "L0, all files 2.54kb " - "L0.1[10,22] 1ns |-------------L0.1--------------| " - "L0.2[30,42] 2ns |-------------L0.2--------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:" - - "L0, all files 5.1kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:" + - "L0, all files 5.09kb " - "L0.?[10,42] 2ns |------------------------------------------L0.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L0.1, L0.2" - " Creating 1 files" - - "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:" - - "L0, all files 2.55kb " + - "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:" + - "L0, all files 2.54kb " - "L0.3[20,32] 3ns |-------------L0.3--------------| " - "L0.4[40,52] 4ns |-------------L0.4--------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:" - - "L0, all files 5.1kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:" + - "L0, all files 5.09kb " - "L0.?[20,52] 4ns |------------------------------------------L0.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L0.3, L0.4" - " Creating 1 files" - - "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.2kb total:" - - "L0, all files 5.1kb " + - "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 
2 Input Files, 10.18kb total:" + - "L0, all files 5.09kb " - "L0.6[20,52] 4ns |-------------------------------L0.6-------------------------------| " - "L0.5[10,42] 2ns |-------------------------------L0.5-------------------------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 10.2kb total:" - - "L1, all files 10.2kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 10.18kb total:" + - "L1, all files 10.18kb " - "L1.?[10,52] 4ns |------------------------------------------L1.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L0.5, L0.6" - " Creating 1 files" - - "**** Final Output Files (20.39kb written)" - - "L1, all files 10.2kb " + - "**** Final Output Files (20.36kb written)" + - "L1, all files 10.18kb " - "L1.7[10,52] 4ns |------------------------------------------L1.7------------------------------------------|" "### ); @@ -183,43 +183,43 @@ async fn many_l1_files_different_created_order() { @r###" --- - "**** Input Files " - - "L1, all files 2.55kb " + - "L1, all files 2.54kb " - "L1.1[11,20] 1ns |-------L1.1-------| " - "L1.2[31,40] 2ns |-------L1.2-------| " - "L1.3[21,30] 3ns |-------L1.3-------| " - "L1.4[41,50] 4ns |-------L1.4-------| " - - "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:" - - "L1, all files 2.55kb " + - "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:" + - "L1, all files 2.54kb " - "L1.1[11,20] 1ns |------------------L1.1------------------| " - "L1.3[21,30] 3ns |------------------L1.3------------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:" - - "L1, all files 5.1kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:" + - "L1, all files 5.09kb " - "L1.?[11,30] 3ns |------------------------------------------L1.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L1.1, L1.3" - " Creating 1 files" - - "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:" - - "L1, all files 2.55kb " + - "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:" + - "L1, all files 2.54kb " - "L1.2[31,40] 2ns |------------------L1.2------------------| " - "L1.4[41,50] 4ns |------------------L1.4------------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:" - - "L1, all files 5.1kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:" + - "L1, all files 5.09kb " - "L1.?[31,50] 4ns |------------------------------------------L1.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L1.2, L1.4" - " Creating 1 files" - - "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.2kb total:" - - "L1, all files 5.1kb " + - "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 
2 Input Files, 10.18kb total:" + - "L1, all files 5.09kb " - "L1.6[31,50] 4ns |------------------L1.6-------------------| " - "L1.5[11,30] 3ns |------------------L1.5-------------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 10.2kb total:" - - "L2, all files 10.2kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 10.18kb total:" + - "L2, all files 10.18kb " - "L2.?[11,50] 4ns |------------------------------------------L2.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L1.5, L1.6" - " Creating 1 files" - - "**** Final Output Files (20.39kb written)" - - "L2, all files 10.2kb " + - "**** Final Output Files (20.36kb written)" + - "L2, all files 10.18kb " - "L2.7[11,50] 4ns |------------------------------------------L2.7------------------------------------------|" "### ); @@ -291,43 +291,43 @@ async fn many_l0_files_different_created_order_non_overlap() { @r###" --- - "**** Input Files " - - "L0, all files 2.55kb " + - "L0, all files 2.54kb " - "L0.1[11,20] 1ns |-------L0.1-------| " - "L0.2[31,40] 2ns |-------L0.2-------| " - "L0.3[21,30] 3ns |-------L0.3-------| " - "L0.4[41,50] 4ns |-------L0.4-------| " - - "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:" - - "L0, all files 2.55kb " + - "**** Simulation run 0, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:" + - "L0, all files 2.54kb " - "L0.1[11,20] 1ns |----------L0.1-----------| " - "L0.2[31,40] 2ns |----------L0.2-----------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:" - - "L0, all files 5.1kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:" + - "L0, all files 5.09kb " - "L0.?[11,40] 2ns |------------------------------------------L0.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L0.1, L0.2" - " Creating 1 files" - - "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.1kb total:" - - "L0, all files 2.55kb " + - "**** Simulation run 1, type=compact(ManySmallFiles). 2 Input Files, 5.09kb total:" + - "L0, all files 2.54kb " - "L0.3[21,30] 3ns |----------L0.3-----------| " - "L0.4[41,50] 4ns |----------L0.4-----------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 5.1kb total:" - - "L0, all files 5.1kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 5.09kb total:" + - "L0, all files 5.09kb " - "L0.?[21,50] 4ns |------------------------------------------L0.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L0.3, L0.4" - " Creating 1 files" - - "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 10.2kb total:" - - "L0, all files 5.1kb " + - "**** Simulation run 2, type=compact(TotalSizeLessThanMaxCompactSize). 
2 Input Files, 10.18kb total:" + - "L0, all files 5.09kb " - "L0.6[21,50] 4ns |------------------------------L0.6------------------------------| " - "L0.5[11,40] 2ns |------------------------------L0.5------------------------------| " - - "**** 1 Output Files (parquet_file_id not yet assigned), 10.2kb total:" - - "L1, all files 10.2kb " + - "**** 1 Output Files (parquet_file_id not yet assigned), 10.18kb total:" + - "L1, all files 10.18kb " - "L1.?[11,50] 4ns |------------------------------------------L1.?------------------------------------------|" - "Committing partition 1:" - " Soft Deleting 2 files: L0.5, L0.6" - " Creating 1 files" - - "**** Final Output Files (20.39kb written)" - - "L1, all files 10.2kb " + - "**** Final Output Files (20.36kb written)" + - "L1, all files 10.18kb " - "L1.7[11,50] 4ns |------------------------------------------L1.7------------------------------------------|" "### ); diff --git a/compactor2_test_utils/src/lib.rs b/compactor2_test_utils/src/lib.rs index 41d3398b6e..d241e8b7c7 100644 --- a/compactor2_test_utils/src/lib.rs +++ b/compactor2_test_utils/src/lib.rs @@ -37,7 +37,7 @@ use compactor2::{ config::{CompactionType, Config, PartitionsSourceConfig}, hardcoded_components, Components, PanicDataFusionPlanner, PartitionInfo, }; -use data_types::{ColumnType, CompactionLevel, ParquetFile, TableId, TRANSITION_SHARD_NUMBER}; +use data_types::{ColumnType, CompactionLevel, ParquetFile, TableId}; use datafusion::arrow::record_batch::RecordBatch; use datafusion_util::config::register_iox_object_store; use futures::TryStreamExt; @@ -45,7 +45,7 @@ use iox_catalog::interface::Catalog; use iox_query::exec::ExecutorType; use iox_tests::{ ParquetFileBuilder, TestCatalog, TestNamespace, TestParquetFileBuilder, TestPartition, - TestShard, TestTable, + TestTable, }; use iox_time::{MockProvider, Time, TimeProvider}; use object_store::{path::Path, DynObjectStore}; @@ -54,7 +54,6 @@ use schema::sort::SortKey; use tracker::AsyncSemaphoreMetrics; // Default values for the test setup builder -const SHARD_INDEX: i32 = TRANSITION_SHARD_NUMBER; const PARTITION_THRESHOLD: Duration = Duration::from_secs(10 * 60); // 10min const MAX_DESIRE_FILE_SIZE: u64 = 100 * 1024; const PERCENTAGE_MAX_FILE_SIZE: u16 = 5; @@ -70,7 +69,6 @@ pub struct TestSetupBuilder { config: Config, catalog: Arc, ns: Arc, - shard: Arc, table: Arc, partition: Arc, files: Vec, @@ -88,7 +86,6 @@ impl TestSetupBuilder { pub async fn new() -> Self { let catalog = TestCatalog::new(); let ns = catalog.create_namespace_1hr_retention("ns").await; - let shard = ns.create_shard(SHARD_INDEX).await; let table = ns.create_table("table").await; table.create_column("field_int", ColumnType::I64).await; table.create_column("tag1", ColumnType::Tag).await; @@ -96,10 +93,7 @@ impl TestSetupBuilder { table.create_column("tag3", ColumnType::Tag).await; table.create_column("time", ColumnType::Time).await; - let partition = table - .with_shard(&shard) - .create_partition("2022-07-13") - .await; + let partition = table.create_partition("2022-07-13").await; // The sort key comes from the catalog and should be the union of all tags the // ingester has seen @@ -122,7 +116,6 @@ impl TestSetupBuilder { let config = Config { compaction_type: Default::default(), - shard_id: shard.shard.id, metric_registry: catalog.metric_registry(), catalog: catalog.catalog(), parquet_store_real: catalog.parquet_store.clone(), @@ -162,7 +155,6 @@ impl TestSetupBuilder { config, catalog, ns, - shard, table, partition, files: vec![], @@ -299,7 +291,6 @@ impl 
TestSetupBuilder { config: self.config, catalog: self.catalog, ns: self.ns, - shard: self.shard, table: self.table, partition: self.partition, files, @@ -333,7 +324,6 @@ impl TestSetupBuilder { config: self.config.clone(), catalog: Arc::clone(&self.catalog), ns: Arc::clone(&self.ns), - shard: Arc::clone(&self.shard), table: Arc::clone(&self.table), partition: Arc::clone(&self.partition), files, @@ -368,7 +358,6 @@ impl TestSetupBuilder { config: self.config.clone(), catalog: Arc::clone(&self.catalog), ns: Arc::clone(&self.ns), - shard: Arc::clone(&self.shard), table: Arc::clone(&self.table), partition: Arc::clone(&self.partition), files, diff --git a/compactor2_test_utils/src/simulator.rs b/compactor2_test_utils/src/simulator.rs index a90d6c1337..35bfbdd6f5 100644 --- a/compactor2_test_utils/src/simulator.rs +++ b/compactor2_test_utils/src/simulator.rs @@ -8,7 +8,7 @@ use std::{ use async_trait::async_trait; use data_types::{ - ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, SequenceNumber, ShardId, Timestamp, + ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, SequenceNumber, Timestamp, }; use datafusion::physical_plan::SendableRecordBatchStream; use iox_time::Time; @@ -202,7 +202,6 @@ impl SimulatedFile { } = self; ParquetFileParams { - shard_id: ShardId::new(1), namespace_id: partition_info.namespace_id, table_id: partition_info.table.id, partition_id: partition_info.partition_id, diff --git a/data_types/src/lib.rs b/data_types/src/lib.rs index d6006761c3..30cf7775b8 100644 --- a/data_types/src/lib.rs +++ b/data_types/src/lib.rs @@ -24,7 +24,6 @@ use schema::{ builder::SchemaBuilder, sort::SortKey, InfluxColumnType, InfluxFieldType, Schema, TIME_COLUMN_NAME, }; -use serde::Deserialize; use sqlx::postgres::PgHasArrayType; use std::{ borrow::Borrow, @@ -38,13 +37,6 @@ use std::{ }; use uuid::Uuid; -/// Magic number to be used shard indices and shard ids in "kafkaless". -pub const TRANSITION_SHARD_NUMBER: i32 = 1234; -/// In kafkaless mode all new persisted data uses this shard id. -pub const TRANSITION_SHARD_ID: ShardId = ShardId::new(TRANSITION_SHARD_NUMBER as i64); -/// In kafkaless mode all new persisted data uses this shard index. -pub const TRANSITION_SHARD_INDEX: ShardIndex = ShardIndex::new(TRANSITION_SHARD_NUMBER); - /// Compaction levels #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, sqlx::Type)] #[repr(i16)] @@ -215,61 +207,6 @@ impl PgHasArrayType for ColumnId { } } -/// Unique ID for a `Shard`, assigned by the catalog. Joins to other catalog tables to uniquely -/// identify shards independently of the underlying write buffer implementation. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] -#[sqlx(transparent)] -pub struct ShardId(i64); - -#[allow(missing_docs)] -impl ShardId { - pub const fn new(v: i64) -> Self { - Self(v) - } - pub fn get(&self) -> i64 { - self.0 - } -} - -impl std::fmt::Display for ShardId { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -/// The index of the shard in the set of shards. When Kafka is used as the write buffer, this is -/// the Kafka Partition ID. Used by the router and write buffer to shard requests to a particular -/// index in a set of shards. 
-#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] -#[sqlx(transparent)] -#[serde(transparent)] -pub struct ShardIndex(i32); - -#[allow(missing_docs)] -impl ShardIndex { - pub const fn new(v: i32) -> Self { - Self(v) - } - pub fn get(&self) -> i32 { - self.0 - } -} - -impl std::fmt::Display for ShardIndex { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl std::str::FromStr for ShardIndex { - type Err = std::num::ParseIntError; - - fn from_str(s: &str) -> Result { - let v: i32 = s.parse()?; - Ok(Self(v)) - } -} - /// Unique ID for a `Partition` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type, sqlx::FromRow)] #[sqlx(transparent)] @@ -769,24 +706,6 @@ pub fn column_type_from_field(field_value: &FieldValue) -> ColumnType { } } -/// Data object for a shard. Only one shard record can exist for a given topic and shard -/// index (enforced via uniqueness constraint). -#[derive(Debug, Copy, Clone, PartialEq, Eq, sqlx::FromRow)] -pub struct Shard { - /// the id of the shard, assigned by the catalog - pub id: ShardId, - /// the topic the shard is reading from - pub topic_id: TopicId, - /// the shard index of the shard the sequence numbers are coming from, sharded by the router - /// and write buffer - pub shard_index: ShardIndex, - /// The minimum unpersisted sequence number. Because different tables - /// can be persisted at different times, it is possible some data has been persisted - /// with a higher sequence number than this. However, all data with a sequence number - /// lower than this must have been persisted to Parquet. - pub min_unpersisted_sequence_number: SequenceNumber, -} - /// Defines an partition via an arbitrary string within a table within /// a namespace. /// @@ -880,8 +799,6 @@ impl sqlx::Decode<'_, sqlx::Sqlite> for PartitionKey { pub struct Partition { /// the id of the partition pub id: PartitionId, - /// the shard the data in the partition arrived from - pub shard_id: ShardId, /// the table the partition is under pub table_id: TableId, /// the string key of the partition @@ -1020,8 +937,6 @@ impl Deref for ColumnSet { pub struct ParquetFile { /// the id of the file in the catalog pub id: ParquetFileId, - /// the shard that sequenced writes that went into this file - pub shard_id: ShardId, /// the namespace pub namespace_id: NamespaceId, /// the table @@ -1084,7 +999,6 @@ impl ParquetFile { pub fn from_params(params: ParquetFileParams, id: ParquetFileId) -> Self { Self { id, - shard_id: params.shard_id, namespace_id: params.namespace_id, table_id: params.table_id, partition_id: params.partition_id, @@ -1122,8 +1036,6 @@ impl ParquetFile { /// Data for a parquet file to be inserted into the catalog. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct ParquetFileParams { - /// the shard that sequenced writes that went into this file - pub shard_id: ShardId, /// the namespace pub namespace_id: NamespaceId, /// the table @@ -1155,7 +1067,6 @@ pub struct ParquetFileParams { impl From for ParquetFileParams { fn from(value: ParquetFile) -> Self { Self { - shard_id: value.shard_id, namespace_id: value.namespace_id, table_id: value.table_id, partition_id: value.partition_id, diff --git a/garbage_collector/src/objectstore/checker.rs b/garbage_collector/src/objectstore/checker.rs index 278b586d3c..af58c73b55 100644 --- a/garbage_collector/src/objectstore/checker.rs +++ b/garbage_collector/src/objectstore/checker.rs @@ -138,7 +138,7 @@ mod tests { use chrono::TimeZone; use data_types::{ ColumnId, ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileParams, - PartitionId, SequenceNumber, ShardId, ShardIndex, TableId, Timestamp, + PartitionId, SequenceNumber, TableId, Timestamp, }; use iox_catalog::{interface::Catalog, mem::MemCatalog}; use object_store::path::Path; @@ -167,19 +167,13 @@ mod tests { .create_or_get("test_table", namespace.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); let partition = repos .partitions() - .create_or_get("one".into(), shard.id, table.id) + .create_or_get("one".into(), table.id) .await .unwrap(); let parquet_file_params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: partition.table_id, partition_id: partition.id, @@ -213,7 +207,6 @@ mod tests { let location = ParquetFilePath::new( file_in_catalog.namespace_id, file_in_catalog.table_id, - file_in_catalog.shard_id, file_in_catalog.partition_id, file_in_catalog.object_store_id, ) @@ -241,7 +234,6 @@ mod tests { let location = ParquetFilePath::new( NamespaceId::new(1), TableId::new(2), - ShardId::new(3), PartitionId::new(4), Uuid::new_v4(), ) @@ -287,7 +279,6 @@ mod tests { let location = ParquetFilePath::new( file_in_catalog.namespace_id, file_in_catalog.table_id, - file_in_catalog.shard_id, file_in_catalog.partition_id, file_in_catalog.object_store_id, ) @@ -315,7 +306,6 @@ mod tests { let location = ParquetFilePath::new( NamespaceId::new(1), TableId::new(2), - ShardId::new(3), PartitionId::new(4), Uuid::new_v4(), ) diff --git a/garbage_collector/src/objectstore/deleter.rs b/garbage_collector/src/objectstore/deleter.rs index a1255ca426..a78b6d3cc3 100644 --- a/garbage_collector/src/objectstore/deleter.rs +++ b/garbage_collector/src/objectstore/deleter.rs @@ -64,7 +64,7 @@ mod tests { use super::*; use bytes::Bytes; use chrono::Utc; - use data_types::{NamespaceId, PartitionId, ShardId, TableId}; + use data_types::{NamespaceId, PartitionId, TableId}; use object_store::path::Path; use parquet_file::ParquetFilePath; use std::time::Duration; @@ -146,7 +146,6 @@ mod tests { ParquetFilePath::new( NamespaceId::new(1), TableId::new(2), - ShardId::new(3), PartitionId::new(4), Uuid::new_v4(), ) diff --git a/generated_types/build.rs b/generated_types/build.rs index 93628288b7..585eef0a62 100644 --- a/generated_types/build.rs +++ b/generated_types/build.rs @@ -28,7 +28,6 @@ fn main() -> Result<()> { /// - `influxdata.iox.predicate.v1.rs` /// - `influxdata.iox.querier.v1.rs` /// - `influxdata.iox.schema.v1.rs` -/// - `influxdata.iox.sharder.v1.rs` /// - `influxdata.iox.wal.v1.rs` /// - `influxdata.iox.write.v1.rs` /// - `influxdata.iox.write_buffer.v1.rs` @@ -44,7 +43,6 @@ fn generate_grpc_types(root: &Path) -> 
Result<()> { let predicate_path = root.join("influxdata/iox/predicate/v1"); let querier_path = root.join("influxdata/iox/querier/v1"); let schema_path = root.join("influxdata/iox/schema/v1"); - let sharder_path = root.join("influxdata/iox/sharder/v1"); let wal_path = root.join("influxdata/iox/wal/v1"); let write_buffer_path = root.join("influxdata/iox/write_buffer/v1"); let storage_path = root.join("influxdata/platform/storage"); @@ -71,7 +69,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> { root.join("grpc/health/v1/service.proto"), root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"), schema_path.join("service.proto"), - sharder_path.join("sharder.proto"), wal_path.join("wal.proto"), write_buffer_path.join("write_buffer.proto"), storage_path.join("predicate.proto"), diff --git a/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto b/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto index dbda394cf6..36f9c45543 100644 --- a/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto +++ b/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto @@ -16,6 +16,9 @@ message IoxMetadata { // Renamed to shard_id reserved 5; reserved "sequencer_id"; + // shard_id was removed + reserved 17; + reserved "shard_id"; // Object store ID. Used in the parquet filename. 16 bytes in big-endian order. bytes object_store_id = 1; @@ -29,9 +32,6 @@ message IoxMetadata { // Unique name of the namespace. string namespace_name = 4; - // Unique shard ID. - int64 shard_id = 17; - // Unique table ID. int64 table_id = 6; diff --git a/generated_types/protos/influxdata/iox/sharder/v1/sharder.proto b/generated_types/protos/influxdata/iox/sharder/v1/sharder.proto deleted file mode 100644 index f376ce972f..0000000000 --- a/generated_types/protos/influxdata/iox/sharder/v1/sharder.proto +++ /dev/null @@ -1,20 +0,0 @@ -syntax = "proto3"; -package influxdata.iox.sharder.v1; -option go_package = "github.com/influxdata/iox/sharder/v1"; - -service ShardService { - // Shard the given inputs to a Catalog ID for the destination Shard - // (Shard ID). - rpc MapToShard(MapToShardRequest) returns (MapToShardResponse); -} - -message MapToShardRequest { - // The input values to map onto a Shard. 
- string table_name = 1; - string namespace_name = 2; -} - -message MapToShardResponse { - int64 shard_id = 1; - int32 shard_index = 2; -} diff --git a/generated_types/src/lib.rs b/generated_types/src/lib.rs index 5f179a595d..5d274b578d 100644 --- a/generated_types/src/lib.rs +++ b/generated_types/src/lib.rs @@ -167,16 +167,6 @@ pub mod influxdata { } } - pub mod sharder { - pub mod v1 { - include!(concat!(env!("OUT_DIR"), "/influxdata.iox.sharder.v1.rs")); - include!(concat!( - env!("OUT_DIR"), - "/influxdata.iox.sharder.v1.serde.rs" - )); - } - } - pub mod wal { pub mod v1 { include!(concat!(env!("OUT_DIR"), "/influxdata.iox.wal.v1.rs")); diff --git a/import/src/aggregate_tsm_schema/update_catalog.rs b/import/src/aggregate_tsm_schema/update_catalog.rs index 2f41741bbe..78caa606fe 100644 --- a/import/src/aggregate_tsm_schema/update_catalog.rs +++ b/import/src/aggregate_tsm_schema/update_catalog.rs @@ -1,11 +1,9 @@ -use self::generated_types::{shard_service_client::ShardServiceClient, *}; use crate::{AggregateTSMMeasurement, AggregateTSMSchema}; use chrono::{format::StrftimeItems, offset::FixedOffset, DateTime, Duration}; use data_types::{ ColumnType, Namespace, NamespaceName, NamespaceSchema, OrgBucketMappingError, Partition, - PartitionKey, QueryPoolId, ShardId, TableSchema, TopicId, + PartitionKey, QueryPoolId, TableSchema, TopicId, }; -use influxdb_iox_client::connection::{Connection, GrpcConnection}; use iox_catalog::interface::{ get_schema_by_name, CasFailure, Catalog, RepoCollection, SoftDeletedRows, }; @@ -16,10 +14,6 @@ use schema::{ use std::{collections::HashMap, fmt::Write, ops::DerefMut, sync::Arc}; use thiserror::Error; -pub mod generated_types { - pub use generated_types::influxdata::iox::sharder::v1::*; -} - #[derive(Debug, Error)] pub enum UpdateCatalogError { #[error("Error returned from the Catalog: {0}")] @@ -45,9 +39,6 @@ pub enum UpdateCatalogError { #[error("Time calculation error when deriving partition key: {0}")] PartitionKeyCalculationError(String), - - #[error("Error fetching shard ID from shard service: {0}")] - ShardServiceError(#[from] tonic::Status), } /// Given a merged schema, update the IOx catalog to either merge that schema into the existing one @@ -61,7 +52,6 @@ pub async fn update_iox_catalog<'a>( topic: &'a str, query_pool_name: &'a str, catalog: Arc, - connection: Connection, ) -> Result<(), UpdateCatalogError> { let namespace_name = NamespaceName::from_org_and_bucket(&merged_tsm_schema.org_id, &merged_tsm_schema.bucket_id) @@ -103,18 +93,8 @@ pub async fn update_iox_catalog<'a>( return Err(UpdateCatalogError::CatalogError(e)); } }; - // initialise a client of the shard service in the router. 
we will use it to find out which - // shard a table/namespace combo would shard to, without exposing the implementation - // details of the sharding - let mut shard_client = ShardServiceClient::new(connection.into_grpc_connection()); - update_catalog_schema_with_merged( - namespace_name.as_str(), - iox_schema, - merged_tsm_schema, - repos.deref_mut(), - &mut shard_client, - ) - .await?; + + update_catalog_schema_with_merged(iox_schema, merged_tsm_schema, repos.deref_mut()).await?; Ok(()) } @@ -179,11 +159,9 @@ where /// This is basically the same as iox_catalog::validate_mutable_batch() but operates on /// AggregateTSMSchema instead of a MutableBatch (we don't have any data, only a schema) async fn update_catalog_schema_with_merged( - namespace_name: &str, iox_schema: NamespaceSchema, merged_tsm_schema: &AggregateTSMSchema, repos: &mut R, - shard_client: &mut ShardServiceClient, ) -> Result<(), UpdateCatalogError> where R: RepoCollection + ?Sized, @@ -290,19 +268,12 @@ where // date, but this is what the router logic currently does so that would need to change too. let partition_keys = get_partition_keys_for_range(measurement.earliest_time, measurement.latest_time)?; - let response = shard_client - .map_to_shard(tonic::Request::new(MapToShardRequest { - table_name: measurement_name.clone(), - namespace_name: namespace_name.to_string(), - })) - .await?; - let shard_id = ShardId::new(response.into_inner().shard_id); for partition_key in partition_keys { // create the partition if it doesn't exist; new partitions get an empty sort key which // gets matched as `None`` in the code below let partition = repos .partitions() - .create_or_get(partition_key, shard_id, table.id) + .create_or_get(partition_key, table.id) .await .map_err(UpdateCatalogError::CatalogError)?; // get the sort key from the partition, if it exists. 
create it or update it as @@ -418,88 +389,12 @@ fn datetime_to_partition_key( #[cfg(test)] mod tests { - use super::{generated_types::shard_service_server::ShardService, *}; + use super::*; use crate::{AggregateTSMField, AggregateTSMTag}; use assert_matches::assert_matches; - use client_util::connection::Builder; use data_types::{PartitionId, TableId}; use iox_catalog::mem::MemCatalog; - use parking_lot::RwLock; - use std::{collections::HashSet, net::SocketAddr}; - use tokio::task::JoinHandle; - use tokio_stream::wrappers::TcpListenerStream; - use tonic::transport::Server; - - struct MockShardService { - requests: Arc>>, - reply_with: MapToShardResponse, - } - - impl MockShardService { - pub fn new(response: MapToShardResponse) -> Self { - MockShardService { - requests: Arc::new(RwLock::new(vec![])), - reply_with: response, - } - } - - /// Use to replace the next reply with the given response (not currently used but would be - /// handy for expanded tests) - #[allow(dead_code)] - pub fn with_reply(mut self, response: MapToShardResponse) -> Self { - self.reply_with = response; - self - } - - /// Get all the requests that were made to the mock (not currently used but would be handy - /// for expanded tests) - #[allow(dead_code)] - pub fn get_requests(&self) -> Arc>> { - Arc::clone(&self.requests) - } - } - - #[tonic::async_trait] - impl ShardService for MockShardService { - async fn map_to_shard( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.requests.write().push(request.into_inner()); - Ok(tonic::Response::new(self.reply_with.clone())) - } - } - - async fn create_test_shard_service( - response: MapToShardResponse, - ) -> ( - Connection, - JoinHandle<()>, - Arc>>, - ) { - let bind_addr = SocketAddr::new( - std::net::IpAddr::V4(std::net::Ipv4Addr::new(127, 0, 0, 1)), - 0, - ); - let socket = tokio::net::TcpListener::bind(bind_addr) - .await - .expect("failed to bind to socket in test"); - let bind_addr = socket.local_addr().unwrap(); - let sharder = MockShardService::new(response); - let requests = Arc::clone(&sharder.get_requests()); - let server = - Server::builder().add_service(shard_service_server::ShardServiceServer::new(sharder)); - let server = async move { - let stream = TcpListenerStream::new(socket); - server.serve_with_incoming(stream).await.ok(); - }; - let join_handle = tokio::task::spawn(server); - let connection = Builder::default() - .build(format!("http://{bind_addr}")) - .await - .expect("failed to connect to server"); - (connection, join_handle, requests) - } + use std::collections::HashSet; #[tokio::test] async fn needs_creating() { @@ -513,11 +408,6 @@ mod tests { .create_or_get("iox-shared") .await .expect("topic created"); - let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse { - shard_id: 0, - shard_index: 0, - }) - .await; let json = r#" { @@ -543,7 +433,6 @@ mod tests { "iox-shared", "iox-shared", Arc::clone(&catalog), - connection, ) .await .expect("schema update worked"); @@ -602,11 +491,6 @@ mod tests { .create_or_get("iox-shared") .await .expect("topic created"); - let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse { - shard_id: 0, - shard_index: 0, - }) - .await; // create namespace, table and columns for weather measurement let namespace = txn @@ -666,7 +550,6 @@ mod tests { "iox-shared", "iox-shared", Arc::clone(&catalog), - connection, ) .await .expect("schema update worked"); @@ -710,11 +593,6 @@ mod tests { .create_or_get("iox-shared") .await 
.expect("topic created"); - let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse { - shard_id: 0, - shard_index: 0, - }) - .await; // create namespace, table and columns for weather measurement let namespace = txn @@ -767,7 +645,6 @@ mod tests { "iox-shared", "iox-shared", Arc::clone(&catalog), - connection, ) .await .expect_err("should fail catalog update"); @@ -790,11 +667,6 @@ mod tests { .create_or_get("iox-shared") .await .expect("topic created"); - let (connection, _join_handle, _requests) = create_test_shard_service(MapToShardResponse { - shard_id: 0, - shard_index: 0, - }) - .await; // create namespace, table and columns for weather measurement let namespace = txn @@ -846,7 +718,6 @@ mod tests { "iox-shared", "iox-shared", Arc::clone(&catalog), - connection, ) .await .expect_err("should fail catalog update"); @@ -856,85 +727,6 @@ mod tests { )); } - #[tokio::test] - async fn shard_lookup() { - // init a test catalog stack - let metrics = Arc::new(metric::Registry::default()); - let catalog: Arc = Arc::new(MemCatalog::new(Arc::clone(&metrics))); - catalog - .repositories() - .await - .topics() - .create_or_get("iox-shared") - .await - .expect("topic created"); - let (connection, _join_handle, requests) = create_test_shard_service(MapToShardResponse { - shard_id: 0, - shard_index: 0, - }) - .await; - - let json = r#" - { - "org_id": "1234", - "bucket_id": "5678", - "measurements": { - "cpu": { - "tags": [ - { "name": "host", "values": ["server", "desktop"] } - ], - "fields": [ - { "name": "usage", "types": ["Float"] } - ], - "earliest_time": "2022-01-01T00:00:00.00Z", - "latest_time": "2022-07-07T06:00:00.00Z" - }, - "weather": { - "tags": [ - ], - "fields": [ - { "name": "temperature", "types": ["Integer"] } - ], - "earliest_time": "2022-01-01T00:00:00.00Z", - "latest_time": "2022-07-07T06:00:00.00Z" - } - } - } - "#; - let agg_schema: AggregateTSMSchema = json.try_into().unwrap(); - update_iox_catalog( - &agg_schema, - "iox-shared", - "iox-shared", - Arc::clone(&catalog), - connection, - ) - .await - .expect("schema update worked"); - // check that a request was made for the two shard lookups for the tables - let requests = requests.read(); - assert_eq!(requests.len(), 2); - let cpu_req = requests - .iter() - .find(|r| r.table_name == "cpu") - .expect("cpu request missing from mock"); - assert_eq!( - (cpu_req.namespace_name.as_str(), cpu_req.table_name.as_str()), - ("1234_5678", "cpu"), - ); - let weather_req = requests - .iter() - .find(|r| r.table_name == "weather") - .expect("weather request missing from mock"); - assert_eq!( - ( - weather_req.namespace_name.as_str(), - weather_req.table_name.as_str() - ), - ("1234_5678", "weather"), - ); - } - #[tokio::test] async fn partition_keys_from_datetime_range_midday_to_midday() { let earliest_time = DateTime::parse_from_rfc3339("2022-10-30T12:00:00+00:00") @@ -1230,7 +1022,6 @@ mod tests { }; let partition = Partition { id: PartitionId::new(1), - shard_id: ShardId::new(1), table_id: TableId::new(1), persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), @@ -1278,7 +1069,6 @@ mod tests { }; let partition = Partition { id: PartitionId::new(1), - shard_id: ShardId::new(1), table_id: TableId::new(1), persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), @@ -1326,7 +1116,6 @@ mod tests { }; let partition = Partition { id: PartitionId::new(1), - shard_id: ShardId::new(1), table_id: TableId::new(1), persisted_sequence_number: None, partition_key: 
PartitionKey::from("2022-06-21"), @@ -1376,7 +1165,6 @@ mod tests { }; let partition = Partition { id: PartitionId::new(1), - shard_id: ShardId::new(1), table_id: TableId::new(1), persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), diff --git a/influxdb_iox/src/commands/import/mod.rs b/influxdb_iox/src/commands/import/mod.rs index 134f705647..2fb94ba509 100644 --- a/influxdb_iox/src/commands/import/mod.rs +++ b/influxdb_iox/src/commands/import/mod.rs @@ -1,4 +1,3 @@ -use influxdb_iox_client::connection::Connection; use thiserror::Error; mod schema; @@ -23,9 +22,9 @@ pub enum Command { } /// Handle variants of the schema command. -pub async fn command(connection: Connection, config: Config) -> Result<(), ImportError> { +pub async fn command(config: Config) -> Result<(), ImportError> { match config.command { - Command::Schema(schema_config) => schema::command(connection, *schema_config) + Command::Schema(schema_config) => schema::command(*schema_config) .await .map_err(ImportError::SchemaError), } diff --git a/influxdb_iox/src/commands/import/schema.rs b/influxdb_iox/src/commands/import/schema.rs index 74ed41eb41..193ff96d89 100644 --- a/influxdb_iox/src/commands/import/schema.rs +++ b/influxdb_iox/src/commands/import/schema.rs @@ -10,7 +10,6 @@ use clap_blocks::{ catalog_dsn::CatalogDsnConfig, object_store::{make_object_store, ObjectStoreConfig}, }; -use influxdb_iox_client::connection::Connection; use iox_time::{SystemProvider, TimeProvider}; use object_store::{path::Path, DynObjectStore}; use object_store_metrics::ObjectStoreMetrics; @@ -133,7 +132,7 @@ pub struct MergeConfig { } /// Entry-point for the schema command -pub async fn command(connection: Connection, config: Config) -> Result<(), SchemaCommandError> { +pub async fn command(config: Config) -> Result<(), SchemaCommandError> { match config { Config::Merge(merge_config) => { let time_provider = Arc::new(SystemProvider::new()) as Arc; @@ -198,7 +197,6 @@ pub async fn command(connection: Connection, config: Config) -> Result<(), Schem &merge_config.topic, &merge_config.query_pool_name, Arc::clone(&catalog), - connection.clone(), ) .await?; diff --git a/influxdb_iox/src/commands/remote/partition.rs b/influxdb_iox/src/commands/remote/partition.rs index b25fe3c1db..c40f22b30f 100644 --- a/influxdb_iox/src/commands/remote/partition.rs +++ b/influxdb_iox/src/commands/remote/partition.rs @@ -5,8 +5,8 @@ use clap_blocks::object_store::{make_object_store, ObjectStoreType}; use clap_blocks::{catalog_dsn::CatalogDsnConfig, object_store::ObjectStoreConfig}; use data_types::{ ColumnId, ColumnSet, ColumnType, NamespaceId, NamespaceSchema as CatalogNamespaceSchema, - ParquetFile as CatalogParquetFile, ParquetFileParams, PartitionId, SequenceNumber, ShardId, - TableId, Timestamp, TRANSITION_SHARD_INDEX, + ParquetFile as CatalogParquetFile, ParquetFileParams, PartitionId, SequenceNumber, TableId, + Timestamp, }; use futures::future::join_all; use influxdb_iox_client::{ @@ -172,7 +172,6 @@ pub async fn command(connection: Connection, config: Config) -> Result<(), Error let path = ParquetFilePath::new( parquet_file.namespace_id, parquet_file.table_id, - parquet_file.shard_id, parquet_file.partition_id, parquet_file.object_store_id, ); @@ -242,11 +241,6 @@ async fn load_schema( let mut repos = catalog.repositories().await; let topic = repos.topics().create_or_get(TOPIC_NAME).await?; let query_pool = repos.query_pools().create_or_get(QUERY_POOL).await?; - // ensure there's a shard for this partition so it can be used 
later - let _shard = repos - .shards() - .create_or_get(&topic, TRANSITION_SHARD_INDEX) - .await?; let namespace = match repos .namespaces() @@ -307,27 +301,16 @@ async fn load_partition( remote_partition: &Partition, ) -> Result { let mut repos = catalog.repositories().await; - let topic = repos - .topics() - .get_by_name(TOPIC_NAME) - .await? - .expect("topic should have been inserted earlier"); - let shard = repos - .shards() - .get_by_topic_id_and_shard_index(topic.id, TRANSITION_SHARD_INDEX) - .await? - .expect("shard should have been inserted earlier"); let table = schema .tables .get(table_name) .expect("table should have been loaded"); let partition = repos .partitions() - .create_or_get(remote_partition.key.clone().into(), shard.id, table.id) + .create_or_get(remote_partition.key.clone().into(), table.id) .await?; Ok(PartitionMapping { - shard_id: shard.id, table_id: table.id, partition_id: partition.id, remote_partition_id: remote_partition.id, @@ -353,7 +336,6 @@ async fn load_parquet_files( None => { println!("creating file {uuid} in catalog"); let params = ParquetFileParams { - shard_id: partition_mapping.shard_id, namespace_id, table_id: partition_mapping.table_id, partition_id: partition_mapping.partition_id, @@ -382,9 +364,8 @@ async fn load_parquet_files( Ok(files) } -// keeps a mapping of the locally created partition and shard to the remote partition id +// keeps a mapping of the locally created partition to the remote partition id struct PartitionMapping { - shard_id: ShardId, table_id: TableId, partition_id: PartitionId, remote_partition_id: i64, @@ -518,7 +499,6 @@ mod tests { async fn load_parquet_files() { let metrics = Arc::new(metric::Registry::new()); let catalog: Arc = Arc::new(MemCatalog::new(Arc::clone(&metrics))); - let shard; let namespace; let table; let partition; @@ -527,11 +507,6 @@ mod tests { let mut repos = catalog.repositories().await; let topic = repos.topics().create_or_get(TOPIC_NAME).await.unwrap(); let query_pool = repos.query_pools().create_or_get(QUERY_POOL).await.unwrap(); - shard = repos - .shards() - .create_or_get(&topic, TRANSITION_SHARD_INDEX) - .await - .unwrap(); namespace = repos .namespaces() .create("load_parquet_files", None, topic.id, query_pool.id) @@ -544,13 +519,12 @@ mod tests { .unwrap(); partition = repos .partitions() - .create_or_get("1970-01-01".into(), shard.id, table.id) + .create_or_get("1970-01-01".into(), table.id) .await .unwrap(); } let partition_mapping = PartitionMapping { - shard_id: shard.id, table_id: table.id, partition_id: partition.id, remote_partition_id: 4, @@ -589,12 +563,11 @@ mod tests { .await .unwrap(); - // the inserted parquet file should have shard, namespace, table, and partition ids + // the inserted parquet file should have namespace, table, and partition ids // that match with the ones in the catalog, not the remote. The other values should // match those of the remote. 
let expected = vec![CatalogParquetFile { id: ParquetFileId::new(1), - shard_id: shard.id, namespace_id: namespace.id, table_id: table.id, partition_id: partition.id, diff --git a/influxdb_iox/src/main.rs b/influxdb_iox/src/main.rs index c525be6ec5..b71da1bb9c 100644 --- a/influxdb_iox/src/main.rs +++ b/influxdb_iox/src/main.rs @@ -375,8 +375,7 @@ fn main() -> Result<(), std::io::Error> { } Some(Command::Import(config)) => { let _tracing_guard = handle_init_logs(init_simple_logs(log_verbose_count)); - let connection = connection(grpc_host).await; - if let Err(e) = commands::import::command(connection, config).await { + if let Err(e) = commands::import::command(config).await { eprintln!("{e}"); std::process::exit(ReturnCode::Failure as _) } diff --git a/ingester2/src/buffer_tree/namespace.rs b/ingester2/src/buffer_tree/namespace.rs index 7a02f2bb79..d51d75d6d7 100644 --- a/ingester2/src/buffer_tree/namespace.rs +++ b/ingester2/src/buffer_tree/namespace.rs @@ -5,7 +5,7 @@ pub(crate) mod name_resolver; use std::sync::Arc; use async_trait::async_trait; -use data_types::{NamespaceId, ShardId, TableId}; +use data_types::{NamespaceId, TableId}; use dml::DmlOperation; use metric::U64Counter; use observability_deps::tracing::warn; @@ -52,7 +52,7 @@ impl std::fmt::Display for NamespaceName { } } -/// Data of a Namespace that belongs to a given Shard +/// Data of a Namespace #[derive(Debug)] pub(crate) struct NamespaceData { namespace_id: NamespaceId, @@ -77,8 +77,6 @@ pub(crate) struct NamespaceData { partition_provider: Arc, post_write_observer: Arc, - - transition_shard_id: ShardId, } impl NamespaceData { @@ -90,7 +88,6 @@ impl NamespaceData { partition_provider: Arc, post_write_observer: Arc, metrics: &metric::Registry, - transition_shard_id: ShardId, ) -> Self { let table_count = metrics .register_metric::( @@ -107,7 +104,6 @@ impl NamespaceData { table_count, partition_provider, post_write_observer, - transition_shard_id, } } @@ -144,10 +140,7 @@ where type Error = mutable_batch::Error; async fn apply(&self, op: DmlOperation) -> Result<(), Self::Error> { - let sequence_number = op - .meta() - .sequence() - .expect("applying unsequenced op"); + let sequence_number = op.meta().sequence().expect("applying unsequenced op"); match op { DmlOperation::Write(write) => { @@ -166,7 +159,6 @@ where Arc::clone(&self.namespace_name), Arc::clone(&self.partition_provider), Arc::clone(&self.post_write_observer), - self.transition_shard_id, )) }); @@ -230,7 +222,6 @@ where mod tests { use std::sync::Arc; - use data_types::TRANSITION_SHARD_ID; use metric::{Attributes, Metric}; use super::*; @@ -264,7 +255,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), &metrics, - TRANSITION_SHARD_ID, ); // Assert the namespace name was stored diff --git a/ingester2/src/buffer_tree/namespace/name_resolver.rs b/ingester2/src/buffer_tree/namespace/name_resolver.rs index 9d096e4c5e..b12dc91680 100644 --- a/ingester2/src/buffer_tree/namespace/name_resolver.rs +++ b/ingester2/src/buffer_tree/namespace/name_resolver.rs @@ -102,13 +102,11 @@ pub(crate) mod mock { mod tests { use std::sync::Arc; - use data_types::ShardIndex; use test_helpers::timeout::FutureTimeout; use super::*; use crate::test_util::populate_catalog; - const SHARD_INDEX: ShardIndex = ShardIndex::new(24); const TABLE_NAME: &str = "bananas"; const NAMESPACE_NAME: &str = "platanos"; @@ -119,9 +117,8 @@ mod tests { let catalog: Arc = Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics))); - // Populate the catalog with the shard 
/ namespace / table - let (_shard_id, ns_id, _table_id) = - populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await; + // Populate the catalog with the namespace / table + let (ns_id, _table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await; let fetcher = Arc::new(NamespaceNameResolver::new( Duration::from_secs(10), diff --git a/ingester2/src/buffer_tree/partition.rs b/ingester2/src/buffer_tree/partition.rs index 833a4e264a..2e46bcdd0c 100644 --- a/ingester2/src/buffer_tree/partition.rs +++ b/ingester2/src/buffer_tree/partition.rs @@ -4,7 +4,7 @@ use std::{collections::VecDeque, sync::Arc}; use data_types::{ sequence_number_set::SequenceNumberSet, NamespaceId, PartitionId, PartitionKey, SequenceNumber, - ShardId, TableId, + TableId, }; use mutable_batch::MutableBatch; use observability_deps::tracing::*; @@ -41,8 +41,7 @@ impl SortKeyState { } } -/// Data of an IOx Partition of a given Table of a Namespace that belongs to a -/// given Shard +/// Data of an IOx Partition of a given Table of a Namespace #[derive(Debug)] pub struct PartitionData { /// The catalog ID of the partition this buffer is for. @@ -92,8 +91,6 @@ pub struct PartitionData { /// The number of persist operations completed over the lifetime of this /// [`PartitionData`]. completed_persistence_count: u64, - - transition_shard_id: ShardId, } impl PartitionData { @@ -107,7 +104,6 @@ impl PartitionData { table_id: TableId, table_name: Arc>, sort_key: SortKeyState, - transition_shard_id: ShardId, ) -> Self { Self { partition_id: id, @@ -121,7 +117,6 @@ impl PartitionData { persisting: VecDeque::with_capacity(1), started_persistence_count: BatchIdent::default(), completed_persistence_count: 0, - transition_shard_id, } } @@ -305,11 +300,6 @@ impl PartitionData { &self.partition_key } - /// Return the transition_shard_id for this partition. - pub(crate) fn transition_shard_id(&self) -> ShardId { - self.transition_shard_id - } - /// Return the [`NamespaceId`] this partition is a part of. 
pub(crate) fn namespace_id(&self) -> NamespaceId { self.namespace_id @@ -347,7 +337,6 @@ mod tests { use arrow_util::assert_batches_eq; use assert_matches::assert_matches; use backoff::BackoffConfig; - use data_types::ShardIndex; use datafusion::{ physical_expr::PhysicalSortExpr, physical_plan::{expressions::col, memory::MemoryExec, ExecutionPlan}, @@ -944,15 +933,14 @@ mod tests { let catalog: Arc = Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics))); - // Populate the catalog with the shard / namespace / table - let (shard_id, _ns_id, table_id) = - populate_catalog(&*catalog, ShardIndex::new(1), "bananas", "platanos").await; + // Populate the catalog with the namespace / table + let (_ns_id, table_id) = populate_catalog(&*catalog, "bananas", "platanos").await; let partition_id = catalog .repositories() .await .partitions() - .create_or_get("test".into(), shard_id, table_id) + .create_or_get("test".into(), table_id) .await .expect("should create") .id; diff --git a/ingester2/src/buffer_tree/partition/resolver/cache.rs b/ingester2/src/buffer_tree/partition/resolver/cache.rs index fc49b62874..a4cf93e9d6 100644 --- a/ingester2/src/buffer_tree/partition/resolver/cache.rs +++ b/ingester2/src/buffer_tree/partition/resolver/cache.rs @@ -2,9 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use async_trait::async_trait; use backoff::BackoffConfig; -use data_types::{ - NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId, -}; +use data_types::{NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, TableId}; use iox_catalog::interface::Catalog; use observability_deps::tracing::debug; use parking_lot::Mutex; @@ -166,7 +164,6 @@ where namespace_name: Arc>, table_id: TableId, table_name: Arc>, - transition_shard_id: ShardId, ) -> Arc> { // Use the cached PartitionKey instead of the caller's partition_key, // instead preferring to reuse the already-shared Arc in the cache. @@ -196,7 +193,6 @@ where table_id, table_name, SortKeyState::Deferred(Arc::new(sort_key_resolver)), - transition_shard_id, ))); } @@ -210,7 +206,6 @@ where namespace_name, table_id, table_name, - transition_shard_id, ) .await } @@ -221,7 +216,6 @@ mod tests { // Harmless in tests - saves a bunch of extra vars. 
#![allow(clippy::await_holding_lock)] - use data_types::{ShardId, TRANSITION_SHARD_ID}; use iox_catalog::mem::MemCatalog; use super::*; @@ -264,7 +258,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) .await; @@ -302,7 +295,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) .await; @@ -354,7 +346,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) .await; @@ -385,7 +376,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), other_table, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) .await; diff --git a/ingester2/src/buffer_tree/partition/resolver/catalog.rs b/ingester2/src/buffer_tree/partition/resolver/catalog.rs index e00103b877..eb5b69160c 100644 --- a/ingester2/src/buffer_tree/partition/resolver/catalog.rs +++ b/ingester2/src/buffer_tree/partition/resolver/catalog.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use async_trait::async_trait; use backoff::{Backoff, BackoffConfig}; -use data_types::{NamespaceId, Partition, PartitionKey, ShardId, TableId}; +use data_types::{NamespaceId, Partition, PartitionKey, TableId}; use iox_catalog::interface::Catalog; use observability_deps::tracing::debug; use parking_lot::Mutex; @@ -43,13 +43,12 @@ impl CatalogPartitionResolver { &self, partition_key: PartitionKey, table_id: TableId, - transition_shard_id: ShardId, ) -> Result { self.catalog .repositories() .await .partitions() - .create_or_get(partition_key, transition_shard_id, table_id) + .create_or_get(partition_key, table_id) .await } } @@ -63,18 +62,16 @@ impl PartitionProvider for CatalogPartitionResolver { namespace_name: Arc>, table_id: TableId, table_name: Arc>, - transition_shard_id: ShardId, ) -> Arc> { debug!( %partition_key, %table_id, %table_name, - %transition_shard_id, "upserting partition in catalog" ); let p = Backoff::new(&self.backoff_config) .retry_all_errors("resolve partition", || { - self.get(partition_key.clone(), table_id, transition_shard_id) + self.get(partition_key.clone(), table_id) }) .await .expect("retry forever"); @@ -90,7 +87,6 @@ impl PartitionProvider for CatalogPartitionResolver { table_id, table_name, SortKeyState::Provided(p.sort_key()), - transition_shard_id, ))) } } @@ -103,7 +99,6 @@ mod tests { use std::{sync::Arc, time::Duration}; use assert_matches::assert_matches; - use data_types::ShardIndex; use super::*; @@ -117,7 +112,7 @@ mod tests { let catalog: Arc = Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics))); - let (shard_id, namespace_id, table_id) = { + let (namespace_id, table_id) = { let mut repos = catalog.repositories().await; let t = repos.topics().create_or_get("platanos").await.unwrap(); let q = repos.query_pools().create_or_get("platanos").await.unwrap(); @@ -127,19 +122,13 @@ mod tests { .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&t, ShardIndex::new(0)) - .await - .unwrap(); - let table = repos .tables() .create_or_get(TABLE_NAME, ns.id) .await .unwrap(); - (shard.id, ns.id, table.id) + (ns.id, table.id) }; let callers_partition_key = PartitionKey::from(PARTITION_KEY); @@ -156,7 +145,6 @@ mod tests { Arc::new(DeferredLoad::new(Duration::from_secs(1), async { TableName::from(TABLE_NAME) })), - shard_id, ) .await; diff --git a/ingester2/src/buffer_tree/partition/resolver/coalesce.rs 
b/ingester2/src/buffer_tree/partition/resolver/coalesce.rs index 099e271330..eca50cd5a7 100644 --- a/ingester2/src/buffer_tree/partition/resolver/coalesce.rs +++ b/ingester2/src/buffer_tree/partition/resolver/coalesce.rs @@ -8,7 +8,7 @@ use std::{ use arrow::compute::kernels::partition; use async_trait::async_trait; -use data_types::{NamespaceId, PartitionKey, ShardId, TableId}; +use data_types::{NamespaceId, PartitionKey, TableId}; use futures::{future::Shared, FutureExt}; use hashbrown::{hash_map::Entry, HashMap}; use parking_lot::Mutex; @@ -147,7 +147,6 @@ where namespace_name: Arc>, table_id: TableId, table_name: Arc>, - transition_shard_id: ShardId, ) -> Arc> { let key = Key { namespace_id, @@ -172,7 +171,6 @@ where namespace_name, table_id, table_name, - transition_shard_id, )); // Make the future poll-able by many callers, all of which @@ -236,7 +234,6 @@ async fn do_fetch( namespace_name: Arc>, table_id: TableId, table_name: Arc>, - transition_shard_id: ShardId, ) -> Arc> where T: PartitionProvider + 'static, @@ -257,7 +254,6 @@ where namespace_name, table_id, table_name, - transition_shard_id, ) .await }) @@ -275,7 +271,7 @@ mod tests { }; use assert_matches::assert_matches; - use data_types::{PartitionId, TRANSITION_SHARD_ID}; + use data_types::PartitionId; use futures::Future; use futures::{stream::FuturesUnordered, StreamExt}; use lazy_static::lazy_static; @@ -314,7 +310,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) }) .collect::>() @@ -349,7 +344,6 @@ mod tests { _namespace_name: Arc>, _table_id: TableId, _table_name: Arc>, - _transition_shard_id: ShardId, ) -> core::pin::Pin< Box< dyn core::future::Future>> @@ -390,7 +384,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ); let pa_2 = layer.get_partition( ARBITRARY_PARTITION_KEY.clone(), @@ -398,7 +391,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ); let waker = futures::task::noop_waker(); @@ -419,7 +411,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) .with_timeout_panic(Duration::from_secs(5)) .await; @@ -450,7 +441,6 @@ mod tests { _namespace_name: Arc>, _table_id: TableId, _table_name: Arc>, - _transition_shard_id: ShardId, ) -> Arc> { let waker = self.wait.notified(); let permit = self.sem.acquire().await.unwrap(); @@ -491,7 +481,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ); let waker = futures::task::noop_waker(); diff --git a/ingester2/src/buffer_tree/partition/resolver/mock.rs b/ingester2/src/buffer_tree/partition/resolver/mock.rs index 84276fe61c..f5ca824bd5 100644 --- a/ingester2/src/buffer_tree/partition/resolver/mock.rs +++ b/ingester2/src/buffer_tree/partition/resolver/mock.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, sync::Arc}; use async_trait::async_trait; -use data_types::{NamespaceId, PartitionKey, ShardId, TableId}; +use data_types::{NamespaceId, PartitionKey, TableId}; use parking_lot::Mutex; use super::r#trait::PartitionProvider; @@ -54,7 +54,6 @@ impl PartitionProvider for MockPartitionProvider { namespace_name: Arc>, table_id: TableId, table_name: Arc>, - _transition_shard_id: ShardId, ) -> Arc> { let p = self .partitions diff --git 
a/ingester2/src/buffer_tree/partition/resolver/sort_key.rs b/ingester2/src/buffer_tree/partition/resolver/sort_key.rs index 1c8b699e6f..71e898f140 100644 --- a/ingester2/src/buffer_tree/partition/resolver/sort_key.rs +++ b/ingester2/src/buffer_tree/partition/resolver/sort_key.rs @@ -59,12 +59,9 @@ impl SortKeyResolver { mod tests { use std::sync::Arc; - use data_types::ShardIndex; - use super::*; use crate::test_util::populate_catalog; - const SHARD_INDEX: ShardIndex = ShardIndex::new(24); const TABLE_NAME: &str = "bananas"; const NAMESPACE_NAME: &str = "platanos"; const PARTITION_KEY: &str = "platanos"; @@ -76,15 +73,14 @@ mod tests { let catalog: Arc = Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics))); - // Populate the catalog with the shard / namespace / table - let (shard_id, _ns_id, table_id) = - populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await; + // Populate the catalog with the namespace / table + let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await; let partition_id = catalog .repositories() .await .partitions() - .create_or_get(PARTITION_KEY.into(), shard_id, table_id) + .create_or_get(PARTITION_KEY.into(), table_id) .await .expect("should create") .id; diff --git a/ingester2/src/buffer_tree/partition/resolver/trait.rs b/ingester2/src/buffer_tree/partition/resolver/trait.rs index 417640634a..9075b0ec71 100644 --- a/ingester2/src/buffer_tree/partition/resolver/trait.rs +++ b/ingester2/src/buffer_tree/partition/resolver/trait.rs @@ -1,7 +1,7 @@ use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; -use data_types::{NamespaceId, PartitionKey, ShardId, TableId}; +use data_types::{NamespaceId, PartitionKey, TableId}; use parking_lot::Mutex; use crate::{ @@ -25,7 +25,6 @@ pub(crate) trait PartitionProvider: Send + Sync + Debug { namespace_name: Arc>, table_id: TableId, table_name: Arc>, - transition_shard_id: ShardId, ) -> Arc>; } @@ -41,7 +40,6 @@ where namespace_name: Arc>, table_id: TableId, table_name: Arc>, - transition_shard_id: ShardId, ) -> Arc> { (**self) .get_partition( @@ -50,7 +48,6 @@ where namespace_name, table_id, table_name, - transition_shard_id, ) .await } @@ -60,8 +57,6 @@ where mod tests { use std::{sync::Arc, time::Duration}; - use data_types::{PartitionId, ShardId, TRANSITION_SHARD_ID}; - use super::*; use crate::{ buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, @@ -85,7 +80,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), ARBITRARY_TABLE_ID, Arc::clone(&*DEFER_TABLE_NAME_1_SEC), - TRANSITION_SHARD_ID, ) .await; assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID); diff --git a/ingester2/src/buffer_tree/root.rs b/ingester2/src/buffer_tree/root.rs index c680b5171a..bc9db2dcee 100644 --- a/ingester2/src/buffer_tree/root.rs +++ b/ingester2/src/buffer_tree/root.rs @@ -1,7 +1,7 @@ use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; -use data_types::{NamespaceId, ShardId, TableId}; +use data_types::{NamespaceId, TableId}; use dml::DmlOperation; use metric::U64Counter; use parking_lot::Mutex; @@ -103,7 +103,6 @@ pub(crate) struct BufferTree { namespace_count: U64Counter, post_write_observer: Arc, - transition_shard_id: ShardId, } impl BufferTree @@ -117,7 +116,6 @@ where partition_provider: Arc, post_write_observer: Arc, metrics: Arc, - transition_shard_id: ShardId, ) -> Self { let namespace_count = metrics .register_metric::( @@ -134,7 +132,6 @@ where partition_provider, post_write_observer, namespace_count, - 
transition_shard_id, } } @@ -185,7 +182,6 @@ where Arc::clone(&self.partition_provider), Arc::clone(&self.post_write_observer), &self.metrics, - self.transition_shard_id, )) }); @@ -234,7 +230,7 @@ mod tests { use std::{sync::Arc, time::Duration}; use assert_matches::assert_matches; - use data_types::{PartitionId, PartitionKey, TRANSITION_SHARD_ID}; + use data_types::{PartitionId, PartitionKey}; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; use futures::{StreamExt, TryStreamExt}; use metric::{Attributes, Metric}; @@ -274,7 +270,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), &metrics, - TRANSITION_SHARD_ID, ); // Assert the namespace name was stored @@ -351,7 +346,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), Arc::new(metric::Registry::default()), - TRANSITION_SHARD_ID, ); // Write the provided DmlWrites @@ -628,7 +622,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), Arc::clone(&metrics), - TRANSITION_SHARD_ID, ); // Write data to partition p1, in the arbitrary table @@ -725,7 +718,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), Arc::clone(&Arc::new(metric::Registry::default())), - TRANSITION_SHARD_ID, ); assert_eq!(buf.partitions().count(), 0); @@ -808,7 +800,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), Arc::new(metric::Registry::default()), - TRANSITION_SHARD_ID, ); // Query the empty tree @@ -894,7 +885,6 @@ mod tests { partition_provider, Arc::new(MockPostWriteObserver::default()), Arc::new(metric::Registry::default()), - TRANSITION_SHARD_ID, ); // Write data to partition p1, in the arbitrary table diff --git a/ingester2/src/buffer_tree/table.rs b/ingester2/src/buffer_tree/table.rs index 450a15355c..fe2f9272ed 100644 --- a/ingester2/src/buffer_tree/table.rs +++ b/ingester2/src/buffer_tree/table.rs @@ -5,7 +5,7 @@ pub(crate) mod name_resolver; use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; -use data_types::{NamespaceId, PartitionKey, SequenceNumber, ShardId, TableId}; +use data_types::{NamespaceId, PartitionKey, SequenceNumber, TableId}; use datafusion_util::MemoryStream; use mutable_batch::MutableBatch; use parking_lot::Mutex; @@ -66,7 +66,7 @@ impl PartialEq for TableName { } } -/// Data of a Table in a given Namesapce that belongs to a given Shard +/// Data of a Table in a given Namespace #[derive(Debug)] pub(crate) struct TableData { table_id: TableId, @@ -84,7 +84,6 @@ pub(crate) struct TableData { partition_data: ArcMap>, post_write_observer: Arc, - transition_shard_id: ShardId, } impl TableData { @@ -100,7 +99,6 @@ impl TableData { namespace_name: Arc>, partition_provider: Arc, post_write_observer: Arc, - transition_shard_id: ShardId, ) -> Self { Self { table_id, @@ -110,7 +108,6 @@ impl TableData { partition_data: Default::default(), partition_provider, post_write_observer, - transition_shard_id, } } @@ -171,7 +168,6 @@ where Arc::clone(&self.namespace_name), self.table_id, Arc::clone(&self.table_name), - self.transition_shard_id, ) .await; // Add the partition to the map. 
@@ -262,7 +258,6 @@ where mod tests { use std::sync::Arc; - use data_types::TRANSITION_SHARD_ID; use mutable_batch_lp::lines_to_batches; use super::*; @@ -292,7 +287,6 @@ mod tests { Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC), partition_provider, Arc::new(MockPostWriteObserver::default()), - TRANSITION_SHARD_ID, ); let batch = lines_to_batches( diff --git a/ingester2/src/buffer_tree/table/name_resolver.rs b/ingester2/src/buffer_tree/table/name_resolver.rs index 576e7ce6cb..b822d2c9c0 100644 --- a/ingester2/src/buffer_tree/table/name_resolver.rs +++ b/ingester2/src/buffer_tree/table/name_resolver.rs @@ -103,13 +103,11 @@ pub(crate) mod mock { mod tests { use std::sync::Arc; - use data_types::ShardIndex; use test_helpers::timeout::FutureTimeout; use super::*; use crate::test_util::populate_catalog; - const SHARD_INDEX: ShardIndex = ShardIndex::new(24); const TABLE_NAME: &str = "bananas"; const NAMESPACE_NAME: &str = "platanos"; @@ -120,9 +118,8 @@ mod tests { let catalog: Arc = Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics))); - // Populate the catalog with the shard / namespace / table - let (_shard_id, _ns_id, table_id) = - populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await; + // Populate the catalog with the namespace / table + let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await; let fetcher = Arc::new(TableNameResolver::new( Duration::from_secs(10), diff --git a/ingester2/src/init.rs b/ingester2/src/init.rs index 27ad3510a7..1750b07195 100644 --- a/ingester2/src/init.rs +++ b/ingester2/src/init.rs @@ -46,7 +46,6 @@ use crate::{ server::grpc::GrpcDelegate, timestamp_oracle::TimestampOracle, wal::{rotate_task::periodic_rotation, wal_sink::WalSink}, - TRANSITION_SHARD_INDEX, }; use self::graceful_shutdown::graceful_shutdown_handler; @@ -235,23 +234,6 @@ pub async fn new( where F: Future + Send + 'static, { - // Create the transition shard. - let mut txn = catalog - .start_transaction() - .await - .expect("start transaction"); - let topic = txn - .topics() - .create_or_get("iox-shared") - .await - .expect("get topic"); - let transition_shard = txn - .shards() - .create_or_get(&topic, TRANSITION_SHARD_INDEX) - .await - .expect("create transition shard"); - txn.commit().await.expect("commit transition shard"); - // Initialise a random ID for this ingester instance. let ingester_id = IngesterId::new(); @@ -336,7 +318,6 @@ where partition_provider, Arc::new(hot_partition_persister), Arc::clone(&metrics), - transition_shard.id, )); // Initialise the WAL diff --git a/ingester2/src/lib.rs b/ingester2/src/lib.rs index d77a7bb14a..e50d08c9bb 100644 --- a/ingester2/src/lib.rs +++ b/ingester2/src/lib.rs @@ -199,8 +199,6 @@ missing_docs )] -use data_types::TRANSITION_SHARD_INDEX; - /// A macro to conditionally prepend `pub` to the inner tokens for benchmarking /// purposes, should the `benches` feature be enabled. 
/// diff --git a/ingester2/src/persist/context.rs b/ingester2/src/persist/context.rs index 4ef0515f59..67eb7538f6 100644 --- a/ingester2/src/persist/context.rs +++ b/ingester2/src/persist/context.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use data_types::{NamespaceId, PartitionId, PartitionKey, ShardId, TableId}; +use data_types::{NamespaceId, PartitionId, PartitionKey, TableId}; use observability_deps::tracing::*; use parking_lot::Mutex; use schema::sort::SortKey; @@ -87,8 +87,6 @@ pub(super) struct Context { table_id: TableId, partition_id: PartitionId, - transition_shard_id: ShardId, - // The partition key for this partition partition_key: PartitionKey, @@ -173,7 +171,6 @@ impl Context { enqueued_at, dequeued_at: Instant::now(), permit, - transition_shard_id: guard.transition_shard_id(), } }; @@ -306,8 +303,4 @@ impl Context { pub(super) fn table_name(&self) -> &DeferredLoad { self.table_name.as_ref() } - - pub(super) fn transition_shard_id(&self) -> ShardId { - self.transition_shard_id - } } diff --git a/ingester2/src/persist/handle.rs b/ingester2/src/persist/handle.rs index 097f7918c4..fd02ce7824 100644 --- a/ingester2/src/persist/handle.rs +++ b/ingester2/src/persist/handle.rs @@ -475,7 +475,6 @@ mod tests { use std::{sync::Arc, task::Poll, time::Duration}; use assert_matches::assert_matches; - use data_types::TRANSITION_SHARD_ID; use dml::DmlOperation; use futures::Future; use iox_catalog::mem::MemCatalog; @@ -526,7 +525,6 @@ mod tests { ), Arc::new(MockPostWriteObserver::default()), Default::default(), - TRANSITION_SHARD_ID, ); buffer_tree diff --git a/ingester2/src/persist/mod.rs b/ingester2/src/persist/mod.rs index 1785abc21c..e9fa51cc3a 100644 --- a/ingester2/src/persist/mod.rs +++ b/ingester2/src/persist/mod.rs @@ -15,7 +15,7 @@ mod tests { use std::{sync::Arc, time::Duration}; use assert_matches::assert_matches; - use data_types::{CompactionLevel, ParquetFile, SequenceNumber, TRANSITION_SHARD_ID}; + use data_types::{CompactionLevel, ParquetFile, SequenceNumber}; use dml::DmlOperation; use futures::TryStreamExt; use iox_catalog::{ @@ -48,7 +48,6 @@ mod tests { ARBITRARY_NAMESPACE_NAME_PROVIDER, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER, }, - TRANSITION_SHARD_INDEX, }; use super::handle::PersistHandle; @@ -62,13 +61,8 @@ mod tests { /// partition entry exists (by driving the buffer tree to create it). async fn partition_with_write(catalog: Arc) -> Arc> { // Create the namespace in the catalog and it's the schema - let (_shard_id, namespace_id, table_id) = populate_catalog( - &*catalog, - TRANSITION_SHARD_INDEX, - &ARBITRARY_NAMESPACE_NAME, - &ARBITRARY_TABLE_NAME, - ) - .await; + let (namespace_id, table_id) = + populate_catalog(&*catalog, &ARBITRARY_NAMESPACE_NAME, &ARBITRARY_TABLE_NAME).await; // Init the buffer tree let buf = BufferTree::new( @@ -77,7 +71,6 @@ mod tests { Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))), Arc::new(MockPostWriteObserver::default()), Arc::new(metric::Registry::default()), - TRANSITION_SHARD_ID, ); let write = make_write_op( @@ -448,14 +441,8 @@ mod tests { assert_eq!(files.len(), 2, "expected two uploaded files"); // Ensure the catalog record points at a valid file in object storage. 
- let want_path = ParquetFilePath::new( - namespace_id, - table_id, - TRANSITION_SHARD_ID, - partition_id, - object_store_id, - ) - .object_store_path(); + let want_path = ParquetFilePath::new(namespace_id, table_id, partition_id, object_store_id) + .object_store_path(); let file = files .into_iter() .find(|f| f.location == want_path) diff --git a/ingester2/src/persist/worker.rs b/ingester2/src/persist/worker.rs index d4579de95a..17877d2b80 100644 --- a/ingester2/src/persist/worker.rs +++ b/ingester2/src/persist/worker.rs @@ -257,7 +257,6 @@ where let iox_metadata = IoxMetadata { object_store_id, creation_timestamp: time_now, - shard_id: ctx.transition_shard_id(), namespace_id: ctx.namespace_id(), namespace_name: Arc::clone(&*ctx.namespace_name().get().await), table_id: ctx.table_id(), diff --git a/ingester2/src/test_util.rs b/ingester2/src/test_util.rs index 7907cbba17..aa5a925ecc 100644 --- a/ingester2/src/test_util.rs +++ b/ingester2/src/test_util.rs @@ -1,9 +1,6 @@ use std::{collections::BTreeMap, sync::Arc, time::Duration}; -use data_types::{ - NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, ShardId, ShardIndex, - TableId, TRANSITION_SHARD_ID, -}; +use data_types::{NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, TableId}; use dml::{DmlMeta, DmlWrite}; use iox_catalog::interface::Catalog; use lazy_static::lazy_static; @@ -117,7 +114,6 @@ impl PartitionDataBuilder { self.table_name .unwrap_or_else(|| Arc::clone(&*DEFER_TABLE_NAME_1_SEC)), self.sort_key.unwrap_or(SortKeyState::Provided(None)), - TRANSITION_SHARD_ID, ) } } @@ -127,7 +123,6 @@ impl PartitionDataBuilder { pub(crate) fn arbitrary_partition() -> Partition { Partition { id: ARBITRARY_PARTITION_ID, - shard_id: TRANSITION_SHARD_ID, table_id: ARBITRARY_TABLE_ID, partition_key: ARBITRARY_PARTITION_KEY.clone(), sort_key: Default::default(), @@ -285,10 +280,9 @@ pub(crate) fn make_write_op( pub(crate) async fn populate_catalog( catalog: &dyn Catalog, - shard_index: ShardIndex, namespace: &str, table: &str, -) -> (ShardId, NamespaceId, TableId) { +) -> (NamespaceId, TableId) { let mut c = catalog.repositories().await; let topic = c.topics().create_or_get("kafka-topic").await.unwrap(); let query_pool = c.query_pools().create_or_get("query-pool").await.unwrap(); @@ -299,14 +293,8 @@ pub(crate) async fn populate_catalog( .unwrap() .id; let table_id = c.tables().create_or_get(table, ns_id).await.unwrap().id; - let shard_id = c - .shards() - .create_or_get(&topic, shard_index) - .await - .unwrap() - .id; - (shard_id, ns_id, table_id) + (ns_id, table_id) } /// Assert `a` and `b` have identical metadata, and that when converting diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index ee56af5afe..7a23d4a866 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -4,8 +4,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnSchema, ColumnType, CompactionLevel, Namespace, NamespaceId, NamespaceSchema, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, - QueryPoolId, SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, - TableSchema, Timestamp, TopicId, TopicMetadata, + QueryPoolId, SkippedCompaction, Table, TableId, TableSchema, Timestamp, TopicId, TopicMetadata, }; use iox_time::TimeProvider; use snafu::{OptionExt, Snafu}; @@ -301,9 +300,6 @@ pub trait RepoCollection: Send + Sync + Debug { /// Repository for [columns](data_types::Column). 
fn columns(&mut self) -> &mut dyn ColumnRepo; - /// Repository for [shards](data_types::Shard). - fn shards(&mut self) -> &mut dyn ShardRepo; - /// Repository for [partitions](data_types::Partition). fn partitions(&mut self) -> &mut dyn PartitionRepo; @@ -437,48 +433,12 @@ pub trait ColumnRepo: Send + Sync { async fn list(&mut self) -> Result>; } -/// Functions for working with shards in the catalog -#[async_trait] -pub trait ShardRepo: Send + Sync { - /// create a shard record for the topic and shard index or return the existing record - async fn create_or_get( - &mut self, - topic: &TopicMetadata, - shard_index: ShardIndex, - ) -> Result; - - /// get the shard record by `TopicId` and `ShardIndex` - async fn get_by_topic_id_and_shard_index( - &mut self, - topic_id: TopicId, - shard_index: ShardIndex, - ) -> Result>; - - /// list all shards - async fn list(&mut self) -> Result>; - - /// list all shards for a given topic - async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result>; - - /// updates the `min_unpersisted_sequence_number` for a shard - async fn update_min_unpersisted_sequence_number( - &mut self, - shard: ShardId, - sequence_number: SequenceNumber, - ) -> Result<()>; -} - /// Functions for working with IOx partitions in the catalog. Note that these are how IOx splits up /// data within a namespace, which is different than Kafka partitions. #[async_trait] pub trait PartitionRepo: Send + Sync { /// create or get a partition record for the given partition key, shard and table - async fn create_or_get( - &mut self, - key: PartitionKey, - shard_id: ShardId, - table_id: TableId, - ) -> Result; + async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result; /// get partition by ID async fn get_by_id(&mut self, partition_id: PartitionId) -> Result>; @@ -580,8 +540,8 @@ pub trait ParquetFileRepo: Send + Sync { /// /// Returns the deleted IDs only. /// - /// This deletion is limited to a certain (backend-specific) number of files to avoid overlarge changes. The caller - /// MAY call this method again if the result was NOT empty. + /// This deletion is limited to a certain (backend-specific) number of files to avoid overlarge + /// changes. The caller MAY call this method again if the result was NOT empty. 
async fn delete_old_ids_only(&mut self, older_than: Timestamp) -> Result>; /// List parquet files for a given partition that are NOT marked as @@ -827,17 +787,12 @@ pub async fn list_schemas( #[cfg(test)] pub(crate) mod test_helpers { - use crate::{ - validate_or_insert_schema, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, - SHARED_TOPIC_ID, - }; + use crate::{validate_or_insert_schema, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES}; use super::*; use ::test_helpers::{assert_contains, tracing::TracingCapture}; use assert_matches::assert_matches; - use data_types::{ - ColumnId, ColumnSet, CompactionLevel, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX, - }; + use data_types::{ColumnId, ColumnSet, CompactionLevel, SequenceNumber}; use futures::Future; use metric::{Attributes, DurationHistogram, Metric}; use std::{collections::BTreeSet, ops::DerefMut, sync::Arc, time::Duration}; @@ -891,23 +846,6 @@ pub(crate) mod test_helpers { async fn test_setup(catalog: Arc) { catalog.setup().await.expect("first catalog setup"); catalog.setup().await.expect("second catalog setup"); - - let transition_shard = catalog - .repositories() - .await - .shards() - .get_by_topic_id_and_shard_index(SHARED_TOPIC_ID, TRANSITION_SHARD_INDEX) - .await - .expect("transition shard"); - - assert_matches!( - transition_shard, - Some(Shard { - id, - shard_index, - .. - }) if id == TRANSITION_SHARD_ID && shard_index == TRANSITION_SHARD_INDEX - ); } async fn test_topic(catalog: Arc) { @@ -1560,29 +1498,19 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); - let other_shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(2)) - .await - .unwrap(); let mut created = BTreeMap::new(); for key in ["foo", "bar"] { let partition = repos .partitions() - .create_or_get(key.into(), shard.id, table.id) + .create_or_get(key.into(), table.id) .await .expect("failed to create partition"); created.insert(partition.id, partition); } let other_partition = repos .partitions() - .create_or_get("asdf".into(), other_shard.id, table.id) + .create_or_get("asdf".into(), table.id) .await .unwrap(); @@ -1859,24 +1787,18 @@ pub(crate) mod test_helpers { .create_or_get("other", namespace.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); let partition = repos .partitions() - .create_or_get("one".into(), shard.id, table.id) + .create_or_get("one".into(), table.id) .await .unwrap(); let other_partition = repos .partitions() - .create_or_get("one".into(), shard.id, other_table.id) + .create_or_get("one".into(), other_table.id) .await .unwrap(); let parquet_file_params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: partition.table_id, partition_id: partition.id, @@ -2049,7 +1971,7 @@ pub(crate) mod test_helpers { .unwrap(); let partition2 = repos .partitions() - .create_or_get("foo".into(), shard.id, table2.id) + .create_or_get("foo".into(), table2.id) .await .unwrap(); let files = repos @@ -2285,24 +2207,18 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace_2.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); let partition_1 = repos .partitions() - .create_or_get("one".into(), shard.id, table_1.id) + .create_or_get("one".into(), table_1.id) .await .unwrap(); let partition_2 = repos .partitions() - 
.create_or_get("one".into(), shard.id, table_2.id) + .create_or_get("one".into(), table_2.id) .await .unwrap(); let parquet_file_params_1 = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace_1.id, table_id: table_1.id, partition_id: partition_1.id, @@ -2318,7 +2234,6 @@ pub(crate) mod test_helpers { max_l0_created_at: Timestamp::new(1), }; let parquet_file_params_2 = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace_2.id, table_id: table_2.id, partition_id: partition_2.id, @@ -2374,11 +2289,6 @@ pub(crate) mod test_helpers { .create_or_get("test_table_for_new_file_between", namespace.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(101)) - .await - .unwrap(); // param for the tests let time_now = Timestamp::from(catalog.time_provider().now()); @@ -2401,7 +2311,7 @@ pub(crate) mod test_helpers { // The DB has 1 partition but it does not have any file let partition1 = repos .partitions() - .create_or_get("one".into(), shard.id, table.id) + .create_or_get("one".into(), table.id) .await .unwrap(); let partitions = repos @@ -2413,7 +2323,6 @@ pub(crate) mod test_helpers { // create files for partition one let parquet_file_params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: partition1.table_id, partition_id: partition1.id, @@ -2504,7 +2413,7 @@ pub(crate) mod test_helpers { // Partition two without any file let partition2 = repos .partitions() - .create_or_get("two".into(), shard.id, table.id) + .create_or_get("two".into(), table.id) .await .unwrap(); // should return partition one only @@ -2612,7 +2521,7 @@ pub(crate) mod test_helpers { // Partition three without any file let partition3 = repos .partitions() - .create_or_get("three".into(), shard.id, table.id) + .create_or_get("three".into(), table.id) .await .unwrap(); // should return partition one and two only @@ -2754,28 +2663,15 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(100)) - .await - .unwrap(); let partition = repos .partitions() - .create_or_get( - "test_list_by_partiton_not_to_delete_one".into(), - shard.id, - table.id, - ) + .create_or_get("test_list_by_partiton_not_to_delete_one".into(), table.id) .await .unwrap(); let partition2 = repos .partitions() - .create_or_get( - "test_list_by_partiton_not_to_delete_two".into(), - shard.id, - table.id, - ) + .create_or_get("test_list_by_partiton_not_to_delete_two".into(), table.id) .await .unwrap(); @@ -2783,7 +2679,6 @@ pub(crate) mod test_helpers { let max_time = Timestamp::new(10); let parquet_file_params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: partition.table_id, partition_id: partition.id, @@ -2883,18 +2778,9 @@ pub(crate) mod test_helpers { .create_or_get("update_table", namespace.id) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1000)) - .await - .unwrap(); let partition = repos .partitions() - .create_or_get( - "test_update_to_compaction_level_1_one".into(), - shard.id, - table.id, - ) + .create_or_get("test_update_to_compaction_level_1_one".into(), table.id) .await .unwrap(); @@ -2904,12 +2790,10 @@ pub(crate) mod test_helpers { // Create a file with times entirely within the window let parquet_file_params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: partition.table_id, partition_id: partition.id, object_store_id: 
Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: query_min_time + 1, max_time: query_max_time - 1, @@ -2988,21 +2872,15 @@ pub(crate) mod test_helpers { .create_or_get("column_test_1", table_1.id, ColumnType::Tag) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); let partition_1 = repos .partitions() - .create_or_get("test_delete_namespace_one".into(), shard.id, table_1.id) + .create_or_get("test_delete_namespace_one".into(), table_1.id) .await .unwrap(); // parquet files let parquet_file_params = ParquetFileParams { namespace_id: namespace_1.id, - shard_id: shard.id, table_id: partition_1.table_id, partition_id: partition_1.id, object_store_id: Uuid::new_v4(), @@ -3051,21 +2929,15 @@ pub(crate) mod test_helpers { .create_or_get("column_test_2", table_2.id, ColumnType::Tag) .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); let partition_2 = repos .partitions() - .create_or_get("test_delete_namespace_two".into(), shard.id, table_2.id) + .create_or_get("test_delete_namespace_two".into(), table_2.id) .await .unwrap(); // parquet files let parquet_file_params = ParquetFileParams { namespace_id: namespace_2.id, - shard_id: shard.id, table_id: partition_2.table_id, partition_id: partition_2.id, object_store_id: Uuid::new_v4(), diff --git a/iox_catalog/src/kafkaless_transition.rs b/iox_catalog/src/kafkaless_transition.rs new file mode 100644 index 0000000000..c290507322 --- /dev/null +++ b/iox_catalog/src/kafkaless_transition.rs @@ -0,0 +1,83 @@ +use data_types::{SequenceNumber, TopicId}; + +/// Magic number to be used for shard indices and shard ids in "kafkaless". +pub(crate) const TRANSITION_SHARD_NUMBER: i32 = 1234; +/// In kafkaless mode all new persisted data uses this shard id. +pub(crate) const TRANSITION_SHARD_ID: ShardId = ShardId::new(TRANSITION_SHARD_NUMBER as i64); +/// In kafkaless mode all new persisted data uses this shard index. +pub(crate) const TRANSITION_SHARD_INDEX: ShardIndex = ShardIndex::new(TRANSITION_SHARD_NUMBER); + +/// Unique ID for a `Shard`, assigned by the catalog. Joins to other catalog tables to uniquely +/// identify shards independently of the underlying write buffer implementation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub(crate) struct ShardId(i64); + +#[allow(missing_docs)] +impl ShardId { + pub(crate) const fn new(v: i64) -> Self { + Self(v) + } +} + +impl std::fmt::Display for ShardId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +/// The index of the shard in the set of shards. When Kafka is used as the write buffer, this is +/// the Kafka Partition ID. Used by the router and write buffer to shard requests to a particular +/// index in a set of shards. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub(crate) struct ShardIndex(i32); + +#[allow(missing_docs)] +impl ShardIndex { + pub(crate) const fn new(v: i32) -> Self { + Self(v) + } +} + +impl std::fmt::Display for ShardIndex { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl std::str::FromStr for ShardIndex { + type Err = std::num::ParseIntError; + + fn from_str(s: &str) -> Result { + let v: i32 = s.parse()?; + Ok(Self(v)) + } +} + +/// Data object for a shard.
Only one shard record can exist for a given topic and shard +/// index (enforced via uniqueness constraint). +#[derive(Debug, Copy, Clone, PartialEq, Eq, sqlx::FromRow)] +pub(crate) struct Shard { + /// the id of the shard, assigned by the catalog + pub(crate) id: ShardId, + /// the topic the shard is reading from + pub(crate) topic_id: TopicId, + /// the shard index of the shard the sequence numbers are coming from, sharded by the router + /// and write buffer + pub(crate) shard_index: ShardIndex, + /// The minimum unpersisted sequence number. Because different tables + /// can be persisted at different times, it is possible some data has been persisted + /// with a higher sequence number than this. However, all data with a sequence number + /// lower than this must have been persisted to Parquet. + pub(crate) min_unpersisted_sequence_number: SequenceNumber, +} + +/// Shard index plus offset +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(crate) struct Sequence { + /// The shard index + pub(crate) shard_index: ShardIndex, + /// The sequence number + pub(crate) sequence_number: SequenceNumber, +} diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index a1caf178d8..4ce2a7c397 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -14,15 +14,9 @@ )] use crate::interface::{ColumnTypeMismatchSnafu, Error, RepoCollection, Result, Transaction}; -use data_types::{ - ColumnType, NamespaceSchema, QueryPool, Shard, ShardId, ShardIndex, TableSchema, TopicId, - TopicMetadata, -}; +use data_types::{ColumnType, NamespaceSchema, QueryPool, TableSchema, TopicId, TopicMetadata}; use mutable_batch::MutableBatch; -use std::{ - borrow::Cow, - collections::{BTreeMap, HashMap}, -}; +use std::{borrow::Cow, collections::HashMap}; use thiserror::Error; const SHARED_TOPIC_NAME: &str = "iox-shared"; @@ -38,6 +32,7 @@ pub const DEFAULT_MAX_COLUMNS_PER_TABLE: i32 = 200; pub const DEFAULT_RETENTION_PERIOD: Option = None; pub mod interface; +pub(crate) mod kafkaless_transition; pub mod mem; pub mod metrics; pub mod postgres; @@ -209,37 +204,28 @@ where Ok(()) } -/// Creates or gets records in the catalog for the shared topic, query pool, and shards -/// for each of the partitions. +/// Creates or gets records in the catalog for the shared topic and the shared +/// query pool. /// /// Used in tests and when creating an in-memory catalog. pub async fn create_or_get_default_records( - shard_count: i32, txn: &mut dyn Transaction, -) -> Result<(TopicMetadata, QueryPool, BTreeMap)> { +) -> Result<(TopicMetadata, QueryPool)> { let topic = txn.topics().create_or_get(SHARED_TOPIC_NAME).await?; let query_pool = txn.query_pools().create_or_get(SHARED_QUERY_POOL).await?; - let mut shards = BTreeMap::new(); - // Start at 0 to match the one write buffer shard index used in all-in-one mode - for shard_index in 0..shard_count { - let shard = txn - .shards() - .create_or_get(&topic, ShardIndex::new(shard_index)) - .await?; - shards.insert(shard.id, shard); - } - - Ok((topic, query_pool, shards)) + Ok((topic, query_pool)) } #[cfg(test)] mod tests { - use std::sync::Arc; + use std::{collections::BTreeMap, sync::Arc}; use super::*; - use crate::interface::{get_schema_by_name, SoftDeletedRows}; - use crate::mem::MemCatalog; + use crate::{ + interface::{get_schema_by_name, SoftDeletedRows}, + mem::MemCatalog, + }; // Generate a test that simulates multiple, sequential writes in `lp` and // asserts the resulting schema.
@@ -265,8 +251,7 @@ mod tests { let metrics = Arc::new(metric::Registry::default()); let repo = MemCatalog::new(metrics); let mut txn = repo.start_transaction().await.unwrap(); - let (topic, query_pool, _) = create_or_get_default_records( - 2, + let (topic, query_pool) = create_or_get_default_records( txn.deref_mut() ).await.unwrap(); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 00f97e4ffa..e0e40a3eef 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -5,9 +5,10 @@ use crate::{ interface::{ sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo, ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo, QueryPoolRepo, RepoCollection, - Result, ShardRepo, SoftDeletedRows, TableRepo, TopicMetadataRepo, Transaction, + Result, SoftDeletedRows, TableRepo, TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE, }, + kafkaless_transition::{Shard, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX}, metrics::MetricDecorator, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME, }; @@ -15,8 +16,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnId, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp, - TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX, + SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use iox_time::{SystemProvider, TimeProvider}; use observability_deps::tracing::warn; @@ -248,10 +248,6 @@ impl RepoCollection for MemTxn { self } - fn shards(&mut self) -> &mut dyn ShardRepo { - self - } - fn partitions(&mut self) -> &mut dyn PartitionRepo { self } @@ -689,115 +685,30 @@ impl ColumnRepo for MemTxn { } #[async_trait] -impl ShardRepo for MemTxn { - async fn create_or_get( - &mut self, - topic: &TopicMetadata, - _shard_index: ShardIndex, - ) -> Result { +impl PartitionRepo for MemTxn { + async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result { let stage = self.stage(); - // Temporary: only ever create the transition shard, no matter what is asked. Shards are - // going away completely soon. 
- let shard = match stage - .shards + let partition = match stage + .partitions .iter() - .find(|s| s.topic_id == topic.id && s.shard_index == TRANSITION_SHARD_INDEX) + .find(|p| p.partition_key == key && p.table_id == table_id) { - Some(t) => t, + Some(p) => p, None => { - let shard = Shard { - id: TRANSITION_SHARD_ID, - topic_id: topic.id, - shard_index: TRANSITION_SHARD_INDEX, - min_unpersisted_sequence_number: SequenceNumber::new(0), + let p = Partition { + id: PartitionId::new(stage.partitions.len() as i64 + 1), + table_id, + partition_key: key, + sort_key: vec![], + persisted_sequence_number: None, + new_file_at: None, }; - stage.shards.push(shard); - stage.shards.last().unwrap() + stage.partitions.push(p); + stage.partitions.last().unwrap() } }; - Ok(*shard) - } - - async fn get_by_topic_id_and_shard_index( - &mut self, - topic_id: TopicId, - shard_index: ShardIndex, - ) -> Result> { - let stage = self.stage(); - - let shard = stage - .shards - .iter() - .find(|s| s.topic_id == topic_id && s.shard_index == shard_index) - .cloned(); - Ok(shard) - } - - async fn list(&mut self) -> Result> { - let stage = self.stage(); - - Ok(stage.shards.clone()) - } - - async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result> { - let stage = self.stage(); - - let shards: Vec<_> = stage - .shards - .iter() - .filter(|s| s.topic_id == topic.id) - .cloned() - .collect(); - Ok(shards) - } - - async fn update_min_unpersisted_sequence_number( - &mut self, - shard_id: ShardId, - sequence_number: SequenceNumber, - ) -> Result<()> { - let stage = self.stage(); - - if let Some(s) = stage.shards.iter_mut().find(|s| s.id == shard_id) { - s.min_unpersisted_sequence_number = sequence_number - }; - - Ok(()) - } -} - -#[async_trait] -impl PartitionRepo for MemTxn { - async fn create_or_get( - &mut self, - key: PartitionKey, - shard_id: ShardId, - table_id: TableId, - ) -> Result { - let stage = self.stage(); - - let partition = - match stage.partitions.iter().find(|p| { - p.partition_key == key && p.shard_id == shard_id && p.table_id == table_id - }) { - Some(p) => p, - None => { - let p = Partition { - id: PartitionId::new(stage.partitions.len() as i64 + 1), - shard_id, - table_id, - partition_key: key, - sort_key: vec![], - persisted_sequence_number: None, - new_file_at: None, - }; - stage.partitions.push(p); - stage.partitions.last().unwrap() - } - }; - Ok(partition.clone()) } diff --git a/iox_catalog/src/metrics.rs b/iox_catalog/src/metrics.rs index ded7ca75e7..561e1347c6 100644 --- a/iox_catalog/src/metrics.rs +++ b/iox_catalog/src/metrics.rs @@ -2,15 +2,14 @@ use crate::interface::{ sealed::TransactionFinalize, CasFailure, ColumnRepo, NamespaceRepo, ParquetFileRepo, - PartitionRepo, QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo, + PartitionRepo, QueryPoolRepo, RepoCollection, Result, SoftDeletedRows, TableRepo, TopicMetadataRepo, }; use async_trait::async_trait; use data_types::{ Column, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp, - TopicId, TopicMetadata, + SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use iox_time::{SystemProvider, TimeProvider}; use metric::{DurationHistogram, Metric}; @@ -48,7 +47,6 @@ where + NamespaceRepo + TableRepo + ColumnRepo - + ShardRepo + PartitionRepo + ParquetFileRepo + Debug, @@ -74,10 +72,6 @@ where 
self } - fn shards(&mut self) -> &mut dyn ShardRepo { - self - } - fn partitions(&mut self) -> &mut dyn PartitionRepo { self } @@ -215,21 +209,10 @@ decorate!( ] ); -decorate!( - impl_trait = ShardRepo, - methods = [ - "shard_create_or_get" = create_or_get(&mut self, topic: &TopicMetadata, shard_index: ShardIndex) -> Result; - "shard_get_by_topic_id_and_shard_index" = get_by_topic_id_and_shard_index(&mut self, topic_id: TopicId, shard_index: ShardIndex) -> Result>; - "shard_list" = list(&mut self) -> Result>; - "shard_list_by_topic" = list_by_topic(&mut self, topic: &TopicMetadata) -> Result>; - "shard_update_min_unpersisted_sequence_number" = update_min_unpersisted_sequence_number(&mut self, shard_id: ShardId, sequence_number: SequenceNumber) -> Result<()>; - ] -); - decorate!( impl_trait = PartitionRepo, methods = [ - "partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, shard_id: ShardId, table_id: TableId) -> Result; + "partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result; "partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result>; "partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result>; "partition_list_ids" = list_ids(&mut self) -> Result>; diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 43f81f9ccd..9550965ce0 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -4,9 +4,10 @@ use crate::{ interface::{ self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo, ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo, - QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo, - TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE, + QueryPoolRepo, RepoCollection, Result, SoftDeletedRows, TableRepo, TopicMetadataRepo, + Transaction, MAX_PARQUET_FILES_SELECTED_ONCE, }, + kafkaless_transition::{TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX}, metrics::MetricDecorator, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME, }; @@ -14,8 +15,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp, - TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX, + SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use iox_time::{SystemProvider, TimeProvider}; use observability_deps::tracing::{debug, info, warn}; @@ -547,10 +547,6 @@ impl RepoCollection for PostgresTxn { self } - fn shards(&mut self) -> &mut dyn ShardRepo { - self - } - fn partitions(&mut self) -> &mut dyn PartitionRepo { self } @@ -1086,109 +1082,9 @@ RETURNING *; } } -#[async_trait] -impl ShardRepo for PostgresTxn { - async fn create_or_get( - &mut self, - topic: &TopicMetadata, - shard_index: ShardIndex, - ) -> Result { - sqlx::query_as::<_, Shard>( - r#" -INSERT INTO shard - ( topic_id, shard_index, min_unpersisted_sequence_number ) -VALUES - ( $1, $2, 0 ) -ON CONFLICT ON CONSTRAINT shard_unique -DO UPDATE SET topic_id = shard.topic_id -RETURNING *;; - "#, - ) - .bind(topic.id) // $1 - .bind(shard_index) // $2 - .fetch_one(&mut self.inner) - .await - .map_err(|e| { - if is_fk_violation(&e) { - Error::ForeignKeyViolation { source: e } - } else { - Error::SqlxError { source: e } - } - 
}) - } - - async fn get_by_topic_id_and_shard_index( - &mut self, - topic_id: TopicId, - shard_index: ShardIndex, - ) -> Result> { - let rec = sqlx::query_as::<_, Shard>( - r#" -SELECT * -FROM shard -WHERE topic_id = $1 - AND shard_index = $2; - "#, - ) - .bind(topic_id) // $1 - .bind(shard_index) // $2 - .fetch_one(&mut self.inner) - .await; - - if let Err(sqlx::Error::RowNotFound) = rec { - return Ok(None); - } - - let shard = rec.map_err(|e| Error::SqlxError { source: e })?; - - Ok(Some(shard)) - } - - async fn list(&mut self) -> Result> { - sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard;"#) - .fetch_all(&mut self.inner) - .await - .map_err(|e| Error::SqlxError { source: e }) - } - - async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result> { - sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard WHERE topic_id = $1;"#) - .bind(topic.id) // $1 - .fetch_all(&mut self.inner) - .await - .map_err(|e| Error::SqlxError { source: e }) - } - - async fn update_min_unpersisted_sequence_number( - &mut self, - shard_id: ShardId, - sequence_number: SequenceNumber, - ) -> Result<()> { - let _ = sqlx::query( - r#" -UPDATE shard -SET min_unpersisted_sequence_number = $1 -WHERE id = $2; - "#, - ) - .bind(sequence_number.get()) // $1 - .bind(shard_id) // $2 - .execute(&mut self.inner) - .await - .map_err(|e| Error::SqlxError { source: e })?; - - Ok(()) - } -} - #[async_trait] impl PartitionRepo for PostgresTxn { - async fn create_or_get( - &mut self, - key: PartitionKey, - shard_id: ShardId, - table_id: TableId, - ) -> Result { + async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result { // Note: since sort_key is now an array, we must explicitly insert '{}' which is an empty // array rather than NULL which sqlx will throw `UnexpectedNullError` while is is doing // `ColumnDecode` @@ -1201,11 +1097,11 @@ VALUES ( $1, $2, $3, '{}') ON CONFLICT ON CONSTRAINT partition_key_unique DO UPDATE SET partition_key = partition.partition_key -RETURNING *; +RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; "#, ) .bind(key) // $1 - .bind(shard_id) // $2 + .bind(TRANSITION_SHARD_ID) // $2 .bind(table_id) // $3 .fetch_one(&mut self.inner) .await @@ -1217,23 +1113,20 @@ RETURNING *; } })?; - // If the partition_key_unique constraint was hit because there was an - // existing record for (table_id, partition_key) ensure the partition - // key in the DB is mapped to the same shard_id the caller - // requested. 
- assert_eq!( - v.shard_id, shard_id, - "attempted to overwrite partition with different shard ID" - ); - Ok(v) } async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { - let rec = sqlx::query_as::<_, Partition>(r#"SELECT * FROM partition WHERE id = $1;"#) - .bind(partition_id) // $1 - .fetch_one(&mut self.inner) - .await; + let rec = sqlx::query_as::<_, Partition>( + r#" +SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +FROM partition +WHERE id = $1; + "#, + ) + .bind(partition_id) // $1 + .fetch_one(&mut self.inner) + .await; if let Err(sqlx::Error::RowNotFound) = rec { return Ok(None); @@ -1247,7 +1140,7 @@ RETURNING *; async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { sqlx::query_as::<_, Partition>( r#" -SELECT * +SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at FROM partition WHERE table_id = $1; "#, @@ -1288,7 +1181,7 @@ WHERE table_id = $1; UPDATE partition SET sort_key = $1 WHERE id = $2 AND sort_key = $3 -RETURNING *; +RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; "#, ) .bind(new_sort_key) // $1 @@ -1461,7 +1354,6 @@ RETURNING * impl ParquetFileRepo for PostgresTxn { async fn create(&mut self, parquet_file_params: ParquetFileParams) -> Result { let ParquetFileParams { - shard_id, namespace_id, table_id, partition_id, @@ -1484,10 +1376,13 @@ INSERT INTO parquet_file ( max_sequence_number, min_time, max_time, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) -RETURNING *; +RETURNING + id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at; "#, ) - .bind(shard_id) // $1 + .bind(TRANSITION_SHARD_ID) // $1 .bind(table_id) // $2 .bind(partition_id) // $3 .bind(object_store_id) // $4 @@ -1563,16 +1458,14 @@ RETURNING id; &mut self, namespace_id: NamespaceId, ) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! sqlx::query_as::<_, ParquetFile>( r#" -SELECT parquet_file.id, parquet_file.shard_id, parquet_file.namespace_id, +SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id, parquet_file.max_sequence_number, parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, - parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, - parquet_file.max_l0_created_at + parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, + parquet_file.column_set, parquet_file.max_l0_created_at FROM parquet_file INNER JOIN table_name on table_name.id = parquet_file.table_id WHERE table_name.namespace_id = $1 @@ -1586,11 +1479,9 @@ WHERE table_name.namespace_id = $1 } async fn list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! 
sqlx::query_as::<_, ParquetFile>( r#" -SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, +SELECT id, namespace_id, table_id, partition_id, object_store_id, max_sequence_number, min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file @@ -1650,11 +1541,9 @@ RETURNING id; &mut self, partition_id: PartitionId, ) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! sqlx::query_as::<_, ParquetFile>( r#" -SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, +SELECT id, namespace_id, table_id, partition_id, object_store_id, max_sequence_number, min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file @@ -1720,11 +1609,9 @@ RETURNING id; &mut self, object_store_id: Uuid, ) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! let rec = sqlx::query_as::<_, ParquetFile>( r#" -SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, +SELECT id, namespace_id, table_id, partition_id, object_store_id, max_sequence_number, min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file @@ -1783,7 +1670,7 @@ mod tests { use super::*; use crate::create_or_get_default_records; use assert_matches::assert_matches; - use data_types::{ColumnId, ColumnSet}; + use data_types::{ColumnId, ColumnSet, SequenceNumber}; use metric::{Attributes, DurationHistogram, Metric}; use rand::Rng; use sqlx::migrate::MigrateDatabase; @@ -1911,9 +1798,6 @@ mod tests { #[tokio::test] async fn test_catalog() { - // If running an integration test on your laptop, this requires that you have Postgres - // running and that you've done the sqlx migrations. See the README in this crate for - // info to set it up. 
maybe_skip_integration!(); let postgres = setup_db().await; @@ -1964,23 +1848,13 @@ mod tests { #[tokio::test] async fn test_partition_create_or_get_idempotent() { - // If running an integration test on your laptop, this requires that you have Postgres running - // - // This is a command to run this test on your laptop - // TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgres:postgres://$USER@localhost/iox_shared RUST_BACKTRACE=1 cargo test --package iox_catalog --lib -- postgres::tests::test_partition_create_or_get_idempotent --exact --nocapture - // - // If you do not have Postgres's iox_shared db, here are commands to install Postgres (on mac) and create iox_shared db - // brew install postgresql - // initdb pg - // createdb iox_shared - maybe_skip_integration!(); let postgres = setup_db().await; let postgres: Arc = Arc::new(postgres); let mut txn = postgres.start_transaction().await.expect("txn start"); - let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + let (kafka, query) = create_or_get_default_records(txn.deref_mut()) .await .expect("db init failed"); txn.commit().await.expect("txn commit"); @@ -2003,100 +1877,27 @@ mod tests { .id; let key = "bananas"; - let shard_id = *shards.keys().next().expect("no shard"); let a = postgres .repositories() .await .partitions() - .create_or_get(key.into(), shard_id, table_id) + .create_or_get(key.into(), table_id) .await .expect("should create OK"); - // Call create_or_get for the same (key, table_id, shard_id) - // triplet, setting the same shard ID to ensure the write is - // idempotent. + // Call create_or_get for the same (key, table_id) pair, to ensure the write is idempotent. let b = postgres .repositories() .await .partitions() - .create_or_get(key.into(), shard_id, table_id) + .create_or_get(key.into(), table_id) .await .expect("idempotent write should succeed"); assert_eq!(a, b); } - #[tokio::test] - #[should_panic = "attempted to overwrite partition"] - async fn test_partition_create_or_get_no_overwrite() { - // If running an integration test on your laptop, this requires that you have Postgres - // running and that you've done the sqlx migrations. See the README in this crate for - // info to set it up. 
- maybe_skip_integration!("attempted to overwrite partition"); - - let postgres = setup_db().await; - - let postgres: Arc = Arc::new(postgres); - let mut txn = postgres.start_transaction().await.expect("txn start"); - let (kafka, query, _) = create_or_get_default_records(2, txn.deref_mut()) - .await - .expect("db init failed"); - txn.commit().await.expect("txn commit"); - - let namespace_id = postgres - .repositories() - .await - .namespaces() - .create("ns3", None, kafka.id, query.id) - .await - .expect("namespace create failed") - .id; - let table_id = postgres - .repositories() - .await - .tables() - .create_or_get("table", namespace_id) - .await - .expect("create table failed") - .id; - - let key = "bananas"; - - let shards = postgres - .repositories() - .await - .shards() - .list() - .await - .expect("failed to list shards"); - assert!( - shards.len() > 1, - "expected more shards to be created, got {}", - shards.len() - ); - - let a = postgres - .repositories() - .await - .partitions() - .create_or_get(key.into(), shards[0].id, table_id) - .await - .expect("should create OK"); - - // Call create_or_get for the same (key, table_id) tuple, setting a - // different shard ID - let b = postgres - .repositories() - .await - .partitions() - .create_or_get(key.into(), shards[1].id, table_id) - .await - .expect("result should not be evaluated"); - - assert_eq!(a, b); - } - #[test] fn test_parse_dsn_file() { assert_eq!( @@ -2190,9 +1991,6 @@ mod tests { paste::paste! { #[tokio::test] async fn []() { - // If running an integration test on your laptop, this requires that you have - // Postgres running and that you've done the sqlx migrations. See the README in - // this crate for info to set it up. maybe_skip_integration!(); let postgres = setup_db().await; @@ -2200,7 +1998,7 @@ mod tests { let postgres: Arc = Arc::new(postgres); let mut txn = postgres.start_transaction().await.expect("txn start"); - let (kafka, query, _shards) = create_or_get_default_records(1, txn.deref_mut()) + let (kafka, query) = create_or_get_default_records(txn.deref_mut()) .await .expect("db init failed"); txn.commit().await.expect("txn commit"); @@ -2362,19 +2160,6 @@ mod tests { #[tokio::test] async fn test_billing_summary_on_parqet_file_creation() { - // If running an integration test on your laptop, this requires that you have Postgres running - // - // This is a command to run this test on your laptop - // TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgres:postgres://$USER@localhost/iox_shared RUST_BACKTRACE=1 cargo test --package iox_catalog --lib -- postgres::tests::test_billing_summary_on_parqet_file_creation --exact --nocapture - // - // If you do not have Postgres's iox_shared db, here are commands to install Postgres (on mac) and create iox_shared db - // brew install postgresql - // initdb pg - // createdb iox_shared - // - // Or if you're on Linux or otherwise don't mind using Docker: - // ./scripts/docker_catalog.sh - maybe_skip_integration!(); let postgres = setup_db().await; @@ -2382,7 +2167,7 @@ mod tests { let postgres: Arc = Arc::new(postgres); let mut txn = postgres.start_transaction().await.expect("txn start"); - let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + let (kafka, query) = create_or_get_default_records(txn.deref_mut()) .await .expect("db init failed"); txn.commit().await.expect("txn commit"); @@ -2405,13 +2190,12 @@ mod tests { .id; let key = "bananas"; - let shard_id = *shards.keys().next().expect("no shard"); let partition_id = postgres .repositories() 
.await .partitions() - .create_or_get(key.into(), shard_id, table_id) + .create_or_get(key.into(), table_id) .await .expect("should create OK") .id; @@ -2421,7 +2205,6 @@ mod tests { let time_provider = Arc::new(SystemProvider::new()); let time_now = Timestamp::from(time_provider.now()); let mut p1 = ParquetFileParams { - shard_id, namespace_id, table_id, partition_id, diff --git a/iox_catalog/src/sqlite.rs b/iox_catalog/src/sqlite.rs index 5abcea3686..ab8a9e0075 100644 --- a/iox_catalog/src/sqlite.rs +++ b/iox_catalog/src/sqlite.rs @@ -4,9 +4,10 @@ use crate::{ interface::{ self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo, ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo, - QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo, - TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE, + QueryPoolRepo, RepoCollection, Result, SoftDeletedRows, TableRepo, TopicMetadataRepo, + Transaction, MAX_PARQUET_FILES_SELECTED_ONCE, }, + kafkaless_transition::{TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX}, metrics::MetricDecorator, DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME, }; @@ -14,8 +15,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnId, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, Timestamp, - TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX, + SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use serde::{Deserialize, Serialize}; use std::ops::Deref; @@ -318,10 +318,6 @@ impl RepoCollection for SqliteTxn { self } - fn shards(&mut self) -> &mut dyn ShardRepo { - self - } - fn partitions(&mut self) -> &mut dyn PartitionRepo { self } @@ -866,108 +862,12 @@ RETURNING *; } } -#[async_trait] -impl ShardRepo for SqliteTxn { - async fn create_or_get( - &mut self, - topic: &TopicMetadata, - shard_index: ShardIndex, - ) -> Result { - sqlx::query_as::<_, Shard>( - r#" -INSERT INTO shard - ( topic_id, shard_index, min_unpersisted_sequence_number ) -VALUES - ( $1, $2, 0 ) -ON CONFLICT (topic_id, shard_index) -DO UPDATE SET topic_id = shard.topic_id -RETURNING *; - "#, - ) - .bind(topic.id) // $1 - .bind(shard_index) // $2 - .fetch_one(self.inner.get_mut()) - .await - .map_err(|e| { - if is_fk_violation(&e) { - Error::ForeignKeyViolation { source: e } - } else { - Error::SqlxError { source: e } - } - }) - } - - async fn get_by_topic_id_and_shard_index( - &mut self, - topic_id: TopicId, - shard_index: ShardIndex, - ) -> Result> { - let rec = sqlx::query_as::<_, Shard>( - r#" -SELECT * -FROM shard -WHERE topic_id = $1 - AND shard_index = $2; - "#, - ) - .bind(topic_id) // $1 - .bind(shard_index) // $2 - .fetch_one(self.inner.get_mut()) - .await; - - if let Err(sqlx::Error::RowNotFound) = rec { - return Ok(None); - } - - let shard = rec.map_err(|e| Error::SqlxError { source: e })?; - - Ok(Some(shard)) - } - - async fn list(&mut self) -> Result> { - sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard;"#) - .fetch_all(self.inner.get_mut()) - .await - .map_err(|e| Error::SqlxError { source: e }) - } - - async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result> { - sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard WHERE topic_id = $1;"#) - .bind(topic.id) // $1 - .fetch_all(self.inner.get_mut()) - .await - 
.map_err(|e| Error::SqlxError { source: e }) - } - - async fn update_min_unpersisted_sequence_number( - &mut self, - shard_id: ShardId, - sequence_number: SequenceNumber, - ) -> Result<()> { - let _ = sqlx::query( - r#" -UPDATE shard -SET min_unpersisted_sequence_number = $1 -WHERE id = $2; - "#, - ) - .bind(sequence_number.get()) // $1 - .bind(shard_id) // $2 - .execute(self.inner.get_mut()) - .await - .map_err(|e| Error::SqlxError { source: e })?; - - Ok(()) - } -} - // We can't use [`Partition`], as uses Vec which the Sqlite // driver cannot serialise #[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)] struct PartitionPod { id: PartitionId, - shard_id: ShardId, table_id: TableId, partition_key: PartitionKey, sort_key: Json>, @@ -979,7 +879,6 @@ impl From for Partition { fn from(value: PartitionPod) -> Self { Self { id: value.id, - shard_id: value.shard_id, table_id: value.table_id, partition_key: value.partition_key, sort_key: value.sort_key.0, @@ -991,12 +890,7 @@ impl From for Partition { #[async_trait] impl PartitionRepo for SqliteTxn { - async fn create_or_get( - &mut self, - key: PartitionKey, - shard_id: ShardId, - table_id: TableId, - ) -> Result { + async fn create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result { // Note: since sort_key is now an array, we must explicitly insert '{}' which is an empty // array rather than NULL which sqlx will throw `UnexpectedNullError` while is is doing // `ColumnDecode` @@ -1009,11 +903,11 @@ VALUES ( $1, $2, $3, '[]') ON CONFLICT (table_id, partition_key) DO UPDATE SET partition_key = partition.partition_key -RETURNING *; +RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; "#, ) .bind(key) // $1 - .bind(shard_id) // $2 + .bind(TRANSITION_SHARD_ID) // $2 .bind(table_id) // $3 .fetch_one(self.inner.get_mut()) .await @@ -1025,23 +919,20 @@ RETURNING *; } })?; - // If the partition_key_unique constraint was hit because there was an - // existing record for (table_id, partition_key) ensure the partition - // key in the DB is mapped to the same shard_id the caller - // requested. 
- assert_eq!( - v.shard_id, shard_id, - "attempted to overwrite partition with different shard ID" - ); - Ok(v.into()) } async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { - let rec = sqlx::query_as::<_, PartitionPod>(r#"SELECT * FROM partition WHERE id = $1;"#) - .bind(partition_id) // $1 - .fetch_one(self.inner.get_mut()) - .await; + let rec = sqlx::query_as::<_, PartitionPod>( + r#" +SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +FROM partition +WHERE id = $1; + "#, + ) + .bind(partition_id) // $1 + .fetch_one(self.inner.get_mut()) + .await; if let Err(sqlx::Error::RowNotFound) = rec { return Ok(None); @@ -1055,7 +946,7 @@ RETURNING *; async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { Ok(sqlx::query_as::<_, PartitionPod>( r#" -SELECT * +SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at FROM partition WHERE table_id = $1; "#, @@ -1099,7 +990,7 @@ WHERE table_id = $1; UPDATE partition SET sort_key = $1 WHERE id = $2 AND sort_key = $3 -RETURNING *; +RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; "#, ) .bind(Json(new_sort_key)) // $1 @@ -1237,7 +1128,12 @@ RETURNING * async fn most_recent_n(&mut self, n: usize) -> Result> { Ok(sqlx::query_as::<_, PartitionPod>( - r#"SELECT * FROM partition ORDER BY id DESC LIMIT $1;"#, + r#" +SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +FROM partition +ORDER BY id DESC +LIMIT $1; + "#, ) .bind(n as i64) // $1 .fetch_all(self.inner.get_mut()) @@ -1285,7 +1181,6 @@ fn to_column_set(v: &Json>) -> ColumnSet { #[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)] struct ParquetFilePod { id: ParquetFileId, - shard_id: ShardId, namespace_id: NamespaceId, table_id: TableId, partition_id: PartitionId, @@ -1306,7 +1201,6 @@ impl From for ParquetFile { fn from(value: ParquetFilePod) -> Self { Self { id: value.id, - shard_id: value.shard_id, namespace_id: value.namespace_id, table_id: value.table_id, partition_id: value.partition_id, @@ -1329,7 +1223,6 @@ impl From for ParquetFile { impl ParquetFileRepo for SqliteTxn { async fn create(&mut self, parquet_file_params: ParquetFileParams) -> Result { let ParquetFileParams { - shard_id, namespace_id, table_id, partition_id, @@ -1352,10 +1245,13 @@ INSERT INTO parquet_file ( max_sequence_number, min_time, max_time, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) -RETURNING *; +RETURNING + id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at; "#, ) - .bind(shard_id) // $1 + .bind(TRANSITION_SHARD_ID) // $1 .bind(table_id) // $2 .bind(partition_id) // $3 .bind(object_store_id) // $4 @@ -1435,33 +1331,30 @@ RETURNING id; // `parquet_metadata` column!! 
Ok(sqlx::query_as::<_, ParquetFilePod>( r#" -SELECT parquet_file.id, parquet_file.shard_id, parquet_file.namespace_id, - parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id, - parquet_file.max_sequence_number, parquet_file.min_time, - parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, - parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, - parquet_file.max_l0_created_at +SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id, + parquet_file.partition_id, parquet_file.object_store_id, parquet_file.max_sequence_number, + parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete, + parquet_file.file_size_bytes, parquet_file.row_count, parquet_file.compaction_level, + parquet_file.created_at, parquet_file.column_set, parquet_file.max_l0_created_at FROM parquet_file INNER JOIN table_name on table_name.id = parquet_file.table_id WHERE table_name.namespace_id = $1 AND parquet_file.to_delete IS NULL; "#, ) - .bind(namespace_id) // $1 - .fetch_all(self.inner.get_mut()) - .await - .map_err(|e| Error::SqlxError { source: e })? - .into_iter() - .map(Into::into) - .collect()) + .bind(namespace_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) } async fn list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! Ok(sqlx::query_as::<_, ParquetFilePod>( r#" -SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, +SELECT id, namespace_id, table_id, partition_id, object_store_id, max_sequence_number, min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file @@ -1527,11 +1420,9 @@ RETURNING id; &mut self, partition_id: PartitionId, ) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! Ok(sqlx::query_as::<_, ParquetFilePod>( r#" -SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, +SELECT id, namespace_id, table_id, partition_id, object_store_id, max_sequence_number, min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file @@ -1600,11 +1491,9 @@ RETURNING id; &mut self, object_store_id: Uuid, ) -> Result> { - // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large - // `parquet_metadata` column!! 
let rec = sqlx::query_as::<_, ParquetFilePod>( r#" -SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, +SELECT id, namespace_id, table_id, partition_id, object_store_id, max_sequence_number, min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file @@ -1707,7 +1596,7 @@ mod tests { let sqlite: Arc = Arc::new(sqlite); let mut txn = sqlite.start_transaction().await.expect("txn start"); - let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + let (kafka, query) = create_or_get_default_records(txn.deref_mut()) .await .expect("db init failed"); txn.commit().await.expect("txn commit"); @@ -1730,95 +1619,27 @@ mod tests { .id; let key = "bananas"; - let shard_id = *shards.keys().next().expect("no shard"); let a = sqlite .repositories() .await .partitions() - .create_or_get(key.into(), shard_id, table_id) + .create_or_get(key.into(), table_id) .await .expect("should create OK"); - // Call create_or_get for the same (key, table_id, shard_id) - // triplet, setting the same shard ID to ensure the write is - // idempotent. + // Call create_or_get for the same (key, table_id) pair, to ensure the write is idempotent. let b = sqlite .repositories() .await .partitions() - .create_or_get(key.into(), shard_id, table_id) + .create_or_get(key.into(), table_id) .await .expect("idempotent write should succeed"); assert_eq!(a, b); } - #[tokio::test] - #[should_panic = "attempted to overwrite partition"] - async fn test_partition_create_or_get_no_overwrite() { - let sqlite = setup_db().await; - - let sqlite: Arc = Arc::new(sqlite); - let mut txn = sqlite.start_transaction().await.expect("txn start"); - let (kafka, query, _) = create_or_get_default_records(2, txn.deref_mut()) - .await - .expect("db init failed"); - txn.commit().await.expect("txn commit"); - - let namespace_id = sqlite - .repositories() - .await - .namespaces() - .create("ns3", None, kafka.id, query.id) - .await - .expect("namespace create failed") - .id; - let table_id = sqlite - .repositories() - .await - .tables() - .create_or_get("table", namespace_id) - .await - .expect("create table failed") - .id; - - let key = "bananas"; - - let shards = sqlite - .repositories() - .await - .shards() - .list() - .await - .expect("failed to list shards"); - assert!( - shards.len() > 1, - "expected more shards to be created, got {}", - shards.len() - ); - - let a = sqlite - .repositories() - .await - .partitions() - .create_or_get(key.into(), shards[0].id, table_id) - .await - .expect("should create OK"); - - // Call create_or_get for the same (key, table_id) tuple, setting a - // different shard ID - let b = sqlite - .repositories() - .await - .partitions() - .create_or_get(key.into(), shards[1].id, table_id) - .await - .expect("result should not be evaluated"); - - assert_eq!(a, b); - } - macro_rules! 
test_column_create_or_get_many_unchecked { ( $name:ident, @@ -1833,7 +1654,7 @@ mod tests { let sqlite: Arc = Arc::new(sqlite); let mut txn = sqlite.start_transaction().await.expect("txn start"); - let (kafka, query, _shards) = create_or_get_default_records(1, txn.deref_mut()) + let (kafka, query) = create_or_get_default_records(txn.deref_mut()) .await .expect("db init failed"); txn.commit().await.expect("txn commit"); @@ -2000,7 +1821,7 @@ mod tests { let sqlite: Arc = Arc::new(sqlite); let mut txn = sqlite.start_transaction().await.expect("txn start"); - let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + let (kafka, query) = create_or_get_default_records(txn.deref_mut()) .await .expect("db init failed"); txn.commit().await.expect("txn commit"); @@ -2023,13 +1844,12 @@ mod tests { .id; let key = "bananas"; - let shard_id = *shards.keys().next().expect("no shard"); let partition_id = sqlite .repositories() .await .partitions() - .create_or_get(key.into(), shard_id, table_id) + .create_or_get(key.into(), table_id) .await .expect("should create OK") .id; @@ -2039,7 +1859,6 @@ mod tests { let time_provider = Arc::new(SystemProvider::new()); let time_now = Timestamp::from(time_provider.now()); let mut p1 = ParquetFileParams { - shard_id, namespace_id, table_id, partition_id, diff --git a/iox_tests/src/builders.rs b/iox_tests/src/builders.rs index fdef83ae8b..48ecc201f1 100644 --- a/iox_tests/src/builders.rs +++ b/iox_tests/src/builders.rs @@ -1,6 +1,6 @@ use data_types::{ ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition, PartitionId, - PartitionKey, SequenceNumber, ShardId, SkippedCompaction, Table, TableId, Timestamp, + PartitionKey, SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, }; use uuid::Uuid; @@ -17,7 +17,6 @@ impl ParquetFileBuilder { Self { file: ParquetFile { id: ParquetFileId::new(id), - shard_id: ShardId::new(0), namespace_id: NamespaceId::new(0), table_id: TableId::new(0), partition_id: PartitionId::new(0), @@ -155,7 +154,6 @@ impl PartitionBuilder { Self { partition: Partition { id: PartitionId::new(id), - shard_id: ShardId::new(0), table_id: TableId::new(0), partition_key: PartitionKey::from("key"), sort_key: vec![], diff --git a/iox_tests/src/catalog.rs b/iox_tests/src/catalog.rs index 577ed55a37..c4a3f5ad4a 100644 --- a/iox_tests/src/catalog.rs +++ b/iox_tests/src/catalog.rs @@ -6,8 +6,8 @@ use arrow::{ }; use data_types::{ Column, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceSchema, ParquetFile, - ParquetFileParams, Partition, PartitionId, QueryPool, SequenceNumber, Shard, ShardIndex, Table, - TableId, TableSchema, Timestamp, TopicMetadata, + ParquetFileParams, Partition, PartitionId, QueryPool, SequenceNumber, Table, TableId, + TableSchema, Timestamp, TopicMetadata, }; use datafusion::physical_plan::metrics::Count; use datafusion_util::MemoryStream; @@ -137,21 +137,6 @@ impl TestCatalog { Arc::clone(&self.exec) } - /// Create a shard in the catalog - pub async fn create_shard(self: &Arc, shard_index: i32) -> Arc { - let mut repos = self.catalog.repositories().await; - - let topic = repos.topics().create_or_get("topic").await.unwrap(); - let shard_index = ShardIndex::new(shard_index); - Arc::new( - repos - .shards() - .create_or_get(&topic, shard_index) - .await - .unwrap(), - ) - } - /// Create namespace with specified retention pub async fn create_namespace_with_retention( self: &Arc, @@ -254,23 +239,6 @@ impl TestNamespace { }) } - /// Create a shard for this namespace - pub async fn 
create_shard(self: &Arc, shard_index: i32) -> Arc { - let mut repos = self.catalog.catalog.repositories().await; - - let shard = repos - .shards() - .create_or_get(&self.topic, ShardIndex::new(shard_index)) - .await - .unwrap(); - - Arc::new(TestShard { - catalog: Arc::clone(&self.catalog), - namespace: Arc::clone(self), - shard, - }) - } - /// Get namespace schema for this namespace. pub async fn schema(&self) -> NamespaceSchema { let mut repos = self.catalog.catalog.repositories().await; @@ -304,15 +272,6 @@ impl TestNamespace { } } -/// A test shard with its namespace in the catalog -#[derive(Debug)] -#[allow(missing_docs)] -pub struct TestShard { - pub catalog: Arc, - pub namespace: Arc, - pub shard: Shard, -} - /// A test table of a namespace in the catalog #[allow(missing_docs)] #[derive(Debug)] @@ -323,16 +282,49 @@ pub struct TestTable { } impl TestTable { - /// Attach a shard to the table - pub fn with_shard(self: &Arc, shard: &Arc) -> Arc { - assert!(Arc::ptr_eq(&self.catalog, &shard.catalog)); - assert!(Arc::ptr_eq(&self.namespace, &shard.namespace)); + /// Creat a partition for the table + pub async fn create_partition(self: &Arc, key: &str) -> Arc { + let mut repos = self.catalog.catalog.repositories().await; - Arc::new(TestTableBoundShard { + let partition = repos + .partitions() + .create_or_get(key.into(), self.table.id) + .await + .unwrap(); + + Arc::new(TestPartition { catalog: Arc::clone(&self.catalog), namespace: Arc::clone(&self.namespace), table: Arc::clone(self), - shard: Arc::clone(shard), + partition, + }) + } + + /// Create a partition with a specified sort key for the table + pub async fn create_partition_with_sort_key( + self: &Arc, + key: &str, + sort_key: &[&str], + ) -> Arc { + let mut repos = self.catalog.catalog.repositories().await; + + let partition = repos + .partitions() + .create_or_get(key.into(), self.table.id) + .await + .unwrap(); + + let partition = repos + .partitions() + .cas_sort_key(partition.id, None, sort_key) + .await + .unwrap(); + + Arc::new(TestPartition { + catalog: Arc::clone(&self.catalog), + namespace: Arc::clone(&self.namespace), + table: Arc::clone(self), + partition, }) } @@ -407,73 +399,13 @@ pub struct TestColumn { pub column: Column, } -/// A test catalog with specified namespace, shard, and table -#[allow(missing_docs)] -pub struct TestTableBoundShard { - pub catalog: Arc, - pub namespace: Arc, - pub table: Arc, - pub shard: Arc, -} - -impl TestTableBoundShard { - /// Creat a partition for the table - pub async fn create_partition(self: &Arc, key: &str) -> Arc { - let mut repos = self.catalog.catalog.repositories().await; - - let partition = repos - .partitions() - .create_or_get(key.into(), self.shard.shard.id, self.table.table.id) - .await - .unwrap(); - - Arc::new(TestPartition { - catalog: Arc::clone(&self.catalog), - namespace: Arc::clone(&self.namespace), - table: Arc::clone(&self.table), - shard: Arc::clone(&self.shard), - partition, - }) - } - - /// Creat a partition with a specified sort key for the table - pub async fn create_partition_with_sort_key( - self: &Arc, - key: &str, - sort_key: &[&str], - ) -> Arc { - let mut repos = self.catalog.catalog.repositories().await; - - let partition = repos - .partitions() - .create_or_get(key.into(), self.shard.shard.id, self.table.table.id) - .await - .unwrap(); - - let partition = repos - .partitions() - .cas_sort_key(partition.id, None, sort_key) - .await - .unwrap(); - - Arc::new(TestPartition { - catalog: Arc::clone(&self.catalog), - namespace: Arc::clone(&self.namespace), 
- table: Arc::clone(&self.table), - shard: Arc::clone(&self.shard), - partition, - }) - } -} - -/// A test catalog with specified namespace, shard, table, partition +/// A test catalog with specified namespace, table, partition #[allow(missing_docs)] #[derive(Debug)] pub struct TestPartition { pub catalog: Arc, pub namespace: Arc, pub table: Arc, - pub shard: Arc, pub partition: Partition, } @@ -510,7 +442,6 @@ impl TestPartition { catalog: Arc::clone(&self.catalog), namespace: Arc::clone(&self.namespace), table: Arc::clone(&self.table), - shard: Arc::clone(&self.shard), partition, }) } @@ -562,7 +493,6 @@ impl TestPartition { creation_timestamp: now(), namespace_id: self.namespace.namespace.id, namespace_name: self.namespace.namespace.name.clone().into(), - shard_id: self.shard.shard.id, table_id: self.table.table.id, table_name: self.table.table.name.clone().into(), partition_id: self.partition.id, @@ -651,7 +581,6 @@ impl TestPartition { }; let parquet_file_params = ParquetFileParams { - shard_id: self.shard.shard.id, namespace_id: self.namespace.namespace.id, table_id: self.table.table.id, partition_id: self.partition.id, @@ -686,7 +615,6 @@ impl TestPartition { catalog: Arc::clone(&self.catalog), namespace: Arc::clone(&self.namespace), table: Arc::clone(&self.table), - shard: Arc::clone(&self.shard), partition: Arc::clone(self), parquet_file, size_override, @@ -895,7 +823,6 @@ pub struct TestParquetFile { pub catalog: Arc, pub namespace: Arc, pub table: Arc, - pub shard: Arc, pub partition: Arc, pub parquet_file: ParquetFile, pub size_override: Option, diff --git a/iox_tests/src/lib.rs b/iox_tests/src/lib.rs index b3d1ade6b8..403a177f51 100644 --- a/iox_tests/src/lib.rs +++ b/iox_tests/src/lib.rs @@ -14,8 +14,7 @@ mod catalog; pub use catalog::{ - TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestShard, - TestTable, + TestCatalog, TestNamespace, TestParquetFile, TestParquetFileBuilder, TestPartition, TestTable, }; mod builders; diff --git a/ioxd_compactor2/src/lib.rs b/ioxd_compactor2/src/lib.rs index e2439c8cab..19d8ba81a6 100644 --- a/ioxd_compactor2/src/lib.rs +++ b/ioxd_compactor2/src/lib.rs @@ -5,7 +5,7 @@ use compactor2::{ compactor::Compactor2, config::{Config, PartitionsSourceConfig, ShardConfig}, }; -use data_types::{PartitionId, TRANSITION_SHARD_NUMBER}; +use data_types::PartitionId; use hyper::{Body, Request, Response}; use iox_catalog::interface::Catalog; use iox_query::exec::Executor; @@ -28,10 +28,6 @@ use std::{ use tokio_util::sync::CancellationToken; use trace::TraceCollector; -// There is only one shard with index 1 -const TOPIC: &str = "iox-shared"; -const TRANSITION_SHARD_INDEX: i32 = TRANSITION_SHARD_NUMBER; - pub struct Compactor2ServerType { compactor: Compactor2, metric_registry: Arc, @@ -174,16 +170,8 @@ pub async fn create_compactor2_server_type( CompactionType::Cold => compactor2::config::CompactionType::Cold, }; - let shard_id = Config::fetch_shard_id( - Arc::clone(&catalog), - backoff_config.clone(), - TOPIC.to_string(), - TRANSITION_SHARD_INDEX, - ) - .await; let compactor = Compactor2::start(Config { compaction_type, - shard_id, metric_registry: Arc::clone(&metric_registry), catalog, parquet_store_real, diff --git a/ioxd_querier/src/rpc/namespace.rs b/ioxd_querier/src/rpc/namespace.rs index a599a1ae87..03f6e24399 100644 --- a/ioxd_querier/src/rpc/namespace.rs +++ b/ioxd_querier/src/rpc/namespace.rs @@ -118,9 +118,6 @@ mod tests { async fn test_get_namespaces_empty() { let catalog = TestCatalog::new(); - // 
QuerierDatabase::new returns an error if there are no shards in the catalog - catalog.create_shard(0).await; - let catalog_cache = Arc::new(QuerierCatalogCache::new_testing( catalog.catalog(), catalog.time_provider(), @@ -154,9 +151,6 @@ mod tests { async fn test_get_namespaces() { let catalog = TestCatalog::new(); - // QuerierDatabase::new returns an error if there are no shards in the catalog - catalog.create_shard(0).await; - let catalog_cache = Arc::new(QuerierCatalogCache::new_testing( catalog.catalog(), catalog.time_provider(), diff --git a/parquet_file/src/lib.rs b/parquet_file/src/lib.rs index 8b179fb059..966e5eea1c 100644 --- a/parquet_file/src/lib.rs +++ b/parquet_file/src/lib.rs @@ -20,9 +20,7 @@ pub mod metadata; pub mod serialize; pub mod storage; -use data_types::{ - NamespaceId, ParquetFile, ParquetFileParams, PartitionId, ShardId, TableId, TRANSITION_SHARD_ID, -}; +use data_types::{NamespaceId, ParquetFile, ParquetFileParams, PartitionId, TableId}; use object_store::path::Path; use uuid::Uuid; @@ -32,7 +30,6 @@ use uuid::Uuid; pub struct ParquetFilePath { namespace_id: NamespaceId, table_id: TableId, - shard_id: ShardId, partition_id: PartitionId, object_store_id: Uuid, } @@ -42,14 +39,12 @@ impl ParquetFilePath { pub fn new( namespace_id: NamespaceId, table_id: TableId, - shard_id: ShardId, partition_id: PartitionId, object_store_id: Uuid, ) -> Self { Self { namespace_id, table_id, - shard_id, partition_id, object_store_id, } @@ -60,26 +55,15 @@ impl ParquetFilePath { let Self { namespace_id, table_id, - shard_id, partition_id, object_store_id, } = self; - if shard_id == &TRANSITION_SHARD_ID { - Path::from_iter([ - namespace_id.to_string().as_str(), - table_id.to_string().as_str(), - partition_id.to_string().as_str(), - &format!("{object_store_id}.parquet"), - ]) - } else { - Path::from_iter([ - namespace_id.to_string().as_str(), - table_id.to_string().as_str(), - shard_id.to_string().as_str(), - partition_id.to_string().as_str(), - &format!("{object_store_id}.parquet"), - ]) - } + Path::from_iter([ + namespace_id.to_string().as_str(), + table_id.to_string().as_str(), + partition_id.to_string().as_str(), + &format!("{object_store_id}.parquet"), + ]) } /// Get object store ID. 
@@ -107,7 +91,6 @@ impl From<&crate::metadata::IoxMetadata> for ParquetFilePath { Self { namespace_id: m.namespace_id, table_id: m.table_id, - shard_id: m.shard_id, partition_id: m.partition_id, object_store_id: m.object_store_id, } @@ -119,7 +102,6 @@ impl From<&ParquetFile> for ParquetFilePath { Self { namespace_id: f.namespace_id, table_id: f.table_id, - shard_id: f.shard_id, partition_id: f.partition_id, object_store_id: f.object_store_id, } @@ -131,7 +113,6 @@ impl From<&ParquetFileParams> for ParquetFilePath { Self { namespace_id: f.namespace_id, table_id: f.table_id, - shard_id: f.shard_id, partition_id: f.partition_id, object_store_id: f.object_store_id, } @@ -147,23 +128,6 @@ mod tests { let pfp = ParquetFilePath::new( NamespaceId::new(1), TableId::new(2), - ShardId::new(3), - PartitionId::new(4), - Uuid::nil(), - ); - let path = pfp.object_store_path(); - assert_eq!( - path.to_string(), - "1/2/3/4/00000000-0000-0000-0000-000000000000.parquet".to_string(), - ); - } - - #[test] - fn parquet_file_without_shard_id() { - let pfp = ParquetFilePath::new( - NamespaceId::new(1), - TableId::new(2), - TRANSITION_SHARD_ID, PartitionId::new(4), Uuid::nil(), ); diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index e59c31490b..5bf26cec17 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -90,8 +90,8 @@ use base64::{prelude::BASE64_STANDARD, Engine}; use bytes::Bytes; use data_types::{ ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId, - ParquetFileParams, PartitionId, PartitionKey, SequenceNumber, ShardId, StatValues, Statistics, - TableId, Timestamp, + ParquetFileParams, PartitionId, PartitionKey, SequenceNumber, StatValues, Statistics, TableId, + Timestamp, }; use generated_types::influxdata::iox::ingester::v1 as proto; use iox_time::Time; @@ -262,9 +262,6 @@ pub struct IoxMetadata { /// namespace name of the data pub namespace_name: Arc, - /// shard id of the data - pub shard_id: ShardId, - /// table id of the data pub table_id: TableId, @@ -339,7 +336,6 @@ impl IoxMetadata { creation_timestamp: Some(self.creation_timestamp.date_time().into()), namespace_id: self.namespace_id.get(), namespace_name: self.namespace_name.to_string(), - shard_id: self.shard_id.get(), table_id: self.table_id.get(), table_name: self.table_name.to_string(), partition_id: self.partition_id.get(), @@ -392,7 +388,6 @@ impl IoxMetadata { creation_timestamp, namespace_id: NamespaceId::new(proto_msg.namespace_id), namespace_name, - shard_id: ShardId::new(proto_msg.shard_id), table_id: TableId::new(proto_msg.table_id), table_name, partition_id: PartitionId::new(proto_msg.partition_id), @@ -418,7 +413,6 @@ impl IoxMetadata { creation_timestamp: Time::from_timestamp_nanos(creation_timestamp_ns), namespace_id: NamespaceId::new(1), namespace_name: "external".into(), - shard_id: ShardId::new(1), table_id: TableId::new(1), table_name: table_name.into(), partition_id: PartitionId::new(1), @@ -501,7 +495,6 @@ impl IoxMetadata { }; ParquetFileParams { - shard_id: self.shard_id, namespace_id: self.namespace_id, table_id: self.table_id, partition_id: self.partition_id, @@ -1020,7 +1013,6 @@ mod tests { creation_timestamp: create_time, namespace_id: NamespaceId::new(2), namespace_name: Arc::from("hi"), - shard_id: ShardId::new(1), table_id: TableId::new(3), table_name: Arc::from("weather"), partition_id: PartitionId::new(4), @@ -1045,7 +1037,6 @@ mod tests { creation_timestamp: Time::from_timestamp_nanos(42), namespace_id: NamespaceId::new(1), 
namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), table_name: "platanos".into(), partition_id: PartitionId::new(4), diff --git a/parquet_file/src/serialize.rs b/parquet_file/src/serialize.rs index e7920d3ef5..dfb8b094a2 100644 --- a/parquet_file/src/serialize.rs +++ b/parquet_file/src/serialize.rs @@ -197,7 +197,7 @@ mod tests { record_batch::RecordBatch, }; use bytes::Bytes; - use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, ShardId, TableId}; + use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId}; use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use datafusion_util::MemoryStream; use iox_time::Time; @@ -210,7 +210,6 @@ mod tests { creation_timestamp: Time::from_timestamp_nanos(42), namespace_id: NamespaceId::new(1), namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), table_name: "platanos".into(), partition_id: PartitionId::new(4), diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index cce3787040..52adf34302 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -323,7 +323,7 @@ mod tests { array::{ArrayRef, Int64Array, StringArray}, record_batch::RecordBatch, }; - use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, ShardId, TableId}; + use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId}; use datafusion::common::DataFusionError; use datafusion_util::MemoryStream; use iox_time::Time; @@ -575,7 +575,6 @@ mod tests { creation_timestamp: Time::from_timestamp_nanos(42), namespace_id: NamespaceId::new(1), namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), table_name: "platanos".into(), partition_id: PartitionId::new(4), diff --git a/parquet_file/tests/metadata.rs b/parquet_file/tests/metadata.rs index 24036e84fd..f8a8d0df9d 100644 --- a/parquet_file/tests/metadata.rs +++ b/parquet_file/tests/metadata.rs @@ -5,8 +5,7 @@ use arrow::{ record_batch::RecordBatch, }; use data_types::{ - ColumnId, CompactionLevel, NamespaceId, PartitionId, SequenceNumber, ShardId, TableId, - Timestamp, + ColumnId, CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId, Timestamp, }; use datafusion_util::MemoryStream; use iox_time::Time; @@ -54,7 +53,6 @@ async fn test_decoded_iox_metadata() { creation_timestamp: Time::from_timestamp_nanos(42), namespace_id: NamespaceId::new(1), namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), table_name: "platanos".into(), partition_id: PartitionId::new(4), @@ -196,7 +194,6 @@ async fn test_empty_parquet_file_panic() { creation_timestamp: Time::from_timestamp_nanos(42), namespace_id: NamespaceId::new(1), namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), table_name: "platanos".into(), partition_id: PartitionId::new(4), @@ -291,7 +288,6 @@ async fn test_decoded_many_columns_with_null_cols_iox_metadata() { creation_timestamp: Time::from_timestamp_nanos(42), namespace_id: NamespaceId::new(1), namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), table_name: "platanos".into(), partition_id: PartitionId::new(4), @@ -380,7 +376,6 @@ async fn test_derive_parquet_file_params() { creation_timestamp: Time::from_timestamp_nanos(1234), namespace_id: NamespaceId::new(1), namespace_name: "bananas".into(), - shard_id: ShardId::new(2), table_id: TableId::new(3), 
table_name: "platanos".into(), partition_id, @@ -425,7 +420,6 @@ async fn test_derive_parquet_file_params() { // // NOTE: thrift-encoded metadata not checked // TODO: check thrift-encoded metadata which may be the issue of bug 4695 - assert_eq!(catalog_data.shard_id, meta.shard_id); assert_eq!(catalog_data.namespace_id, meta.namespace_id); assert_eq!(catalog_data.table_id, meta.table_id); assert_eq!(catalog_data.partition_id, meta.partition_id); diff --git a/querier/src/cache/parquet_file.rs b/querier/src/cache/parquet_file.rs index d9c61f0426..f231093ad0 100644 --- a/querier/src/cache/parquet_file.rs +++ b/querier/src/cache/parquet_file.rs @@ -347,8 +347,8 @@ mod tests { partition.create_parquet_file(builder).await; let table_id = table.table.id; - let single_file_size = 232; - let two_file_size = 424; + let single_file_size = 224; + let two_file_size = 408; assert!(single_file_size < two_file_size); let cache = make_cache(&catalog); @@ -444,9 +444,8 @@ mod tests { let table = ns.create_table(table_name).await; table.create_column("foo", ColumnType::F64).await; table.create_column("time", ColumnType::Time).await; - let shard1 = ns.create_shard(1).await; - let partition = table.with_shard(&shard1).create_partition("k").await; + let partition = table.create_partition("k").await; (table, partition) } diff --git a/querier/src/cache/partition.rs b/querier/src/cache/partition.rs index 1a7b1ece77..a3de22d0ff 100644 --- a/querier/src/cache/partition.rs +++ b/querier/src/cache/partition.rs @@ -11,7 +11,7 @@ use cache_system::{ loader::{metrics::MetricsLoader, FunctionLoader}, resource_consumption::FunctionEstimator, }; -use data_types::{ColumnId, PartitionId, ShardId}; +use data_types::{ColumnId, PartitionId}; use iox_catalog::interface::Catalog; use iox_time::TimeProvider; use schema::sort::SortKey; @@ -74,10 +74,7 @@ impl PartitionCache { Arc::new(PartitionSortKey::new(sort_key, &extra.column_id_map_rev)) }); - Some(CachedPartition { - shard_id: partition.shard_id, - sort_key, - }) + Some(CachedPartition { sort_key }) } }); let loader = Arc::new(MetricsLoader::new( @@ -118,19 +115,6 @@ impl PartitionCache { } } - /// Get shard ID. - pub async fn shard_id( - &self, - cached_table: Arc, - partition_id: PartitionId, - span: Option, - ) -> Option { - self.cache - .get(partition_id, (cached_table, span)) - .await - .map(|p| p.shard_id) - } - /// Get sort key /// /// Expire partition if the cached sort key does NOT cover the given set of columns. 
@@ -164,7 +148,6 @@ impl PartitionCache { #[derive(Debug, Clone)] struct CachedPartition { - shard_id: ShardId, sort_key: Option>, } @@ -227,74 +210,6 @@ mod tests { use iox_tests::TestCatalog; use schema::{Schema, SchemaBuilder}; - #[tokio::test] - async fn test_shard_id() { - let catalog = TestCatalog::new(); - - let ns = catalog.create_namespace_1hr_retention("ns").await; - let t = ns.create_table("table").await; - let s1 = ns.create_shard(1).await; - let s2 = ns.create_shard(2).await; - let p1 = t - .with_shard(&s1) - .create_partition("k1") - .await - .partition - .clone(); - let p2 = t - .with_shard(&s2) - .create_partition("k2") - .await - .partition - .clone(); - let cached_table = Arc::new(CachedTable { - id: t.table.id, - schema: schema(), - column_id_map: HashMap::default(), - column_id_map_rev: HashMap::default(), - primary_key_column_ids: vec![], - }); - - let cache = PartitionCache::new( - catalog.catalog(), - BackoffConfig::default(), - catalog.time_provider(), - &catalog.metric_registry(), - test_ram_pool(), - true, - ); - - let id1 = cache - .shard_id(Arc::clone(&cached_table), p1.id, None) - .await - .unwrap(); - assert_eq!(id1, s1.shard.id); - assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1); - - let id2 = cache - .shard_id(Arc::clone(&cached_table), p2.id, None) - .await - .unwrap(); - assert_eq!(id2, s2.shard.id); - assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2); - - let id1 = cache - .shard_id(Arc::clone(&cached_table), p1.id, None) - .await - .unwrap(); - assert_eq!(id1, s1.shard.id); - assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2); - - // non-existing partition - for _ in 0..2 { - let res = cache - .shard_id(Arc::clone(&cached_table), PartitionId::new(i64::MAX), None) - .await; - assert_eq!(res, None); - assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3); - } - } - #[tokio::test] async fn test_sort_key() { let catalog = TestCatalog::new(); @@ -303,16 +218,12 @@ mod tests { let t = ns.create_table("table").await; let c1 = t.create_column("tag", ColumnType::Tag).await; let c2 = t.create_column("time", ColumnType::Time).await; - let s1 = ns.create_shard(1).await; - let s2 = ns.create_shard(2).await; let p1 = t - .with_shard(&s1) .create_partition_with_sort_key("k1", &["tag", "time"]) .await .partition .clone(); let p2 = t - .with_shard(&s2) .create_partition("k2") // no sort key .await .partition @@ -391,26 +302,13 @@ mod tests { let t = ns.create_table("table").await; let c1 = t.create_column("tag", ColumnType::Tag).await; let c2 = t.create_column("time", ColumnType::Time).await; - let s1 = ns.create_shard(1).await; - let s2 = ns.create_shard(2).await; let p1 = t - .with_shard(&s1) .create_partition_with_sort_key("k1", &["tag", "time"]) .await .partition .clone(); - let p2 = t - .with_shard(&s2) - .create_partition("k2") - .await - .partition - .clone(); - let p3 = t - .with_shard(&s2) - .create_partition("k3") - .await - .partition - .clone(); + let p2 = t.create_partition("k2").await.partition.clone(); + let p3 = t.create_partition("k3").await.partition.clone(); let cached_table = Arc::new(CachedTable { id: t.table.id, schema: schema(), @@ -434,22 +332,19 @@ mod tests { true, ); - cache.shard_id(Arc::clone(&cached_table), p2.id, None).await; cache .sort_key(Arc::clone(&cached_table), p3.id, &Vec::new(), None) .await; - assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2); + 
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1); - cache.shard_id(Arc::clone(&cached_table), p1.id, None).await; cache .sort_key(Arc::clone(&cached_table), p2.id, &Vec::new(), None) .await; - assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3); + assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2); cache .sort_key(Arc::clone(&cached_table), p1.id, &Vec::new(), None) .await; - cache.shard_id(Arc::clone(&cached_table), p2.id, None).await; assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3); } @@ -461,8 +356,7 @@ mod tests { let t = ns.create_table("table").await; let c1 = t.create_column("foo", ColumnType::Tag).await; let c2 = t.create_column("time", ColumnType::Time).await; - let s = ns.create_shard(1).await; - let p = t.with_shard(&s).create_partition("k1").await; + let p = t.create_partition("k1").await; let p_id = p.partition.id; let p_sort_key = p.partition.sort_key(); let cached_table = Arc::new(CachedTable { diff --git a/querier/src/database.rs b/querier/src/database.rs index 3e3b423902..9b27ee0aef 100644 --- a/querier/src/database.rs +++ b/querier/src/database.rs @@ -234,8 +234,6 @@ mod tests { #[tokio::test] async fn test_namespace() { let catalog = TestCatalog::new(); - // QuerierDatabase::new returns an error if there are no shards in the catalog - catalog.create_shard(0).await; let catalog_cache = Arc::new(CatalogCache::new_testing( catalog.catalog(), @@ -264,8 +262,6 @@ mod tests { #[tokio::test] async fn test_namespaces() { let catalog = TestCatalog::new(); - // QuerierDatabase::new returns an error if there are no shards in the catalog - catalog.create_shard(0).await; let catalog_cache = Arc::new(CatalogCache::new_testing( catalog.catalog(), diff --git a/querier/src/handler.rs b/querier/src/handler.rs index ff18e8d559..94e57a4c4a 100644 --- a/querier/src/handler.rs +++ b/querier/src/handler.rs @@ -160,7 +160,6 @@ impl Drop for QuerierHandlerImpl { mod tests { use super::*; use crate::{cache::CatalogCache, create_ingester_connection_for_testing}; - use data_types::ShardIndex; use iox_catalog::mem::MemCatalog; use iox_query::exec::Executor; use iox_time::{MockProvider, Time}; @@ -204,18 +203,6 @@ mod tests { Arc::clone(&object_store), &Handle::current(), )); - // QuerierDatabase::new returns an error if there are no shards in the catalog - { - let mut repos = catalog.repositories().await; - - let topic = repos.topics().create_or_get("topic").await.unwrap(); - let shard_index = ShardIndex::new(0); - repos - .shards() - .create_or_get(&topic, shard_index) - .await - .unwrap(); - } let database = Arc::new( QuerierDatabase::new( diff --git a/querier/src/ingester/mod.rs b/querier/src/ingester/mod.rs index 4b1a76d4e3..50dd83e30d 100644 --- a/querier/src/ingester/mod.rs +++ b/querier/src/ingester/mod.rs @@ -13,8 +13,8 @@ use async_trait::async_trait; use backoff::{Backoff, BackoffConfig, BackoffError}; use client_util::connection; use data_types::{ - ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, SequenceNumber, ShardId, - ShardIndex, TableSummary, TimestampMinMax, + ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, SequenceNumber, TableSummary, + TimestampMinMax, }; use datafusion::error::DataFusionError; use futures::{stream::FuturesUnordered, TryStreamExt}; @@ -127,16 +127,6 @@ pub enum Error { ingester_address: String, }, - #[snafu(display( - "No ingester found in shard to ingester mapping for shard index {shard_index}" - ))] - 
NoIngesterFoundForShard { shard_index: ShardIndex }, - - #[snafu(display( - "Shard index {shard_index} was neither mapped to an ingester nor marked ignore" - ))] - ShardNotMapped { shard_index: ShardIndex }, - #[snafu(display("Could not parse `{ingester_uuid}` as a UUID: {source}"))] IngesterUuid { ingester_uuid: String, @@ -498,46 +488,13 @@ async fn execute( decoder.finalize().await } -/// Current partition used while decoding the ingester response stream. -#[derive(Debug)] -enum CurrentPartition { - /// There exists a partition. - Some(IngesterPartition), - - /// There is no existing partition. - None, - - /// Skip the current partition (e.g. because it is gone from the catalog). - Skip, -} - -impl CurrentPartition { - fn take(&mut self) -> Option { - let mut tmp = Self::None; - std::mem::swap(&mut tmp, self); - - match tmp { - Self::None | Self::Skip => None, - Self::Some(p) => Some(p), - } - } - - fn is_skip(&self) -> bool { - matches!(self, Self::Skip) - } - - fn is_some(&self) -> bool { - matches!(self, Self::Some(_)) - } -} - /// Helper to disassemble the data from the ingester Apache Flight arrow stream. /// /// This should be used AFTER the stream was drained because we will perform some catalog IO and /// this should likely not block the ingester. struct IngesterStreamDecoder { finished_partitions: HashMap, - current_partition: CurrentPartition, + current_partition: Option, current_chunk: Option<(Schema, Vec)>, ingester_address: Arc, catalog_cache: Arc, @@ -555,7 +512,7 @@ impl IngesterStreamDecoder { ) -> Self { Self { finished_partitions: HashMap::new(), - current_partition: CurrentPartition::None, + current_partition: None, current_chunk: None, ingester_address, catalog_cache, @@ -571,11 +528,8 @@ impl IngesterStreamDecoder { .current_partition .take() .expect("Partition should have been checked before chunk creation"); - self.current_partition = CurrentPartition::Some(current_partition.try_add_chunk( - ChunkId::new(), - schema, - batches, - )?); + self.current_partition = + Some(current_partition.try_add_chunk(ChunkId::new(), schema, batches)?); } Ok(()) @@ -648,21 +602,6 @@ impl IngesterStreamDecoder { ingester_address: self.ingester_address.as_ref() }, ); - let shard_id = self - .catalog_cache - .partition() - .shard_id( - Arc::clone(&self.cached_table), - partition_id, - self.span_recorder - .child_span("cache GET partition shard ID"), - ) - .await; - - let Some(shard_id) = shard_id else { - self.current_partition = CurrentPartition::Skip; - return Ok(()) - }; // Use a temporary empty partition sort key. 
We are going to fetch this AFTER we // know all chunks because then we are able to detect all relevant primary key @@ -683,18 +622,13 @@ impl IngesterStreamDecoder { let partition = IngesterPartition::new( ingester_uuid, partition_id, - shard_id, md.completed_persistence_count, status.parquet_max_sequence_number.map(SequenceNumber::new), partition_sort_key, ); - self.current_partition = CurrentPartition::Some(partition); + self.current_partition = Some(partition); } DecodedPayload::Schema(schema) => { - if self.current_partition.is_skip() { - return Ok(()); - } - self.flush_chunk()?; ensure!( self.current_partition.is_some(), @@ -716,10 +650,6 @@ impl IngesterStreamDecoder { self.current_chunk = Some((schema, vec![])); } DecodedPayload::RecordBatch(batch) => { - if self.current_partition.is_skip() { - return Ok(()); - } - let current_chunk = self.current_chunk .as_mut() @@ -771,7 +701,7 @@ fn encode_predicate_as_base64(predicate: &Predicate) -> String { #[async_trait] impl IngesterConnection for IngesterConnectionImpl { - /// Retrieve chunks from the ingester for the particular table, shard, and predicate + /// Retrieve chunks from the ingester for the particular table and predicate async fn partitions( &self, namespace_id: NamespaceId, @@ -871,12 +801,11 @@ impl IngesterConnection for IngesterConnectionImpl { /// Given the catalog hierarchy: /// /// ```text -/// (Catalog) Shard -> (Catalog) Table --> (Catalog) Partition +/// (Catalog) Table --> (Catalog) Partition /// ``` /// -/// An IngesterPartition contains the unpersisted data for a catalog -/// partition from a shard. Thus, there can be more than one -/// IngesterPartition for each table the ingester knows about. +/// An IngesterPartition contains the unpersisted data for a catalog partition. Thus, there can be +/// more than one IngesterPartition for each table the ingester knows about. #[derive(Debug, Clone)] pub struct IngesterPartition { /// If using ingester2/rpc write path, the ingester UUID will be present and will identify @@ -887,7 +816,6 @@ pub struct IngesterPartition { ingester_uuid: Option, partition_id: PartitionId, - shard_id: ShardId, /// If using ingester2/rpc write path, this will be the number of Parquet files this ingester /// UUID has persisted for this partition. 
@@ -910,7 +838,6 @@ impl IngesterPartition { pub fn new( ingester_uuid: Option, partition_id: PartitionId, - shard_id: ShardId, completed_persistence_count: u64, parquet_max_sequence_number: Option, partition_sort_key: Option>, @@ -918,7 +845,6 @@ impl IngesterPartition { Self { ingester_uuid, partition_id, - shard_id, completed_persistence_count, parquet_max_sequence_number, partition_sort_key, @@ -996,10 +922,6 @@ impl IngesterPartition { self.partition_id } - pub(crate) fn shard_id(&self) -> ShardId { - self.shard_id - } - pub(crate) fn completed_persistence_count(&self) -> u64 { self.completed_persistence_count } @@ -1322,64 +1244,6 @@ mod tests { assert!(partitions.is_empty()); } - #[tokio::test] - async fn test_flight_unknown_partitions() { - let ingester_uuid = Uuid::new_v4(); - let record_batch = lp_to_record_batch("table foo=1 1"); - - let schema = record_batch.schema(); - - let mock_flight_client = Arc::new( - MockFlightClient::new([( - "addr1", - Ok(MockQueryData { - results: vec![ - metadata( - 1000, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid.to_string(), - 3, - ), - metadata( - 1001, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid.to_string(), - 4, - ), - Ok(( - DecodedPayload::Schema(Arc::clone(&schema)), - IngesterQueryResponseMetadata::default(), - )), - metadata( - 1002, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid.to_string(), - 5, - ), - Ok(( - DecodedPayload::Schema(Arc::clone(&schema)), - IngesterQueryResponseMetadata::default(), - )), - Ok(( - DecodedPayload::RecordBatch(record_batch), - IngesterQueryResponseMetadata::default(), - )), - ], - }), - )]) - .await, - ); - let ingester_conn = mock_flight_client.ingester_conn().await; - let partitions = get_partitions(&ingester_conn).await.unwrap(); - assert!(partitions.is_empty()); - } - #[tokio::test] async fn test_flight_no_batches() { let ingester_uuid = Uuid::new_v4(); @@ -1515,7 +1379,7 @@ mod tests { } #[tokio::test] - async fn test_flight_many_batches_no_shard() { + async fn test_flight_many_batches() { let ingester_uuid1 = Uuid::new_v4(); let ingester_uuid2 = Uuid::new_v4(); @@ -1958,12 +1822,9 @@ mod tests { let ns = catalog.create_namespace_1hr_retention("namespace").await; let table = ns.create_table("table").await; - let s0 = ns.create_shard(0).await; - let s1 = ns.create_shard(1).await; - - table.with_shard(&s0).create_partition("k1").await; - table.with_shard(&s0).create_partition("k2").await; - table.with_shard(&s1).create_partition("k3").await; + table.create_partition("k1").await; + table.create_partition("k2").await; + table.create_partition("k3").await; Self { catalog, @@ -2038,7 +1899,6 @@ mod tests { let ingester_partition = IngesterPartition::new( Some(ingester_uuid), PartitionId::new(1), - ShardId::new(1), 0, parquet_max_sequence_number, None, @@ -2068,7 +1928,6 @@ mod tests { let err = IngesterPartition::new( Some(ingester_uuid), PartitionId::new(1), - ShardId::new(1), 0, parquet_max_sequence_number, None, diff --git a/querier/src/namespace/query_access.rs b/querier/src/namespace/query_access.rs index e09777918c..4ce9353935 100644 --- a/querier/src/namespace/query_access.rs +++ b/querier/src/namespace/query_access.rs @@ -224,9 +224,6 @@ mod tests { // namespace with infinite retention policy let ns = catalog.create_namespace_with_retention("ns", None).await; - let shard1 = ns.create_shard(1).await; - let shard2 = ns.create_shard(2).await; - let table_cpu = 
ns.create_table("cpu").await; let table_mem = ns.create_table("mem").await; @@ -238,11 +235,11 @@ mod tests { table_mem.create_column("time", ColumnType::Time).await; table_mem.create_column("perc", ColumnType::F64).await; - let partition_cpu_a_1 = table_cpu.with_shard(&shard1).create_partition("a").await; - let partition_cpu_a_2 = table_cpu.with_shard(&shard2).create_partition("a").await; - let partition_cpu_b_1 = table_cpu.with_shard(&shard1).create_partition("b").await; - let partition_mem_c_1 = table_mem.with_shard(&shard1).create_partition("c").await; - let partition_mem_c_2 = table_mem.with_shard(&shard2).create_partition("c").await; + let partition_cpu_a_1 = table_cpu.create_partition("a").await; + let partition_cpu_a_2 = table_cpu.create_partition("a").await; + let partition_cpu_b_1 = table_cpu.create_partition("b").await; + let partition_mem_c_1 = table_mem.create_partition("c").await; + let partition_mem_c_2 = table_mem.create_partition("c").await; let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(1)) @@ -322,8 +319,6 @@ mod tests { .flag_for_delete() .await; - table_mem.with_shard(&shard1); - let querier_namespace = Arc::new(querier_namespace(&ns).await); let traces = Arc::new(RingBufferTraceCollector::new(100)); diff --git a/querier/src/parquet/mod.rs b/querier/src/parquet/mod.rs index a72fc22811..37b89eacde 100644 --- a/querier/src/parquet/mod.rs +++ b/querier/src/parquet/mod.rs @@ -233,7 +233,6 @@ pub mod tests { ] .join("\n"); let ns = catalog.create_namespace_1hr_retention("ns").await; - let shard = ns.create_shard(1).await; let table = ns.create_table("table").await; table.create_column("tag1", ColumnType::Tag).await; table.create_column("tag2", ColumnType::Tag).await; @@ -243,7 +242,6 @@ pub mod tests { table.create_column("field_float", ColumnType::F64).await; table.create_column("time", ColumnType::Time).await; let partition = table - .with_shard(&shard) .create_partition("part") .await .update_sort_key(SortKey::from_columns(["tag1", "tag2", "tag4", "time"])) diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs index 03f1bc5f2c..9b57c3fb50 100644 --- a/querier/src/table/mod.rs +++ b/querier/src/table/mod.rs @@ -491,14 +491,13 @@ mod tests { let outside_retention = inside_retention - Duration::from_secs(2 * 60 * 60).as_nanos() as i64; // 2 hours ago - let shard = ns.create_shard(1).await; let table = ns.create_table("cpu").await; table.create_column("host", ColumnType::Tag).await; table.create_column("time", ColumnType::Time).await; table.create_column("load", ColumnType::F64).await; - let partition = table.with_shard(&shard).create_partition("a").await; + let partition = table.create_partition("a").await; let querier_table = TestQuerierTable::new(&catalog, &table).await; @@ -577,12 +576,9 @@ mod tests { let table1 = ns.create_table("table1").await; let table2 = ns.create_table("table2").await; - let shard1 = ns.create_shard(1).await; - let shard2 = ns.create_shard(2).await; - - let partition11 = table1.with_shard(&shard1).create_partition("k").await; - let partition12 = table1.with_shard(&shard2).create_partition("k").await; - let partition21 = table2.with_shard(&shard1).create_partition("k").await; + let partition11 = table1.create_partition("k").await; + let partition12 = table1.create_partition("k").await; + let partition21 = table2.create_partition("k").await; table1.create_column("time", ColumnType::Time).await; table1.create_column("foo", ColumnType::F64).await; @@ -704,12 +700,11 @@ mod tests { let 
catalog = TestCatalog::new(); let ns = catalog.create_namespace_1hr_retention("ns").await; let table = ns.create_table("table").await; - let shard = ns.create_shard(1).await; - let partition = table.with_shard(&shard).create_partition("k").await; + let partition = table.create_partition("k").await; let schema = make_schema_two_fields_two_tags(&table).await; // let add a partion from the ingester - let builder = IngesterPartitionBuilder::new(schema, &shard, &partition) + let builder = IngesterPartitionBuilder::new(schema, &partition) .with_lp(["table,tag1=val1,tag2=val2 foo=3,bar=4 11"]); let ingester_partition = @@ -773,12 +768,10 @@ mod tests { let catalog = TestCatalog::new(); let ns = catalog.create_namespace_1hr_retention("ns").await; let table = ns.create_table("table1").await; - let shard = ns.create_shard(1).await; - let partition = table.with_shard(&shard).create_partition("k").await; + let partition = table.create_partition("k").await; let schema = make_schema(&table).await; - let builder = - IngesterPartitionBuilder::new(schema, &shard, &partition).with_lp(["table foo=1 1"]); + let builder = IngesterPartitionBuilder::new(schema, &partition).with_lp(["table foo=1 1"]); // Parquet file between with max sequence number 2 let pf_builder = TestParquetFileBuilder::default() diff --git a/querier/src/table/state_reconciler.rs b/querier/src/table/state_reconciler.rs index fe1f839d0f..3b22495cc7 100644 --- a/querier/src/table/state_reconciler.rs +++ b/querier/src/table/state_reconciler.rs @@ -220,12 +220,11 @@ mod tests { interface::{IngesterPartitionInfo, ParquetFileInfo}, *, }; - use data_types::{CompactionLevel, SequenceNumber, ShardId}; + use data_types::{CompactionLevel, SequenceNumber}; #[derive(Debug)] struct MockIngesterPartitionInfo { partition_id: PartitionId, - shard_id: ShardId, parquet_max_sequence_number: Option, } @@ -234,10 +233,6 @@ mod tests { self.partition_id } - fn shard_id(&self) -> ShardId { - self.shard_id - } - fn parquet_max_sequence_number(&self) -> Option { self.parquet_max_sequence_number } diff --git a/querier/src/table/state_reconciler/interface.rs b/querier/src/table/state_reconciler/interface.rs index 1fd4eaa6ca..ba7bd95afe 100644 --- a/querier/src/table/state_reconciler/interface.rs +++ b/querier/src/table/state_reconciler/interface.rs @@ -1,7 +1,7 @@ //! Interface for reconciling Ingester and catalog state use crate::{ingester::IngesterPartition, parquet::QuerierParquetChunk}; -use data_types::{CompactionLevel, ParquetFile, PartitionId, SequenceNumber, ShardId}; +use data_types::{CompactionLevel, ParquetFile, PartitionId, SequenceNumber}; use std::{ops::Deref, sync::Arc}; /// Information about an ingester partition. @@ -9,7 +9,6 @@ use std::{ops::Deref, sync::Arc}; /// This is mostly the same as [`IngesterPartition`] but allows easier mocking. 
pub trait IngesterPartitionInfo { fn partition_id(&self) -> PartitionId; - fn shard_id(&self) -> ShardId; fn parquet_max_sequence_number(&self) -> Option; } @@ -18,10 +17,6 @@ impl IngesterPartitionInfo for IngesterPartition { self.deref().partition_id() } - fn shard_id(&self) -> ShardId { - self.deref().shard_id() - } - fn parquet_max_sequence_number(&self) -> Option { self.deref().parquet_max_sequence_number() } @@ -35,10 +30,6 @@ where self.deref().partition_id() } - fn shard_id(&self) -> ShardId { - self.deref().shard_id() - } - fn parquet_max_sequence_number(&self) -> Option { self.deref().parquet_max_sequence_number() } diff --git a/querier/src/table/test_util.rs b/querier/src/table/test_util.rs index b69e114b92..ecf7f40873 100644 --- a/querier/src/table/test_util.rs +++ b/querier/src/table/test_util.rs @@ -6,7 +6,7 @@ use crate::{ use arrow::record_batch::RecordBatch; use data_types::{ChunkId, SequenceNumber}; use iox_catalog::interface::{get_schema_by_name, SoftDeletedRows}; -use iox_tests::{TestCatalog, TestPartition, TestShard, TestTable}; +use iox_tests::{TestCatalog, TestPartition, TestTable}; use mutable_batch_lp::test_helpers::lp_to_mutable_batch; use schema::{sort::SortKey, Projection, Schema}; use std::{sync::Arc, time::Duration}; @@ -64,7 +64,6 @@ pub(crate) fn lp_to_record_batch(lp: &str) -> RecordBatch { #[derive(Debug, Clone)] pub(crate) struct IngesterPartitionBuilder { schema: Schema, - shard: Arc, partition: Arc, ingester_chunk_id: u128, @@ -75,14 +74,9 @@ pub(crate) struct IngesterPartitionBuilder { } impl IngesterPartitionBuilder { - pub(crate) fn new( - schema: Schema, - shard: &Arc, - partition: &Arc, - ) -> Self { + pub(crate) fn new(schema: Schema, partition: &Arc) -> Self { Self { schema, - shard: Arc::clone(shard), partition: Arc::clone(partition), partition_sort_key: None, ingester_chunk_id: 1, @@ -115,7 +109,6 @@ impl IngesterPartitionBuilder { IngesterPartition::new( Some(Uuid::new_v4()), self.partition.partition.id, - self.shard.shard.id, 0, parquet_max_sequence_number, self.partition_sort_key.clone(), diff --git a/service_grpc_catalog/src/lib.rs b/service_grpc_catalog/src/lib.rs index 350fad4d30..e19cba4762 100644 --- a/service_grpc_catalog/src/lib.rs +++ b/service_grpc_catalog/src/lib.rs @@ -200,7 +200,6 @@ mod tests { use super::*; use data_types::{ ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp, - TRANSITION_SHARD_INDEX, }; use generated_types::influxdata::iox::catalog::v1::catalog_service_server::CatalogService; use iox_catalog::mem::MemCatalog; @@ -222,11 +221,6 @@ mod tests { .create_or_get("iox-shared") .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, TRANSITION_SHARD_INDEX) - .await - .unwrap(); let namespace = repos .namespaces() .create("catalog_partition_test", None, topic.id, pool.id) @@ -239,11 +233,10 @@ mod tests { .unwrap(); let partition = repos .partitions() - .create_or_get("foo".into(), shard.id, table.id) + .create_or_get("foo".into(), table.id) .await .unwrap(); let p1params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: table.id, partition_id: partition.id, @@ -299,11 +292,6 @@ mod tests { .create_or_get("iox-shared") .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, TRANSITION_SHARD_INDEX) - .await - .unwrap(); let namespace = repos .namespaces() .create("catalog_partition_test", None, topic.id, pool.id) @@ -316,12 +304,12 @@ mod tests { .unwrap(); partition1 = repos .partitions() - .create_or_get("foo".into(), 
shard.id, table.id) + .create_or_get("foo".into(), table.id) .await .unwrap(); partition2 = repos .partitions() - .create_or_get("bar".into(), shard.id, table.id) + .create_or_get("bar".into(), table.id) .await .unwrap(); diff --git a/service_grpc_object_store/src/lib.rs b/service_grpc_object_store/src/lib.rs index 89e1a69e34..1abd7f825a 100644 --- a/service_grpc_object_store/src/lib.rs +++ b/service_grpc_object_store/src/lib.rs @@ -70,7 +70,6 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService { let path = ParquetFilePath::new( parquet_file.namespace_id, parquet_file.table_id, - parquet_file.shard_id, parquet_file.partition_id, parquet_file.object_store_id, ); @@ -98,8 +97,7 @@ mod tests { use super::*; use bytes::Bytes; use data_types::{ - ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, ShardIndex, - Timestamp, + ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp, }; use generated_types::influxdata::iox::object_store::v1::object_store_service_server::ObjectStoreService; use iox_catalog::mem::MemCatalog; @@ -120,11 +118,6 @@ mod tests { .create_or_get("iox-shared") .await .unwrap(); - let shard = repos - .shards() - .create_or_get(&topic, ShardIndex::new(1)) - .await - .unwrap(); let namespace = repos .namespaces() .create("catalog_partition_test", None, topic.id, pool.id) @@ -137,11 +130,10 @@ mod tests { .unwrap(); let partition = repos .partitions() - .create_or_get("foo".into(), shard.id, table.id) + .create_or_get("foo".into(), table.id) .await .unwrap(); let p1params = ParquetFileParams { - shard_id: shard.id, namespace_id: namespace.id, table_id: table.id, partition_id: partition.id, @@ -166,7 +158,6 @@ mod tests { let path = ParquetFilePath::new( p1.namespace_id, p1.table_id, - p1.shard_id, p1.partition_id, p1.object_store_id, );