fix: Remove ChunkAddr

pull/24376/head
Carol (Nichols || Goulding) 2022-05-12 15:45:10 -04:00
parent 975dd288d4
commit faba90d992
8 changed files with 30 additions and 137 deletions

@@ -1,8 +1,8 @@
 //! Queryable Compactor Data

 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary,
-    Timestamp, TimestampMinMax, Tombstone,
+    ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary, Timestamp,
+    TimestampMinMax, Tombstone,
 };
 use datafusion::physical_plan::SendableRecordBatchStream;
 use observability_deps::tracing::trace;
@@ -161,11 +161,6 @@ impl QueryChunk for QueryableParquetChunk {
         ChunkId::new_id_for_ng(timestamp_nano_u128)
     }

-    // This function should not be used in this context
-    fn addr(&self) -> ChunkAddr {
-        unimplemented!()
-    }
-
     /// Returns the name of the table stored in this chunk
     fn table_name(&self) -> &str {
         &self.table_name

@@ -893,35 +893,6 @@ pub struct ProcessedTombstone {
     pub parquet_file_id: ParquetFileId,
 }

-/// Address of the chunk within the catalog
-#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
-pub struct ChunkAddr {
-    /// Database name
-    pub db_name: Arc<str>,
-
-    /// What table does the chunk belong to?
-    pub table_name: Arc<str>,
-
-    /// What partition does the chunk belong to?
-    pub partition_key: Arc<str>,
-
-    /// The ID of the chunk
-    pub chunk_id: ChunkId,
-}
-
-impl std::fmt::Display for ChunkAddr {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "Chunk('{}':'{}':'{}':{})",
-            self.db_name,
-            self.table_name,
-            self.partition_key,
-            self.chunk_id.get()
-        )
-    }
-}
-
 /// ID of a chunk.
 ///
 /// This ID is unique within a single partition.
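The removed `Display` impl is what produced strings like `Chunk('ns':'table':'1-part':<id>)` in logs and test assertions. A minimal standalone sketch of the deleted type, assuming a stubbed u128 `ChunkId` (the real type wraps a `Uuid`), not the actual data_types code:

use std::sync::Arc;

// Stub for data_types::ChunkId; the real type wraps a Uuid.
struct ChunkId(u128);

impl ChunkId {
    fn get(&self) -> u128 {
        self.0
    }
}

struct ChunkAddr {
    db_name: Arc<str>,
    table_name: Arc<str>,
    partition_key: Arc<str>,
    chunk_id: ChunkId,
}

impl std::fmt::Display for ChunkAddr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Chunk('{}':'{}':'{}':{})",
            self.db_name, self.table_name, self.partition_key, self.chunk_id.get()
        )
    }
}

fn main() {
    let addr = ChunkAddr {
        db_name: Arc::from("ns"),
        table_name: Arc::from("table"),
        partition_key: Arc::from("1-part"),
        chunk_id: ChunkId(1),
    };
    println!("{addr}"); // prints: Chunk('ns':'table':'1-part':1)
}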

@@ -4,7 +4,7 @@ use crate::data::{QueryableBatch, SnapshotBatch};
 use arrow::record_batch::RecordBatch;
 use arrow_util::util::merge_record_batches;
 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary,
+    ChunkId, ChunkOrder, DeletePredicate, PartitionId, SequenceNumber, TableSummary,
     TimestampMinMax, Tombstone,
 };
 use datafusion::{
@@ -151,11 +151,6 @@ impl QueryChunk for QueryableBatch {
         ChunkId::new_test(0)
     }

-    // This function should not be used in PersistingBatch context
-    fn addr(&self) -> ChunkAddr {
-        unimplemented!()
-    }
-
     /// Returns the name of the table stored in this chunk
     fn table_name(&self) -> &str {
         &self.table_name

@@ -3,8 +3,8 @@
 use crate::cache::CatalogCache;
 use arrow::record_batch::RecordBatch;
 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, ParquetFile, ParquetFileId,
-    ParquetFileWithMetadata, PartitionId, SequenceNumber, SequencerId, TimestampMinMax,
+    ChunkId, ChunkOrder, DeletePredicate, ParquetFileId, ParquetFileWithMetadata, PartitionId,
+    SequenceNumber, SequencerId, TimestampMinMax,
 };
 use futures::StreamExt;
 use iox_catalog::interface::Catalog;
@@ -23,8 +23,11 @@ mod query_access;
 /// Immutable metadata attached to a [`QuerierChunk`].
 #[derive(Debug)]
 pub struct ChunkMeta {
-    /// Chunk address.
-    addr: ChunkAddr,
+    /// The ID of the chunk
+    chunk_id: ChunkId,
+
+    /// Table name
+    table_name: Arc<str>,

     /// Chunk order.
     order: ChunkOrder,
@@ -49,11 +52,6 @@ pub struct ChunkMeta {
 }

 impl ChunkMeta {
-    /// Chunk address.
-    pub fn addr(&self) -> &ChunkAddr {
-        &self.addr
-    }
-
     /// Chunk order.
     pub fn order(&self) -> ChunkOrder {
         self.order
@@ -234,10 +232,13 @@ impl ParquetChunkAdapter {
         parquet_file_with_metadata: ParquetFileWithMetadata,
     ) -> Option<QuerierChunk> {
         let decoded_parquet_file = DecodedParquetFile::new(parquet_file_with_metadata);
+        let parquet_file = decoded_parquet_file.parquet_file;
         let chunk = Arc::new(self.new_parquet_chunk(&decoded_parquet_file).await?);
-
-        let addr = self
-            .old_gen_chunk_addr(&decoded_parquet_file.parquet_file)
+        let chunk_id = ChunkId::from(Uuid::from_u128(parquet_file.id.get() as _));
+        let table_name = self
+            .catalog_cache
+            .table()
+            .name(parquet_file.table_id)
             .await?;

         let iox_metadata = &decoded_parquet_file.iox_metadata;
@@ -255,56 +256,18 @@
         .await;

         let meta = Arc::new(ChunkMeta {
-            addr,
+            chunk_id,
+            table_name,
             order,
             sort_key: iox_metadata.sort_key.clone(),
             partition_sort_key,
             sequencer_id: iox_metadata.sequencer_id,
             partition_id: iox_metadata.partition_id,
-            min_sequence_number: decoded_parquet_file.parquet_file.min_sequence_number,
-            max_sequence_number: decoded_parquet_file.parquet_file.max_sequence_number,
+            min_sequence_number: parquet_file.min_sequence_number,
+            max_sequence_number: parquet_file.max_sequence_number,
         });

-        Some(QuerierChunk::new_parquet(
-            decoded_parquet_file.parquet_file.id,
-            chunk,
-            meta,
-        ))
-    }
-
-    /// Get chunk addr for old gen.
-    ///
-    /// Mapping of NG->old:
-    /// - `namespace.name -> db_name`
-    /// - `table.name -> table_name`
-    /// - `sequencer.id X partition.name -> partition_key`
-    /// - `parquet_file.id -> chunk_id`
-    ///
-    /// Returns `None` if some data required to create this chunk is already gone from the catalog.
-    pub async fn old_gen_chunk_addr(&self, parquet_file: &ParquetFile) -> Option<ChunkAddr> {
-        Some(ChunkAddr {
-            db_name: self
-                .catalog_cache
-                .namespace()
-                .name(
-                    self.catalog_cache
-                        .table()
-                        .namespace_id(parquet_file.table_id)
-                        .await?,
-                )
-                .await?,
-            table_name: self
-                .catalog_cache
-                .table()
-                .name(parquet_file.table_id)
-                .await?,
-            partition_key: self
-                .catalog_cache
-                .partition()
-                .old_gen_partition_key(parquet_file.partition_id)
-                .await,
-            chunk_id: ChunkId::from(Uuid::from_u128(parquet_file.id.get() as _)),
-        })
+        Some(QuerierChunk::new_parquet(parquet_file.id, chunk, meta))
     }
 }
@@ -368,12 +331,6 @@ pub mod tests {
         // create chunk
        let chunk = adapter.new_querier_chunk(parquet_file).await.unwrap();

-        // check chunk addr
-        assert_eq!(
-            chunk.meta().addr().to_string(),
-            "Chunk('ns':'table':'1-part':00000000-0000-0000-0000-000000000001)",
-        );
-
         // check chunk schema
         let expected_schema = SchemaBuilder::new()
             .field("field_int", DataType::Int64)

@@ -1,6 +1,6 @@
 use crate::chunk::{ChunkStorage, QuerierChunk};
 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary, TimestampMinMax,
+    ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary, TimestampMinMax,
 };
 use observability_deps::tracing::debug;
 use predicate::PredicateMatch;
@@ -54,15 +54,11 @@ impl QueryChunkMeta for QuerierChunk {
 }

 impl QueryChunk for QuerierChunk {
     fn id(&self) -> ChunkId {
-        self.meta().addr().chunk_id
-    }
-
-    fn addr(&self) -> ChunkAddr {
-        self.meta().addr().clone()
+        self.meta().chunk_id
     }

     fn table_name(&self) -> &str {
-        self.meta().addr().table_name.as_ref()
+        self.meta().table_name.as_ref()
     }

     fn may_contain_pk_duplicates(&self) -> bool {
@@ -7,8 +7,8 @@ use arrow::{datatypes::DataType, error::ArrowError, record_batch::RecordBatch};
 use async_trait::async_trait;
 use client_util::connection;
 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, ColumnSummary, InfluxDbType, PartitionId, SequenceNumber,
-    SequencerId, StatValues, Statistics, TableSummary, TimestampMinMax,
+    ChunkId, ChunkOrder, ColumnSummary, InfluxDbType, PartitionId, SequenceNumber, SequencerId,
+    StatValues, Statistics, TableSummary, TimestampMinMax,
 };
 use datafusion_util::MemoryStream;
 use futures::{stream::FuturesUnordered, TryStreamExt};
@@ -414,6 +414,7 @@ async fn execute_get_write_infos(
 #[derive(Debug, Clone)]
 pub struct IngesterPartition {
     chunk_id: ChunkId,
+    #[allow(dead_code)]
     namespace_name: Arc<str>,
     table_name: Arc<str>,
     partition_id: PartitionId,
@@ -538,15 +539,6 @@
         self.chunk_id
     }

-    fn addr(&self) -> data_types::ChunkAddr {
-        ChunkAddr {
-            db_name: Arc::clone(&self.namespace_name),
-            table_name: Arc::clone(&self.table_name),
-            partition_key: Arc::clone(&self.old_gen_partition_key),
-            chunk_id: self.chunk_id,
-        }
-    }
-
     fn table_name(&self) -> &str {
         self.table_name.as_ref()
     }
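With `addr()` gone, `namespace_name` is kept on `IngesterPartition` but no longer read anywhere, hence the new `#[allow(dead_code)]`. A tiny illustration of that pattern with simplified stand-in types, not the real struct:

struct IngesterPartition {
    chunk_id: u64,
    // Still populated, but nothing reads it now that `addr()` is gone;
    // the attribute suppresses the compiler's dead_code lint.
    #[allow(dead_code)]
    namespace_name: String,
}

fn main() {
    let partition = IngesterPartition {
        chunk_id: 1,
        namespace_name: "ns".into(),
    };
    // Only chunk_id is read; namespace_name is intentionally unused.
    println!("chunk {}", partition.chunk_id);
}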

@@ -10,8 +10,7 @@
 use async_trait::async_trait;
 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, DeletePredicate, InfluxDbType, PartitionId, TableSummary,
-    TimestampMinMax,
+    ChunkId, ChunkOrder, DeletePredicate, InfluxDbType, PartitionId, TableSummary, TimestampMinMax,
 };
 use datafusion::physical_plan::SendableRecordBatchStream;
 use exec::{stringset::StringSet, IOxSessionContext};
@@ -181,9 +180,6 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
     /// particular partition.
     fn id(&self) -> ChunkId;

-    /// Returns the ChunkAddr of this chunk
-    fn addr(&self) -> ChunkAddr;
-
     /// Returns the name of the table stored in this chunk
     fn table_name(&self) -> &str;
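After this hunk, `QueryChunk` implementors identify themselves by `ChunkId` and table name alone; no catalog address remains in the trait. A trimmed sketch of the resulting shape, showing only the two methods visible in the diff (the real trait has many more, and `ChunkId` is stubbed as a newtype here):

use std::fmt::Debug;

// Stub for data_types::ChunkId; the real type wraps a Uuid.
#[derive(Debug, Clone, Copy)]
struct ChunkId(u128);

trait QueryChunk: Debug + Send + Sync + 'static {
    /// Returns the id of this chunk, unique within a particular partition.
    fn id(&self) -> ChunkId;

    /// Returns the name of the table stored in this chunk.
    fn table_name(&self) -> &str;
}

#[derive(Debug)]
struct ExampleChunk {
    id: ChunkId,
    table_name: String,
}

impl QueryChunk for ExampleChunk {
    fn id(&self) -> ChunkId {
        self.id
    }

    fn table_name(&self) -> &str {
        &self.table_name
    }
}

fn main() {
    let chunk = ExampleChunk {
        id: ChunkId(1),
        table_name: "table".into(),
    };
    println!("{:?} stores {}", chunk.id(), chunk.table_name());
}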

@@ -20,8 +20,8 @@ use arrow::{
 };
 use async_trait::async_trait;
 use data_types::{
-    ChunkAddr, ChunkId, ChunkOrder, ColumnSummary, DeletePredicate, InfluxDbType, PartitionId,
-    StatValues, Statistics, TableSummary, TimestampMinMax,
+    ChunkId, ChunkOrder, ColumnSummary, DeletePredicate, InfluxDbType, PartitionId, StatValues,
+    Statistics, TableSummary, TimestampMinMax,
 };
 use datafusion::physical_plan::SendableRecordBatchStream;
 use datafusion_util::stream_from_batches;
@@ -906,15 +906,6 @@ impl QueryChunk for TestChunk {
         self.id
     }

-    fn addr(&self) -> ChunkAddr {
-        ChunkAddr {
-            db_name: Arc::from("TestChunkDb"),
-            table_name: Arc::from(self.table_name.as_str()),
-            partition_key: Arc::from("TestChunkPartitionKey"),
-            chunk_id: self.id,
-        }
-    }
-
     fn table_name(&self) -> &str {
         &self.table_name
     }