Merge pull request #1997 from influxdata/cn/alt-table-summary
feat: Make a TableSummaryAndTimes type for incremental replacement of TableSummarypull/24376/head
commit
cedd6269c7
|
@ -1,6 +1,7 @@
|
|||
//! This module contains structs that describe the metadata for a partition
|
||||
//! including schema, summary statistics, and file locations in storage.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
borrow::{Borrow, Cow},
|
||||
|
@ -54,6 +55,35 @@ impl FromIterator<Self> for TableSummary {
|
|||
}
|
||||
}
|
||||
|
||||
/// Temporary transition struct that has times of first/last write. Will eventually replace
|
||||
/// TableSummary entirely.
|
||||
#[derive(Debug)]
|
||||
pub struct TableSummaryAndTimes {
|
||||
/// Table name
|
||||
pub name: String,
|
||||
|
||||
/// Per column statistics
|
||||
pub columns: Vec<ColumnSummary>,
|
||||
|
||||
/// Time at which the first data was written into this table. Note
|
||||
/// this is not the same as the timestamps on the data itself
|
||||
pub time_of_first_write: DateTime<Utc>,
|
||||
|
||||
/// Most recent time at which data write was initiated into this
|
||||
/// chunk. Note this is not the same as the timestamps on the data
|
||||
/// itself
|
||||
pub time_of_last_write: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl From<TableSummaryAndTimes> for TableSummary {
|
||||
fn from(other: TableSummaryAndTimes) -> Self {
|
||||
Self {
|
||||
name: other.name,
|
||||
columns: other.columns,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata and statistics information for a table. This can be
|
||||
/// either for the portion of a Table stored within a single chunk or
|
||||
/// aggregated across chunks.
|
||||
|
|
|
@ -4,7 +4,7 @@ use crate::{
|
|||
};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use chrono::{DateTime, Utc};
|
||||
use data_types::partition_metadata::{ColumnSummary, InfluxDbType, TableSummary};
|
||||
use data_types::partition_metadata::{ColumnSummary, InfluxDbType, TableSummaryAndTimes};
|
||||
use entry::{Sequence, TableBatch};
|
||||
use hashbrown::HashMap;
|
||||
use internal_types::{
|
||||
|
@ -237,7 +237,7 @@ impl MBChunk {
|
|||
}
|
||||
|
||||
/// Returns a vec of the summary statistics of the tables in this chunk
|
||||
pub fn table_summary(&self) -> TableSummary {
|
||||
pub fn table_summary(&self) -> TableSummaryAndTimes {
|
||||
let mut columns: Vec<_> = self
|
||||
.columns
|
||||
.iter()
|
||||
|
@ -255,9 +255,11 @@ impl MBChunk {
|
|||
|
||||
columns.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
|
||||
TableSummary {
|
||||
TableSummaryAndTimes {
|
||||
name: self.table_name.to_string(),
|
||||
columns,
|
||||
time_of_first_write: self.time_of_first_write,
|
||||
time_of_last_write: self.time_of_last_write,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -521,55 +523,50 @@ mod tests {
|
|||
assert!(chunk.time_of_first_write < after_write);
|
||||
|
||||
let summary = chunk.table_summary();
|
||||
|
||||
assert_eq!(
|
||||
summary,
|
||||
TableSummary {
|
||||
name: "cpu".to_string(),
|
||||
columns: vec![
|
||||
ColumnSummary {
|
||||
name: "env".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Tag),
|
||||
stats: Statistics::String(StatValues {
|
||||
min: Some("prod".to_string()),
|
||||
max: Some("stage".to_string()),
|
||||
count: 3,
|
||||
distinct_count: Some(NonZeroU64::new(3).unwrap())
|
||||
})
|
||||
},
|
||||
ColumnSummary {
|
||||
name: "host".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Tag),
|
||||
stats: Statistics::String(StatValues {
|
||||
min: Some("a".to_string()),
|
||||
max: Some("c".to_string()),
|
||||
count: 4,
|
||||
distinct_count: Some(NonZeroU64::new(3).unwrap())
|
||||
})
|
||||
},
|
||||
ColumnSummary {
|
||||
name: "time".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Timestamp),
|
||||
stats: Statistics::I64(StatValues {
|
||||
min: Some(1),
|
||||
max: Some(2),
|
||||
count: 4,
|
||||
distinct_count: None
|
||||
})
|
||||
},
|
||||
ColumnSummary {
|
||||
name: "val".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Field),
|
||||
stats: Statistics::F64(StatValues {
|
||||
min: Some(2.),
|
||||
max: Some(23.),
|
||||
count: 4,
|
||||
distinct_count: None
|
||||
})
|
||||
},
|
||||
]
|
||||
}
|
||||
)
|
||||
assert_eq!(summary.name, "cpu");
|
||||
let expected_column_summaries = vec![
|
||||
ColumnSummary {
|
||||
name: "env".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Tag),
|
||||
stats: Statistics::String(StatValues {
|
||||
min: Some("prod".to_string()),
|
||||
max: Some("stage".to_string()),
|
||||
count: 3,
|
||||
distinct_count: Some(NonZeroU64::new(3).unwrap()),
|
||||
}),
|
||||
},
|
||||
ColumnSummary {
|
||||
name: "host".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Tag),
|
||||
stats: Statistics::String(StatValues {
|
||||
min: Some("a".to_string()),
|
||||
max: Some("c".to_string()),
|
||||
count: 4,
|
||||
distinct_count: Some(NonZeroU64::new(3).unwrap()),
|
||||
}),
|
||||
},
|
||||
ColumnSummary {
|
||||
name: "time".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Timestamp),
|
||||
stats: Statistics::I64(StatValues {
|
||||
min: Some(1),
|
||||
max: Some(2),
|
||||
count: 4,
|
||||
distinct_count: None,
|
||||
}),
|
||||
},
|
||||
ColumnSummary {
|
||||
name: "val".to_string(),
|
||||
influxdb_type: Some(InfluxDbType::Field),
|
||||
stats: Statistics::F64(StatValues {
|
||||
min: Some(2.),
|
||||
max: Some(23.),
|
||||
count: 4,
|
||||
distinct_count: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
assert_eq!(summary.columns, expected_column_summaries);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -238,31 +238,32 @@ impl ChunkMetrics {
|
|||
impl CatalogChunk {
|
||||
/// Creates a new open chunk from the provided MUB chunk.
|
||||
///
|
||||
/// Panics if the provided chunk is empty, otherwise creates a new open chunk and records a
|
||||
/// write at the current time.
|
||||
/// Panics if the provided chunk is empty, otherwise creates a new open chunk.
|
||||
pub(super) fn new_open(
|
||||
addr: ChunkAddr,
|
||||
chunk: mutable_buffer::chunk::MBChunk,
|
||||
metrics: ChunkMetrics,
|
||||
) -> Self {
|
||||
assert_eq!(chunk.table_name(), &addr.table_name);
|
||||
|
||||
let first_write = chunk.table_summary().time_of_first_write;
|
||||
let last_write = chunk.table_summary().time_of_last_write;
|
||||
|
||||
let stage = ChunkStage::Open { mb_chunk: chunk };
|
||||
|
||||
metrics
|
||||
.state
|
||||
.inc_with_labels(&[KeyValue::new("state", "open")]);
|
||||
|
||||
let mut chunk = Self {
|
||||
Self {
|
||||
addr,
|
||||
stage,
|
||||
lifecycle_action: None,
|
||||
metrics,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_of_first_write: Some(first_write),
|
||||
time_of_last_write: Some(last_write),
|
||||
time_closed: None,
|
||||
};
|
||||
chunk.record_write();
|
||||
chunk
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new RUB chunk from the provided RUB chunk and metadata
|
||||
|
@ -475,7 +476,7 @@ impl CatalogChunk {
|
|||
match &self.stage {
|
||||
ChunkStage::Open { mb_chunk, .. } => {
|
||||
// The stats for open chunks change so can't be cached
|
||||
Arc::new(mb_chunk.table_summary())
|
||||
Arc::new(mb_chunk.table_summary().into())
|
||||
}
|
||||
ChunkStage::Frozen { meta, .. } => Arc::clone(&meta.table_summary),
|
||||
ChunkStage::Persisted { meta, .. } => Arc::clone(&meta.table_summary),
|
||||
|
@ -533,7 +534,7 @@ impl CatalogChunk {
|
|||
|
||||
// Cache table summary + schema
|
||||
let metadata = ChunkMetadata {
|
||||
table_summary: Arc::new(mb_chunk.table_summary()),
|
||||
table_summary: Arc::new(mb_chunk.table_summary().into()),
|
||||
schema: s.full_schema(),
|
||||
};
|
||||
|
||||
|
@ -836,6 +837,8 @@ mod tests {
|
|||
let mb_chunk = make_mb_chunk(&addr.table_name, sequencer_id);
|
||||
let chunk = CatalogChunk::new_open(addr, mb_chunk, ChunkMetrics::new_unregistered());
|
||||
assert!(matches!(chunk.stage(), &ChunkStage::Open { .. }));
|
||||
assert!(chunk.time_of_first_write.is_some());
|
||||
assert!(chunk.time_of_last_write.is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
@ -109,7 +109,7 @@ impl DbChunk {
|
|||
chunk: Arc::clone(&snapshot),
|
||||
};
|
||||
let meta = ChunkMetadata {
|
||||
table_summary: Arc::new(mb_chunk.table_summary()),
|
||||
table_summary: Arc::new(mb_chunk.table_summary().into()),
|
||||
schema: snapshot.full_schema(),
|
||||
};
|
||||
(state, Arc::new(meta))
|
||||
|
|
Loading…
Reference in New Issue