fix: rename `estimated_bytes` to `memory_bytes` and expose `object_store_bytes` in ChunkSummary and system.chunks (#2017)

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Andrew Lamb 2021-07-15 12:00:24 -04:00 committed by GitHub
parent 5a9c73a4dd
commit 3fd6430fb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 207 additions and 132 deletions

View File

@ -126,8 +126,11 @@ pub struct ChunkSummary {
/// Is there any outstanding lifecycle action for this chunk?
pub lifecycle_action: Option<ChunkLifecycleAction>,
/// The total estimated size of this chunk, in bytes
pub estimated_bytes: usize,
/// The number of bytes used to store this chunk in memory
pub memory_bytes: usize,
/// The number of bytes used to store this chunk in object storage
pub object_store_bytes: usize,
/// The total number of rows in this chunk
pub row_count: usize,
@ -153,7 +156,7 @@ pub struct ChunkColumnSummary {
pub name: Arc<str>,
/// Estimated size, in bytes, consumed by this column.
pub estimated_bytes: usize,
pub memory_bytes: usize,
}
/// Contains additional per-column details about physical storage of a chunk
@ -168,13 +171,15 @@ pub struct DetailedChunkSummary {
impl ChunkSummary {
/// Construct a ChunkSummary that has None for all timestamps
#[allow(clippy::too_many_arguments)]
pub fn new_without_timestamps(
partition_key: Arc<str>,
table_name: Arc<str>,
id: u32,
storage: ChunkStorage,
lifecycle_action: Option<ChunkLifecycleAction>,
estimated_bytes: usize,
memory_bytes: usize,
object_store_bytes: usize,
row_count: usize,
) -> Self {
Self {
@ -183,11 +188,47 @@ impl ChunkSummary {
id,
storage,
lifecycle_action,
estimated_bytes,
memory_bytes,
object_store_bytes,
row_count,
time_of_first_write: None,
time_of_last_write: None,
time_closed: None,
}
}
/// Return a new ChunkSummary with None for all timestamps
pub fn normalize(self) -> Self {
let ChunkSummary {
partition_key,
table_name,
id,
storage,
lifecycle_action,
memory_bytes,
object_store_bytes,
row_count,
..
} = self;
Self::new_without_timestamps(
partition_key,
table_name,
id,
storage,
lifecycle_action,
memory_bytes,
object_store_bytes,
row_count,
)
}
/// Normalizes a set of ChunkSummaries for comparison by removing timestamps
pub fn normalize_summaries(summaries: Vec<Self>) -> Vec<Self> {
let mut summaries = summaries
.into_iter()
.map(|summary| summary.normalize())
.collect::<Vec<_>>();
summaries.sort_unstable();
summaries
}
}

View File

@ -86,7 +86,7 @@ OBSERVER;
Example SQL to show the total estimated storage size by database:
SELECT database_name, storage, count(*) as num_chunks,
sum(estimated_bytes)/1024/1024 as estimated_mb
sum(memory_bytes)/1024/1024 as estimated_mb
FROM chunks
GROUP BY database_name, storage
ORDER BY estimated_mb desc;
@ -164,7 +164,7 @@ Here are some interesting reports you can run when in `OBSERVER` mode:
```sql
SELECT
database_name, count(*) as num_chunks,
sum(estimated_bytes)/1024/1024 as estimated_mb
sum(memory_bytes)/1024/1024 as estimated_mb
FROM chunks
GROUP BY database_name
ORDER BY estimated_mb desc
@ -175,7 +175,7 @@ LIMIT 20;
```sql
SELECT
database_name, storage, count(*) as num_chunks,
sum(estimated_bytes)/1024/1024 as estimated_mb
sum(memory_bytes)/1024/1024 as estimated_mb
FROM chunks
GROUP BY database_name, storage
ORDER BY estimated_mb desc
@ -187,7 +187,7 @@ LIMIT 20;
```sql
SELECT
database_name, table_name, storage, count(*) as num_chunks,
sum(estimated_bytes)/1024/1024 as estimated_mb
sum(memory_bytes)/1024/1024 as estimated_mb
FROM chunks
GROUP BY database_name, table_name, storage
ORDER BY estimated_mb desc
@ -217,7 +217,7 @@ LIMIT 20;
### Time range stored per table
This query provides an estimate, by table, of how long of a time range
This query provides an estimate, by table, of how long of a time range
and the estimated number of rows per second it holds in IOx
(the `1,000,000,000` is the conversion from nanoseconds)

View File

@ -62,8 +62,11 @@ message Chunk {
// Is there any outstanding lifecycle action for this chunk?
ChunkLifecycleAction lifecycle_action = 10;
// The total estimated size of this chunk, in bytes
uint64 estimated_bytes = 4;
// The number of bytes used to store this chunk in memory
uint64 memory_bytes = 4;
// The number of bytes used to store this chunk in object storage
uint64 object_store_bytes = 11;
// The number of rows in this chunk
uint64 row_count = 9;

View File

@ -13,7 +13,8 @@ impl From<ChunkSummary> for management::Chunk {
id,
storage,
lifecycle_action,
estimated_bytes,
memory_bytes,
object_store_bytes,
row_count,
time_of_first_write,
time_of_last_write,
@ -25,7 +26,8 @@ impl From<ChunkSummary> for management::Chunk {
let lifecycle_action: management::ChunkLifecycleAction = lifecycle_action.into();
let lifecycle_action = lifecycle_action.into(); // convert to i32
let estimated_bytes = estimated_bytes as u64;
let memory_bytes = memory_bytes as u64;
let object_store_bytes = object_store_bytes as u64;
let row_count = row_count as u64;
let partition_key = partition_key.to_string();
@ -41,7 +43,8 @@ impl From<ChunkSummary> for management::Chunk {
id,
storage,
lifecycle_action,
estimated_bytes,
memory_bytes,
object_store_bytes,
row_count,
time_of_first_write,
time_of_last_write,
@ -114,12 +117,14 @@ impl TryFrom<management::Chunk> for ChunkSummary {
partition_key,
table_name,
id,
estimated_bytes,
memory_bytes,
object_store_bytes,
row_count,
..
} = proto;
let estimated_bytes = estimated_bytes as usize;
let memory_bytes = memory_bytes as usize;
let object_store_bytes = object_store_bytes as usize;
let row_count = row_count as usize;
let partition_key = Arc::from(partition_key.as_str());
let table_name = Arc::from(table_name.as_str());
@ -130,7 +135,8 @@ impl TryFrom<management::Chunk> for ChunkSummary {
id,
storage,
lifecycle_action,
estimated_bytes,
memory_bytes,
object_store_bytes,
row_count,
time_of_first_write,
time_of_last_write,
@ -184,7 +190,8 @@ mod test {
partition_key: "foo".to_string(),
table_name: "bar".to_string(),
id: 42,
estimated_bytes: 1234,
memory_bytes: 1234,
object_store_bytes: 567,
row_count: 321,
storage: management::ChunkStorage::ObjectStoreOnly.into(),
lifecycle_action: management::ChunkLifecycleAction::Moving.into(),
@ -198,7 +205,8 @@ mod test {
partition_key: Arc::from("foo"),
table_name: Arc::from("bar"),
id: 42,
estimated_bytes: 1234,
memory_bytes: 1234,
object_store_bytes: 567,
row_count: 321,
storage: ChunkStorage::ObjectStoreOnly,
lifecycle_action: Some(ChunkLifecycleAction::Moving),
@ -220,7 +228,8 @@ mod test {
partition_key: Arc::from("foo"),
table_name: Arc::from("bar"),
id: 42,
estimated_bytes: 1234,
memory_bytes: 1234,
object_store_bytes: 567,
row_count: 321,
storage: ChunkStorage::ObjectStoreOnly,
lifecycle_action: Some(ChunkLifecycleAction::Persisting),
@ -235,7 +244,8 @@ mod test {
partition_key: "foo".to_string(),
table_name: "bar".to_string(),
id: 42,
estimated_bytes: 1234,
memory_bytes: 1234,
object_store_bytes: 567,
row_count: 321,
storage: management::ChunkStorage::ObjectStoreOnly.into(),
lifecycle_action: management::ChunkLifecycleAction::Persisting.into(),

View File

@ -265,16 +265,16 @@ async fn sql_select_from_system_chunks() {
// test timestamps, etc)
let expected = vec![
"+----+---------------+------------+-------------------+-----------------+-----------+",
"| id | partition_key | table_name | storage | estimated_bytes | row_count |",
"+----+---------------+------------+-------------------+-----------------+-----------+",
"| 0 | 1970-01-01T00 | h2o | OpenMutableBuffer | 213 | 3 |",
"| 0 | 1970-01-01T00 | o2 | OpenMutableBuffer | 177 | 2 |",
"+----+---------------+------------+-------------------+-----------------+-----------+",
"+----+---------------+------------+-------------------+--------------+-----------+",
"| id | partition_key | table_name | storage | memory_bytes | row_count |",
"+----+---------------+------------+-------------------+--------------+-----------+",
"| 0 | 1970-01-01T00 | h2o | OpenMutableBuffer | 213 | 3 |",
"| 0 | 1970-01-01T00 | o2 | OpenMutableBuffer | 177 | 2 |",
"+----+---------------+------------+-------------------+--------------+-----------+",
];
run_sql_test_case!(
TwoMeasurementsManyFieldsOneChunk {},
"SELECT id, partition_key, table_name, storage, estimated_bytes, row_count from system.chunks",
"SELECT id, partition_key, table_name, storage, memory_bytes, row_count from system.chunks",
&expected
);
}
@ -316,24 +316,24 @@ async fn sql_select_from_system_chunk_columns() {
// with different chunk configurations.
let expected = vec![
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | estimated_bytes |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
"| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | Boston | Boston | 252 |",
"| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
"| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | MA | MA | 240 |",
"| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
"| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 50 | 250 | 51 |",
"| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 1 | Boston | Boston | 35 |",
"| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |",
"| 1970-01-01T00 | 0 | o2 | state | OpenMutableBuffer | 2 | CA | MA | 41 |",
"| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |",
"| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |",
"| 1970-01-01T00 | 1 | h2o | city | OpenMutableBuffer | 1 | Boston | Boston | 31 |",
"| 1970-01-01T00 | 1 | h2o | other_temp | OpenMutableBuffer | 1 | 72.4 | 72.4 | 17 |",
"| 1970-01-01T00 | 1 | h2o | state | OpenMutableBuffer | 1 | CA | CA | 27 |",
"| 1970-01-01T00 | 1 | h2o | time | OpenMutableBuffer | 1 | 350 | 350 | 17 |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | memory_bytes |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
"| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | Boston | Boston | 252 |",
"| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
"| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | MA | MA | 240 |",
"| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
"| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 50 | 250 | 51 |",
"| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 1 | Boston | Boston | 35 |",
"| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |",
"| 1970-01-01T00 | 0 | o2 | state | OpenMutableBuffer | 2 | CA | MA | 41 |",
"| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |",
"| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |",
"| 1970-01-01T00 | 1 | h2o | city | OpenMutableBuffer | 1 | Boston | Boston | 31 |",
"| 1970-01-01T00 | 1 | h2o | other_temp | OpenMutableBuffer | 1 | 72.4 | 72.4 | 17 |",
"| 1970-01-01T00 | 1 | h2o | state | OpenMutableBuffer | 1 | CA | CA | 27 |",
"| 1970-01-01T00 | 1 | h2o | time | OpenMutableBuffer | 1 | 350 | 350 | 17 |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
];
run_sql_test_case!(
TwoMeasurementsManyFieldsTwoChunks {},

View File

@ -161,16 +161,16 @@ impl Table {
.iter()
.flat_map(|rg| rg.column_sizes())
// combine statistics for columns across row groups
.fold(BTreeMap::new(), |mut map, (name, estimated_bytes)| {
.fold(BTreeMap::new(), |mut map, (name, memory_bytes)| {
let entry = map.entry(name).or_insert(0);
*entry += estimated_bytes;
*entry += memory_bytes;
map
})
// Now turn into Vec<ChunkColumnSummary>
.into_iter()
.map(|(name, estimated_bytes)| ChunkColumnSummary {
.map(|(name, memory_bytes)| ChunkColumnSummary {
name: name.into(),
estimated_bytes,
memory_bytes,
})
.collect()
}
@ -1116,11 +1116,11 @@ mod test {
let expected = vec![
ChunkColumnSummary {
name: "count".into(),
estimated_bytes: 110,
memory_bytes: 110,
},
ChunkColumnSummary {
name: "time".into(),
estimated_bytes: 107,
memory_bytes: 107,
},
];
assert_eq!(table.column_sizes(), expected);

View File

@ -2262,36 +2262,6 @@ mod tests {
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![1]);
}
/// Normalizes a set of ChunkSummaries for comparison by removing timestamps
fn normalize_summaries(summaries: Vec<ChunkSummary>) -> Vec<ChunkSummary> {
let mut summaries = summaries
.into_iter()
.map(|summary| {
let ChunkSummary {
partition_key,
table_name,
id,
storage,
lifecycle_action,
estimated_bytes,
row_count,
..
} = summary;
ChunkSummary::new_without_timestamps(
partition_key,
table_name,
id,
storage,
lifecycle_action,
estimated_bytes,
row_count,
)
})
.collect::<Vec<_>>();
summaries.sort_unstable();
summaries
}
#[tokio::test]
async fn partition_chunk_summaries() {
// Test that chunk id listing is hooked up
@ -2306,7 +2276,7 @@ mod tests {
print!("Partitions: {:?}", db.partition_keys().unwrap());
let chunk_summaries = db.partition_chunk_summaries("1970-01-05T15");
let chunk_summaries = normalize_summaries(chunk_summaries);
let chunk_summaries = ChunkSummary::normalize_summaries(chunk_summaries);
let expected = vec![ChunkSummary::new_without_timestamps(
Arc::from("1970-01-05T15"),
@ -2314,7 +2284,8 @@ mod tests {
0,
ChunkStorage::OpenMutableBuffer,
None,
70,
70, // memory_size
0, // os_size
1,
)];
@ -2322,7 +2293,7 @@ mod tests {
.chunk_summaries()
.unwrap()
.into_iter()
.map(|x| x.estimated_bytes)
.map(|x| x.memory_bytes)
.sum();
assert_eq!(db.catalog.metrics().memory().mutable_buffer(), size);
@ -2412,7 +2383,7 @@ mod tests {
write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000").await;
let chunk_summaries = db.chunk_summaries().expect("expected summary to return");
let chunk_summaries = normalize_summaries(chunk_summaries);
let chunk_summaries = ChunkSummary::normalize_summaries(chunk_summaries);
let lifecycle_action = None;
@ -2424,6 +2395,7 @@ mod tests {
ChunkStorage::ReadBufferAndObjectStore,
lifecycle_action,
2115, // size of RB and OS chunks
1132, // size of parquet file
1,
),
ChunkSummary::new_without_timestamps(
@ -2433,6 +2405,7 @@ mod tests {
ChunkStorage::OpenMutableBuffer,
lifecycle_action,
64,
0, // no OS chunks
1,
),
ChunkSummary::new_without_timestamps(
@ -2442,6 +2415,7 @@ mod tests {
ChunkStorage::ClosedMutableBuffer,
lifecycle_action,
2398,
0, // no OS chunks
1,
),
ChunkSummary::new_without_timestamps(
@ -2451,13 +2425,14 @@ mod tests {
ChunkStorage::OpenMutableBuffer,
lifecycle_action,
87,
0, // no OS chunks
1,
),
];
assert_eq!(
expected, chunk_summaries,
"expected:\n{:#?}\n\nactual:{:#?}\n\n",
"\n\nexpected:\n{:#?}\n\nactual:{:#?}\n\n",
expected, chunk_summaries
);

View File

@ -476,7 +476,8 @@ impl CatalogChunk {
id: self.addr.chunk_id,
storage,
lifecycle_action,
estimated_bytes: self.size(),
memory_bytes: self.memory_bytes(),
object_store_bytes: self.object_store_bytes(),
row_count,
time_of_first_write: self.time_of_first_write,
time_of_last_write: self.time_of_last_write,
@ -491,7 +492,7 @@ impl CatalogChunk {
fn to_summary(v: (&str, usize)) -> ChunkColumnSummary {
ChunkColumnSummary {
name: v.0.into(),
estimated_bytes: v.1,
memory_bytes: v.1,
}
}
@ -529,7 +530,7 @@ impl CatalogChunk {
}
/// Returns an approximation of the amount of process memory consumed by the chunk
pub fn size(&self) -> usize {
pub fn memory_bytes(&self) -> usize {
match &self.stage {
ChunkStage::Open { mb_chunk, .. } => mb_chunk.size(),
ChunkStage::Frozen { representation, .. } => match &representation {
@ -550,6 +551,15 @@ impl CatalogChunk {
}
}
/// Returns the number of bytes of object storage consumed by this chunk
pub fn object_store_bytes(&self) -> usize {
match &self.stage {
ChunkStage::Open { .. } => 0,
ChunkStage::Frozen { .. } => 0,
ChunkStage::Persisted { parquet, .. } => parquet.file_size_bytes(),
}
}
/// Returns a mutable reference to the mutable buffer storage for chunks in the Open state
pub fn mutable_buffer(&mut self) -> Result<&mut MBChunk> {
match &mut self.stage {

View File

@ -197,7 +197,8 @@ fn chunk_summaries_schema() -> SchemaRef {
Field::new("table_name", DataType::Utf8, false),
Field::new("storage", DataType::Utf8, false),
Field::new("lifecycle_action", DataType::Utf8, true),
Field::new("estimated_bytes", DataType::UInt64, false),
Field::new("memory_bytes", DataType::UInt64, false),
Field::new("object_store_bytes", DataType::UInt64, false),
Field::new("row_count", DataType::UInt64, false),
Field::new("time_of_first_write", ts.clone(), true),
Field::new("time_of_last_write", ts.clone(), true),
@ -223,9 +224,13 @@ fn from_chunk_summaries(schema: SchemaRef, chunks: Vec<ChunkSummary>) -> Result<
.iter()
.map(|c| c.lifecycle_action.map(|a| a.name()))
.collect::<StringArray>();
let estimated_bytes = chunks
let memory_bytes = chunks
.iter()
.map(|c| Some(c.estimated_bytes as u64))
.map(|c| Some(c.memory_bytes as u64))
.collect::<UInt64Array>();
let object_store_bytes = chunks
.iter()
.map(|c| Some(c.object_store_bytes as u64).filter(|&v| v > 0))
.collect::<UInt64Array>();
let row_counts = chunks
.iter()
@ -255,7 +260,8 @@ fn from_chunk_summaries(schema: SchemaRef, chunks: Vec<ChunkSummary>) -> Result<
Arc::new(table_name),
Arc::new(storage),
Arc::new(lifecycle_action),
Arc::new(estimated_bytes),
Arc::new(memory_bytes),
Arc::new(object_store_bytes),
Arc::new(row_counts),
Arc::new(time_of_first_write),
Arc::new(time_of_last_write),
@ -380,7 +386,7 @@ fn chunk_columns_schema() -> SchemaRef {
Field::new("row_count", DataType::UInt64, true),
Field::new("min_value", DataType::Utf8, true),
Field::new("max_value", DataType::Utf8, true),
Field::new("estimated_bytes", DataType::UInt64, true),
Field::new("memory_bytes", DataType::UInt64, true),
]))
}
@ -396,7 +402,7 @@ fn assemble_chunk_columns(
.map(|column_summary| {
(
column_summary.name.as_ref(),
column_summary.estimated_bytes as u64,
column_summary.memory_bytes as u64,
)
})
.collect()
@ -413,7 +419,7 @@ fn assemble_chunk_columns(
let mut row_count = UInt64Builder::new(row_estimate);
let mut min_values = StringBuilder::new(row_estimate);
let mut max_values = StringBuilder::new(row_estimate);
let mut estimated_bytes = UInt64Builder::new(row_estimate);
let mut memory_bytes = UInt64Builder::new(row_estimate);
// Note no rows are produced for partitions with no chunks, or
// tables with no partitions: There are other tables to list tables
@ -442,7 +448,7 @@ fn assemble_chunk_columns(
let size = column_index.remove(column.name.as_str());
estimated_bytes.append_option(size)?;
memory_bytes.append_option(size)?;
}
}
@ -457,7 +463,7 @@ fn assemble_chunk_columns(
Arc::new(row_count.finish()),
Arc::new(min_values.finish()),
Arc::new(max_values.finish()),
Arc::new(estimated_bytes.finish()),
Arc::new(memory_bytes.finish()),
],
)
}
@ -616,7 +622,8 @@ mod tests {
id: 0,
storage: ChunkStorage::OpenMutableBuffer,
lifecycle_action: None,
estimated_bytes: 23754,
memory_bytes: 23754,
object_store_bytes: 0,
row_count: 11,
time_of_first_write: Some(DateTime::from_utc(
NaiveDateTime::from_timestamp(10, 0),
@ -628,10 +635,11 @@ mod tests {
ChunkSummary {
partition_key: Arc::from("p1"),
table_name: Arc::from("table1"),
id: 0,
id: 1,
storage: ChunkStorage::OpenMutableBuffer,
lifecycle_action: Some(ChunkLifecycleAction::Persisting),
estimated_bytes: 23454,
memory_bytes: 23455,
object_store_bytes: 0,
row_count: 22,
time_of_first_write: None,
time_of_last_write: Some(DateTime::from_utc(
@ -640,15 +648,35 @@ mod tests {
)),
time_closed: None,
},
ChunkSummary {
partition_key: Arc::from("p1"),
table_name: Arc::from("table1"),
id: 2,
storage: ChunkStorage::ObjectStoreOnly,
lifecycle_action: None,
memory_bytes: 1234,
object_store_bytes: 5678,
row_count: 33,
time_of_first_write: Some(DateTime::from_utc(
NaiveDateTime::from_timestamp(100, 0),
Utc,
)),
time_of_last_write: Some(DateTime::from_utc(
NaiveDateTime::from_timestamp(200, 0),
Utc,
)),
time_closed: None,
},
];
let expected = vec![
"+----+---------------+------------+-------------------+------------------------------+-----------------+-----------+---------------------+---------------------+-------------+",
"| id | partition_key | table_name | storage | lifecycle_action | estimated_bytes | row_count | time_of_first_write | time_of_last_write | time_closed |",
"+----+---------------+------------+-------------------+------------------------------+-----------------+-----------+---------------------+---------------------+-------------+",
"| 0 | p1 | table1 | OpenMutableBuffer | | 23754 | 11 | 1970-01-01 00:00:10 | | |",
"| 0 | p1 | table1 | OpenMutableBuffer | Persisting to Object Storage | 23454 | 22 | | 1970-01-01 00:01:20 | |",
"+----+---------------+------------+-------------------+------------------------------+-----------------+-----------+---------------------+---------------------+-------------+",
"+----+---------------+------------+-------------------+------------------------------+--------------+--------------------+-----------+---------------------+---------------------+-------------+",
"| id | partition_key | table_name | storage | lifecycle_action | memory_bytes | object_store_bytes | row_count | time_of_first_write | time_of_last_write | time_closed |",
"+----+---------------+------------+-------------------+------------------------------+--------------+--------------------+-----------+---------------------+---------------------+-------------+",
"| 0 | p1 | table1 | OpenMutableBuffer | | 23754 | | 11 | 1970-01-01 00:00:10 | | |",
"| 1 | p1 | table1 | OpenMutableBuffer | Persisting to Object Storage | 23455 | | 22 | | 1970-01-01 00:01:20 | |",
"| 2 | p1 | table1 | ObjectStoreOnly | | 1234 | 5678 | 33 | 1970-01-01 00:01:40 | 1970-01-01 00:03:20 | |",
"+----+---------------+------------+-------------------+------------------------------+--------------+--------------------+-----------+---------------------+---------------------+-------------+",
];
let schema = chunk_summaries_schema();
@ -825,7 +853,8 @@ mod tests {
id: 42,
storage: ChunkStorage::ReadBuffer,
lifecycle_action,
estimated_bytes: 23754,
memory_bytes: 23754,
object_store_bytes: 0,
row_count: 11,
time_of_first_write: None,
time_of_last_write: None,
@ -834,11 +863,11 @@ mod tests {
columns: vec![
ChunkColumnSummary {
name: "c1".into(),
estimated_bytes: 11,
memory_bytes: 11,
},
ChunkColumnSummary {
name: "c2".into(),
estimated_bytes: 12,
memory_bytes: 12,
},
],
},
@ -859,7 +888,8 @@ mod tests {
id: 43,
storage: ChunkStorage::OpenMutableBuffer,
lifecycle_action,
estimated_bytes: 23754,
memory_bytes: 23754,
object_store_bytes: 0,
row_count: 11,
time_of_first_write: None,
time_of_last_write: None,
@ -867,7 +897,7 @@ mod tests {
},
columns: vec![ChunkColumnSummary {
name: "c1".into(),
estimated_bytes: 100,
memory_bytes: 100,
}],
},
),
@ -887,7 +917,8 @@ mod tests {
id: 44,
storage: ChunkStorage::OpenMutableBuffer,
lifecycle_action,
estimated_bytes: 23754,
memory_bytes: 23754,
object_store_bytes: 0,
row_count: 11,
time_of_first_write: None,
time_of_last_write: None,
@ -895,21 +926,21 @@ mod tests {
},
columns: vec![ChunkColumnSummary {
name: "c3".into(),
estimated_bytes: 200,
memory_bytes: 200,
}],
},
),
];
let expected = vec![
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | estimated_bytes |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
"| p1 | 42 | t1 | c1 | ReadBuffer | 55 | bar | foo | 11 |",
"| p1 | 42 | t1 | c2 | ReadBuffer | 66 | 11 | 43 | 12 |",
"| p2 | 43 | t1 | c1 | OpenMutableBuffer | 667 | 110 | 430 | 100 |",
"| p2 | 44 | t2 | c3 | OpenMutableBuffer | 4 | -1 | 2 | 200 |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | memory_bytes |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
"| p1 | 42 | t1 | c1 | ReadBuffer | 55 | bar | foo | 11 |",
"| p1 | 42 | t1 | c2 | ReadBuffer | 66 | 11 | 43 | 12 |",
"| p2 | 43 | t1 | c1 | OpenMutableBuffer | 667 | 110 | 430 | 100 |",
"| p2 | 44 | t2 | c3 | OpenMutableBuffer | 4 | -1 | 2 | 200 |",
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
];
let batch = assemble_chunk_columns(chunk_columns_schema(), summaries).unwrap();

View File

@ -83,7 +83,7 @@ OBSERVER;
Example SQL to show the total estimated storage size by database:
SELECT database_name, storage, count(*) as num_chunks,
sum(estimated_bytes)/1024/1024 as estimated_mb
sum(memory_bytes)/1024/1024 as estimated_mb
FROM chunks
GROUP BY database_name, storage
ORDER BY estimated_mb desc;

View File

@ -116,7 +116,7 @@ SHOW COLUMNS FROM my_table; ;; Show columns in the table
SELECT
partition_key, table_name, storage,
count(*) as chunk_count,
sum(estimated_bytes)/(1024*1024) as size_mb
sum(memory_bytes)/(1024*1024) as size_mb
FROM
system.chunks
GROUP BY

View File

@ -317,7 +317,8 @@ async fn test_chunk_get() {
id: 0,
storage: ChunkStorage::OpenMutableBuffer.into(),
lifecycle_action,
estimated_bytes: 100,
memory_bytes: 100,
object_store_bytes: 0,
row_count: 2,
time_of_first_write: None,
time_of_last_write: None,
@ -329,7 +330,8 @@ async fn test_chunk_get() {
id: 0,
storage: ChunkStorage::OpenMutableBuffer.into(),
lifecycle_action,
estimated_bytes: 82,
memory_bytes: 82,
object_store_bytes: 0,
row_count: 1,
time_of_first_write: None,
time_of_last_write: None,
@ -498,7 +500,8 @@ async fn test_list_partition_chunks() {
id: 0,
storage: ChunkStorage::OpenMutableBuffer.into(),
lifecycle_action: ChunkLifecycleAction::Unspecified.into(),
estimated_bytes: 100,
memory_bytes: 100,
object_store_bytes: 0,
row_count: 2,
time_of_first_write: None,
time_of_last_write: None,
@ -819,7 +822,8 @@ fn normalize_chunks(chunks: Vec<Chunk>) -> Vec<Chunk> {
id,
storage,
lifecycle_action,
estimated_bytes,
memory_bytes,
object_store_bytes,
row_count,
..
} = summary;
@ -829,11 +833,12 @@ fn normalize_chunks(chunks: Vec<Chunk>) -> Vec<Chunk> {
id,
storage,
lifecycle_action,
estimated_bytes,
row_count,
time_of_first_write: None,
time_of_last_write: None,
time_closed: None,
memory_bytes,
object_store_bytes,
}
})
.collect::<Vec<_>>()

View File

@ -197,7 +197,7 @@ async fn test_get_chunks() {
.and(predicate::str::contains(
r#""storage": "OpenMutableBuffer","#,
))
.and(predicate::str::contains(r#""estimated_bytes": 100"#))
.and(predicate::str::contains(r#""memory_bytes": 100"#))
// Check for a non empty timestamp such as
// "time_of_first_write": "2021-03-30T17:11:10.723866Z",
.and(predicate::str::contains(r#""time_of_first_write": "20"#));