fix: rename `estimated_bytes` to `memory_bytes` and expose `object_store_bytes` in ChunkSummary and system.chunks (#2017)
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>pull/24376/head
parent
5a9c73a4dd
commit
3fd6430fb6
|
@ -126,8 +126,11 @@ pub struct ChunkSummary {
|
|||
/// Is there any outstanding lifecycle action for this chunk?
|
||||
pub lifecycle_action: Option<ChunkLifecycleAction>,
|
||||
|
||||
/// The total estimated size of this chunk, in bytes
|
||||
pub estimated_bytes: usize,
|
||||
/// The number of bytes used to store this chunk in memory
|
||||
pub memory_bytes: usize,
|
||||
|
||||
/// The number of bytes used to store this chunk in object storage
|
||||
pub object_store_bytes: usize,
|
||||
|
||||
/// The total number of rows in this chunk
|
||||
pub row_count: usize,
|
||||
|
@ -153,7 +156,7 @@ pub struct ChunkColumnSummary {
|
|||
pub name: Arc<str>,
|
||||
|
||||
/// Estimated size, in bytes, consumed by this column.
|
||||
pub estimated_bytes: usize,
|
||||
pub memory_bytes: usize,
|
||||
}
|
||||
|
||||
/// Contains additional per-column details about physical storage of a chunk
|
||||
|
@ -168,13 +171,15 @@ pub struct DetailedChunkSummary {
|
|||
|
||||
impl ChunkSummary {
|
||||
/// Construct a ChunkSummary that has None for all timestamps
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new_without_timestamps(
|
||||
partition_key: Arc<str>,
|
||||
table_name: Arc<str>,
|
||||
id: u32,
|
||||
storage: ChunkStorage,
|
||||
lifecycle_action: Option<ChunkLifecycleAction>,
|
||||
estimated_bytes: usize,
|
||||
memory_bytes: usize,
|
||||
object_store_bytes: usize,
|
||||
row_count: usize,
|
||||
) -> Self {
|
||||
Self {
|
||||
|
@ -183,11 +188,47 @@ impl ChunkSummary {
|
|||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closed: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a new ChunkSummary with None for all timestamps
|
||||
pub fn normalize(self) -> Self {
|
||||
let ChunkSummary {
|
||||
partition_key,
|
||||
table_name,
|
||||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
..
|
||||
} = self;
|
||||
Self::new_without_timestamps(
|
||||
partition_key,
|
||||
table_name,
|
||||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
)
|
||||
}
|
||||
|
||||
/// Normalizes a set of ChunkSummaries for comparison by removing timestamps
|
||||
pub fn normalize_summaries(summaries: Vec<Self>) -> Vec<Self> {
|
||||
let mut summaries = summaries
|
||||
.into_iter()
|
||||
.map(|summary| summary.normalize())
|
||||
.collect::<Vec<_>>();
|
||||
summaries.sort_unstable();
|
||||
summaries
|
||||
}
|
||||
}
|
||||
|
|
10
docs/sql.md
10
docs/sql.md
|
@ -86,7 +86,7 @@ OBSERVER;
|
|||
Example SQL to show the total estimated storage size by database:
|
||||
|
||||
SELECT database_name, storage, count(*) as num_chunks,
|
||||
sum(estimated_bytes)/1024/1024 as estimated_mb
|
||||
sum(memory_bytes)/1024/1024 as estimated_mb
|
||||
FROM chunks
|
||||
GROUP BY database_name, storage
|
||||
ORDER BY estimated_mb desc;
|
||||
|
@ -164,7 +164,7 @@ Here are some interesting reports you can run when in `OBSERVER` mode:
|
|||
```sql
|
||||
SELECT
|
||||
database_name, count(*) as num_chunks,
|
||||
sum(estimated_bytes)/1024/1024 as estimated_mb
|
||||
sum(memory_bytes)/1024/1024 as estimated_mb
|
||||
FROM chunks
|
||||
GROUP BY database_name
|
||||
ORDER BY estimated_mb desc
|
||||
|
@ -175,7 +175,7 @@ LIMIT 20;
|
|||
```sql
|
||||
SELECT
|
||||
database_name, storage, count(*) as num_chunks,
|
||||
sum(estimated_bytes)/1024/1024 as estimated_mb
|
||||
sum(memory_bytes)/1024/1024 as estimated_mb
|
||||
FROM chunks
|
||||
GROUP BY database_name, storage
|
||||
ORDER BY estimated_mb desc
|
||||
|
@ -187,7 +187,7 @@ LIMIT 20;
|
|||
```sql
|
||||
SELECT
|
||||
database_name, table_name, storage, count(*) as num_chunks,
|
||||
sum(estimated_bytes)/1024/1024 as estimated_mb
|
||||
sum(memory_bytes)/1024/1024 as estimated_mb
|
||||
FROM chunks
|
||||
GROUP BY database_name, table_name, storage
|
||||
ORDER BY estimated_mb desc
|
||||
|
@ -217,7 +217,7 @@ LIMIT 20;
|
|||
|
||||
### Time range stored per table
|
||||
|
||||
This query provides an estimate, by table, of how long of a time range
|
||||
This query provides an estimate, by table, of how long of a time range
|
||||
and the estimated number of rows per second it holds in IOx
|
||||
(the `1,000,000,000` is the conversion from nanoseconds)
|
||||
|
||||
|
|
|
@ -62,8 +62,11 @@ message Chunk {
|
|||
// Is there any outstanding lifecycle action for this chunk?
|
||||
ChunkLifecycleAction lifecycle_action = 10;
|
||||
|
||||
// The total estimated size of this chunk, in bytes
|
||||
uint64 estimated_bytes = 4;
|
||||
// The number of bytes used to store this chunk in memory
|
||||
uint64 memory_bytes = 4;
|
||||
|
||||
// The number of bytes used to store this chunk in object storage
|
||||
uint64 object_store_bytes = 11;
|
||||
|
||||
// The number of rows in this chunk
|
||||
uint64 row_count = 9;
|
||||
|
|
|
@ -13,7 +13,8 @@ impl From<ChunkSummary> for management::Chunk {
|
|||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
|
@ -25,7 +26,8 @@ impl From<ChunkSummary> for management::Chunk {
|
|||
let lifecycle_action: management::ChunkLifecycleAction = lifecycle_action.into();
|
||||
let lifecycle_action = lifecycle_action.into(); // convert to i32
|
||||
|
||||
let estimated_bytes = estimated_bytes as u64;
|
||||
let memory_bytes = memory_bytes as u64;
|
||||
let object_store_bytes = object_store_bytes as u64;
|
||||
let row_count = row_count as u64;
|
||||
|
||||
let partition_key = partition_key.to_string();
|
||||
|
@ -41,7 +43,8 @@ impl From<ChunkSummary> for management::Chunk {
|
|||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
|
@ -114,12 +117,14 @@ impl TryFrom<management::Chunk> for ChunkSummary {
|
|||
partition_key,
|
||||
table_name,
|
||||
id,
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
..
|
||||
} = proto;
|
||||
|
||||
let estimated_bytes = estimated_bytes as usize;
|
||||
let memory_bytes = memory_bytes as usize;
|
||||
let object_store_bytes = object_store_bytes as usize;
|
||||
let row_count = row_count as usize;
|
||||
let partition_key = Arc::from(partition_key.as_str());
|
||||
let table_name = Arc::from(table_name.as_str());
|
||||
|
@ -130,7 +135,8 @@ impl TryFrom<management::Chunk> for ChunkSummary {
|
|||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
|
@ -184,7 +190,8 @@ mod test {
|
|||
partition_key: "foo".to_string(),
|
||||
table_name: "bar".to_string(),
|
||||
id: 42,
|
||||
estimated_bytes: 1234,
|
||||
memory_bytes: 1234,
|
||||
object_store_bytes: 567,
|
||||
row_count: 321,
|
||||
storage: management::ChunkStorage::ObjectStoreOnly.into(),
|
||||
lifecycle_action: management::ChunkLifecycleAction::Moving.into(),
|
||||
|
@ -198,7 +205,8 @@ mod test {
|
|||
partition_key: Arc::from("foo"),
|
||||
table_name: Arc::from("bar"),
|
||||
id: 42,
|
||||
estimated_bytes: 1234,
|
||||
memory_bytes: 1234,
|
||||
object_store_bytes: 567,
|
||||
row_count: 321,
|
||||
storage: ChunkStorage::ObjectStoreOnly,
|
||||
lifecycle_action: Some(ChunkLifecycleAction::Moving),
|
||||
|
@ -220,7 +228,8 @@ mod test {
|
|||
partition_key: Arc::from("foo"),
|
||||
table_name: Arc::from("bar"),
|
||||
id: 42,
|
||||
estimated_bytes: 1234,
|
||||
memory_bytes: 1234,
|
||||
object_store_bytes: 567,
|
||||
row_count: 321,
|
||||
storage: ChunkStorage::ObjectStoreOnly,
|
||||
lifecycle_action: Some(ChunkLifecycleAction::Persisting),
|
||||
|
@ -235,7 +244,8 @@ mod test {
|
|||
partition_key: "foo".to_string(),
|
||||
table_name: "bar".to_string(),
|
||||
id: 42,
|
||||
estimated_bytes: 1234,
|
||||
memory_bytes: 1234,
|
||||
object_store_bytes: 567,
|
||||
row_count: 321,
|
||||
storage: management::ChunkStorage::ObjectStoreOnly.into(),
|
||||
lifecycle_action: management::ChunkLifecycleAction::Persisting.into(),
|
||||
|
|
|
@ -265,16 +265,16 @@ async fn sql_select_from_system_chunks() {
|
|||
// test timestamps, etc)
|
||||
|
||||
let expected = vec![
|
||||
"+----+---------------+------------+-------------------+-----------------+-----------+",
|
||||
"| id | partition_key | table_name | storage | estimated_bytes | row_count |",
|
||||
"+----+---------------+------------+-------------------+-----------------+-----------+",
|
||||
"| 0 | 1970-01-01T00 | h2o | OpenMutableBuffer | 213 | 3 |",
|
||||
"| 0 | 1970-01-01T00 | o2 | OpenMutableBuffer | 177 | 2 |",
|
||||
"+----+---------------+------------+-------------------+-----------------+-----------+",
|
||||
"+----+---------------+------------+-------------------+--------------+-----------+",
|
||||
"| id | partition_key | table_name | storage | memory_bytes | row_count |",
|
||||
"+----+---------------+------------+-------------------+--------------+-----------+",
|
||||
"| 0 | 1970-01-01T00 | h2o | OpenMutableBuffer | 213 | 3 |",
|
||||
"| 0 | 1970-01-01T00 | o2 | OpenMutableBuffer | 177 | 2 |",
|
||||
"+----+---------------+------------+-------------------+--------------+-----------+",
|
||||
];
|
||||
run_sql_test_case!(
|
||||
TwoMeasurementsManyFieldsOneChunk {},
|
||||
"SELECT id, partition_key, table_name, storage, estimated_bytes, row_count from system.chunks",
|
||||
"SELECT id, partition_key, table_name, storage, memory_bytes, row_count from system.chunks",
|
||||
&expected
|
||||
);
|
||||
}
|
||||
|
@ -316,24 +316,24 @@ async fn sql_select_from_system_chunk_columns() {
|
|||
// with different chunk configurations.
|
||||
|
||||
let expected = vec![
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
|
||||
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | estimated_bytes |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
|
||||
"| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | Boston | Boston | 252 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | MA | MA | 240 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 50 | 250 | 51 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 1 | Boston | Boston | 35 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | state | OpenMutableBuffer | 2 | CA | MA | 41 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | city | OpenMutableBuffer | 1 | Boston | Boston | 31 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | other_temp | OpenMutableBuffer | 1 | 72.4 | 72.4 | 17 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | state | OpenMutableBuffer | 1 | CA | CA | 27 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | time | OpenMutableBuffer | 1 | 350 | 350 | 17 |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
|
||||
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | memory_bytes |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
|
||||
"| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | Boston | Boston | 252 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | MA | MA | 240 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 1 | 70.4 | 70.4 | 425 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 50 | 250 | 51 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 1 | Boston | Boston | 35 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | state | OpenMutableBuffer | 2 | CA | MA | 41 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | city | OpenMutableBuffer | 1 | Boston | Boston | 31 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | other_temp | OpenMutableBuffer | 1 | 72.4 | 72.4 | 17 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | state | OpenMutableBuffer | 1 | CA | CA | 27 |",
|
||||
"| 1970-01-01T00 | 1 | h2o | time | OpenMutableBuffer | 1 | 350 | 350 | 17 |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
|
||||
];
|
||||
run_sql_test_case!(
|
||||
TwoMeasurementsManyFieldsTwoChunks {},
|
||||
|
|
|
@ -161,16 +161,16 @@ impl Table {
|
|||
.iter()
|
||||
.flat_map(|rg| rg.column_sizes())
|
||||
// combine statistics for columns across row groups
|
||||
.fold(BTreeMap::new(), |mut map, (name, estimated_bytes)| {
|
||||
.fold(BTreeMap::new(), |mut map, (name, memory_bytes)| {
|
||||
let entry = map.entry(name).or_insert(0);
|
||||
*entry += estimated_bytes;
|
||||
*entry += memory_bytes;
|
||||
map
|
||||
})
|
||||
// Now turn into Vec<ChunkColumnSummary>
|
||||
.into_iter()
|
||||
.map(|(name, estimated_bytes)| ChunkColumnSummary {
|
||||
.map(|(name, memory_bytes)| ChunkColumnSummary {
|
||||
name: name.into(),
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
@ -1116,11 +1116,11 @@ mod test {
|
|||
let expected = vec![
|
||||
ChunkColumnSummary {
|
||||
name: "count".into(),
|
||||
estimated_bytes: 110,
|
||||
memory_bytes: 110,
|
||||
},
|
||||
ChunkColumnSummary {
|
||||
name: "time".into(),
|
||||
estimated_bytes: 107,
|
||||
memory_bytes: 107,
|
||||
},
|
||||
];
|
||||
assert_eq!(table.column_sizes(), expected);
|
||||
|
|
|
@ -2262,36 +2262,6 @@ mod tests {
|
|||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![1]);
|
||||
}
|
||||
|
||||
/// Normalizes a set of ChunkSummaries for comparison by removing timestamps
|
||||
fn normalize_summaries(summaries: Vec<ChunkSummary>) -> Vec<ChunkSummary> {
|
||||
let mut summaries = summaries
|
||||
.into_iter()
|
||||
.map(|summary| {
|
||||
let ChunkSummary {
|
||||
partition_key,
|
||||
table_name,
|
||||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
row_count,
|
||||
..
|
||||
} = summary;
|
||||
ChunkSummary::new_without_timestamps(
|
||||
partition_key,
|
||||
table_name,
|
||||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
row_count,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
summaries.sort_unstable();
|
||||
summaries
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn partition_chunk_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
|
@ -2306,7 +2276,7 @@ mod tests {
|
|||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
let chunk_summaries = db.partition_chunk_summaries("1970-01-05T15");
|
||||
let chunk_summaries = normalize_summaries(chunk_summaries);
|
||||
let chunk_summaries = ChunkSummary::normalize_summaries(chunk_summaries);
|
||||
|
||||
let expected = vec![ChunkSummary::new_without_timestamps(
|
||||
Arc::from("1970-01-05T15"),
|
||||
|
@ -2314,7 +2284,8 @@ mod tests {
|
|||
0,
|
||||
ChunkStorage::OpenMutableBuffer,
|
||||
None,
|
||||
70,
|
||||
70, // memory_size
|
||||
0, // os_size
|
||||
1,
|
||||
)];
|
||||
|
||||
|
@ -2322,7 +2293,7 @@ mod tests {
|
|||
.chunk_summaries()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(|x| x.estimated_bytes)
|
||||
.map(|x| x.memory_bytes)
|
||||
.sum();
|
||||
|
||||
assert_eq!(db.catalog.metrics().memory().mutable_buffer(), size);
|
||||
|
@ -2412,7 +2383,7 @@ mod tests {
|
|||
write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000").await;
|
||||
|
||||
let chunk_summaries = db.chunk_summaries().expect("expected summary to return");
|
||||
let chunk_summaries = normalize_summaries(chunk_summaries);
|
||||
let chunk_summaries = ChunkSummary::normalize_summaries(chunk_summaries);
|
||||
|
||||
let lifecycle_action = None;
|
||||
|
||||
|
@ -2424,6 +2395,7 @@ mod tests {
|
|||
ChunkStorage::ReadBufferAndObjectStore,
|
||||
lifecycle_action,
|
||||
2115, // size of RB and OS chunks
|
||||
1132, // size of parquet file
|
||||
1,
|
||||
),
|
||||
ChunkSummary::new_without_timestamps(
|
||||
|
@ -2433,6 +2405,7 @@ mod tests {
|
|||
ChunkStorage::OpenMutableBuffer,
|
||||
lifecycle_action,
|
||||
64,
|
||||
0, // no OS chunks
|
||||
1,
|
||||
),
|
||||
ChunkSummary::new_without_timestamps(
|
||||
|
@ -2442,6 +2415,7 @@ mod tests {
|
|||
ChunkStorage::ClosedMutableBuffer,
|
||||
lifecycle_action,
|
||||
2398,
|
||||
0, // no OS chunks
|
||||
1,
|
||||
),
|
||||
ChunkSummary::new_without_timestamps(
|
||||
|
@ -2451,13 +2425,14 @@ mod tests {
|
|||
ChunkStorage::OpenMutableBuffer,
|
||||
lifecycle_action,
|
||||
87,
|
||||
0, // no OS chunks
|
||||
1,
|
||||
),
|
||||
];
|
||||
|
||||
assert_eq!(
|
||||
expected, chunk_summaries,
|
||||
"expected:\n{:#?}\n\nactual:{:#?}\n\n",
|
||||
"\n\nexpected:\n{:#?}\n\nactual:{:#?}\n\n",
|
||||
expected, chunk_summaries
|
||||
);
|
||||
|
||||
|
|
|
@ -476,7 +476,8 @@ impl CatalogChunk {
|
|||
id: self.addr.chunk_id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes: self.size(),
|
||||
memory_bytes: self.memory_bytes(),
|
||||
object_store_bytes: self.object_store_bytes(),
|
||||
row_count,
|
||||
time_of_first_write: self.time_of_first_write,
|
||||
time_of_last_write: self.time_of_last_write,
|
||||
|
@ -491,7 +492,7 @@ impl CatalogChunk {
|
|||
fn to_summary(v: (&str, usize)) -> ChunkColumnSummary {
|
||||
ChunkColumnSummary {
|
||||
name: v.0.into(),
|
||||
estimated_bytes: v.1,
|
||||
memory_bytes: v.1,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -529,7 +530,7 @@ impl CatalogChunk {
|
|||
}
|
||||
|
||||
/// Returns an approximation of the amount of process memory consumed by the chunk
|
||||
pub fn size(&self) -> usize {
|
||||
pub fn memory_bytes(&self) -> usize {
|
||||
match &self.stage {
|
||||
ChunkStage::Open { mb_chunk, .. } => mb_chunk.size(),
|
||||
ChunkStage::Frozen { representation, .. } => match &representation {
|
||||
|
@ -550,6 +551,15 @@ impl CatalogChunk {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the number of bytes of object storage consumed by this chunk
|
||||
pub fn object_store_bytes(&self) -> usize {
|
||||
match &self.stage {
|
||||
ChunkStage::Open { .. } => 0,
|
||||
ChunkStage::Frozen { .. } => 0,
|
||||
ChunkStage::Persisted { parquet, .. } => parquet.file_size_bytes(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the mutable buffer storage for chunks in the Open state
|
||||
pub fn mutable_buffer(&mut self) -> Result<&mut MBChunk> {
|
||||
match &mut self.stage {
|
||||
|
|
|
@ -197,7 +197,8 @@ fn chunk_summaries_schema() -> SchemaRef {
|
|||
Field::new("table_name", DataType::Utf8, false),
|
||||
Field::new("storage", DataType::Utf8, false),
|
||||
Field::new("lifecycle_action", DataType::Utf8, true),
|
||||
Field::new("estimated_bytes", DataType::UInt64, false),
|
||||
Field::new("memory_bytes", DataType::UInt64, false),
|
||||
Field::new("object_store_bytes", DataType::UInt64, false),
|
||||
Field::new("row_count", DataType::UInt64, false),
|
||||
Field::new("time_of_first_write", ts.clone(), true),
|
||||
Field::new("time_of_last_write", ts.clone(), true),
|
||||
|
@ -223,9 +224,13 @@ fn from_chunk_summaries(schema: SchemaRef, chunks: Vec<ChunkSummary>) -> Result<
|
|||
.iter()
|
||||
.map(|c| c.lifecycle_action.map(|a| a.name()))
|
||||
.collect::<StringArray>();
|
||||
let estimated_bytes = chunks
|
||||
let memory_bytes = chunks
|
||||
.iter()
|
||||
.map(|c| Some(c.estimated_bytes as u64))
|
||||
.map(|c| Some(c.memory_bytes as u64))
|
||||
.collect::<UInt64Array>();
|
||||
let object_store_bytes = chunks
|
||||
.iter()
|
||||
.map(|c| Some(c.object_store_bytes as u64).filter(|&v| v > 0))
|
||||
.collect::<UInt64Array>();
|
||||
let row_counts = chunks
|
||||
.iter()
|
||||
|
@ -255,7 +260,8 @@ fn from_chunk_summaries(schema: SchemaRef, chunks: Vec<ChunkSummary>) -> Result<
|
|||
Arc::new(table_name),
|
||||
Arc::new(storage),
|
||||
Arc::new(lifecycle_action),
|
||||
Arc::new(estimated_bytes),
|
||||
Arc::new(memory_bytes),
|
||||
Arc::new(object_store_bytes),
|
||||
Arc::new(row_counts),
|
||||
Arc::new(time_of_first_write),
|
||||
Arc::new(time_of_last_write),
|
||||
|
@ -380,7 +386,7 @@ fn chunk_columns_schema() -> SchemaRef {
|
|||
Field::new("row_count", DataType::UInt64, true),
|
||||
Field::new("min_value", DataType::Utf8, true),
|
||||
Field::new("max_value", DataType::Utf8, true),
|
||||
Field::new("estimated_bytes", DataType::UInt64, true),
|
||||
Field::new("memory_bytes", DataType::UInt64, true),
|
||||
]))
|
||||
}
|
||||
|
||||
|
@ -396,7 +402,7 @@ fn assemble_chunk_columns(
|
|||
.map(|column_summary| {
|
||||
(
|
||||
column_summary.name.as_ref(),
|
||||
column_summary.estimated_bytes as u64,
|
||||
column_summary.memory_bytes as u64,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
|
@ -413,7 +419,7 @@ fn assemble_chunk_columns(
|
|||
let mut row_count = UInt64Builder::new(row_estimate);
|
||||
let mut min_values = StringBuilder::new(row_estimate);
|
||||
let mut max_values = StringBuilder::new(row_estimate);
|
||||
let mut estimated_bytes = UInt64Builder::new(row_estimate);
|
||||
let mut memory_bytes = UInt64Builder::new(row_estimate);
|
||||
|
||||
// Note no rows are produced for partitions with no chunks, or
|
||||
// tables with no partitions: There are other tables to list tables
|
||||
|
@ -442,7 +448,7 @@ fn assemble_chunk_columns(
|
|||
|
||||
let size = column_index.remove(column.name.as_str());
|
||||
|
||||
estimated_bytes.append_option(size)?;
|
||||
memory_bytes.append_option(size)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -457,7 +463,7 @@ fn assemble_chunk_columns(
|
|||
Arc::new(row_count.finish()),
|
||||
Arc::new(min_values.finish()),
|
||||
Arc::new(max_values.finish()),
|
||||
Arc::new(estimated_bytes.finish()),
|
||||
Arc::new(memory_bytes.finish()),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
@ -616,7 +622,8 @@ mod tests {
|
|||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer,
|
||||
lifecycle_action: None,
|
||||
estimated_bytes: 23754,
|
||||
memory_bytes: 23754,
|
||||
object_store_bytes: 0,
|
||||
row_count: 11,
|
||||
time_of_first_write: Some(DateTime::from_utc(
|
||||
NaiveDateTime::from_timestamp(10, 0),
|
||||
|
@ -628,10 +635,11 @@ mod tests {
|
|||
ChunkSummary {
|
||||
partition_key: Arc::from("p1"),
|
||||
table_name: Arc::from("table1"),
|
||||
id: 0,
|
||||
id: 1,
|
||||
storage: ChunkStorage::OpenMutableBuffer,
|
||||
lifecycle_action: Some(ChunkLifecycleAction::Persisting),
|
||||
estimated_bytes: 23454,
|
||||
memory_bytes: 23455,
|
||||
object_store_bytes: 0,
|
||||
row_count: 22,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: Some(DateTime::from_utc(
|
||||
|
@ -640,15 +648,35 @@ mod tests {
|
|||
)),
|
||||
time_closed: None,
|
||||
},
|
||||
ChunkSummary {
|
||||
partition_key: Arc::from("p1"),
|
||||
table_name: Arc::from("table1"),
|
||||
id: 2,
|
||||
storage: ChunkStorage::ObjectStoreOnly,
|
||||
lifecycle_action: None,
|
||||
memory_bytes: 1234,
|
||||
object_store_bytes: 5678,
|
||||
row_count: 33,
|
||||
time_of_first_write: Some(DateTime::from_utc(
|
||||
NaiveDateTime::from_timestamp(100, 0),
|
||||
Utc,
|
||||
)),
|
||||
time_of_last_write: Some(DateTime::from_utc(
|
||||
NaiveDateTime::from_timestamp(200, 0),
|
||||
Utc,
|
||||
)),
|
||||
time_closed: None,
|
||||
},
|
||||
];
|
||||
|
||||
let expected = vec![
|
||||
"+----+---------------+------------+-------------------+------------------------------+-----------------+-----------+---------------------+---------------------+-------------+",
|
||||
"| id | partition_key | table_name | storage | lifecycle_action | estimated_bytes | row_count | time_of_first_write | time_of_last_write | time_closed |",
|
||||
"+----+---------------+------------+-------------------+------------------------------+-----------------+-----------+---------------------+---------------------+-------------+",
|
||||
"| 0 | p1 | table1 | OpenMutableBuffer | | 23754 | 11 | 1970-01-01 00:00:10 | | |",
|
||||
"| 0 | p1 | table1 | OpenMutableBuffer | Persisting to Object Storage | 23454 | 22 | | 1970-01-01 00:01:20 | |",
|
||||
"+----+---------------+------------+-------------------+------------------------------+-----------------+-----------+---------------------+---------------------+-------------+",
|
||||
"+----+---------------+------------+-------------------+------------------------------+--------------+--------------------+-----------+---------------------+---------------------+-------------+",
|
||||
"| id | partition_key | table_name | storage | lifecycle_action | memory_bytes | object_store_bytes | row_count | time_of_first_write | time_of_last_write | time_closed |",
|
||||
"+----+---------------+------------+-------------------+------------------------------+--------------+--------------------+-----------+---------------------+---------------------+-------------+",
|
||||
"| 0 | p1 | table1 | OpenMutableBuffer | | 23754 | | 11 | 1970-01-01 00:00:10 | | |",
|
||||
"| 1 | p1 | table1 | OpenMutableBuffer | Persisting to Object Storage | 23455 | | 22 | | 1970-01-01 00:01:20 | |",
|
||||
"| 2 | p1 | table1 | ObjectStoreOnly | | 1234 | 5678 | 33 | 1970-01-01 00:01:40 | 1970-01-01 00:03:20 | |",
|
||||
"+----+---------------+------------+-------------------+------------------------------+--------------+--------------------+-----------+---------------------+---------------------+-------------+",
|
||||
];
|
||||
|
||||
let schema = chunk_summaries_schema();
|
||||
|
@ -825,7 +853,8 @@ mod tests {
|
|||
id: 42,
|
||||
storage: ChunkStorage::ReadBuffer,
|
||||
lifecycle_action,
|
||||
estimated_bytes: 23754,
|
||||
memory_bytes: 23754,
|
||||
object_store_bytes: 0,
|
||||
row_count: 11,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
|
@ -834,11 +863,11 @@ mod tests {
|
|||
columns: vec![
|
||||
ChunkColumnSummary {
|
||||
name: "c1".into(),
|
||||
estimated_bytes: 11,
|
||||
memory_bytes: 11,
|
||||
},
|
||||
ChunkColumnSummary {
|
||||
name: "c2".into(),
|
||||
estimated_bytes: 12,
|
||||
memory_bytes: 12,
|
||||
},
|
||||
],
|
||||
},
|
||||
|
@ -859,7 +888,8 @@ mod tests {
|
|||
id: 43,
|
||||
storage: ChunkStorage::OpenMutableBuffer,
|
||||
lifecycle_action,
|
||||
estimated_bytes: 23754,
|
||||
memory_bytes: 23754,
|
||||
object_store_bytes: 0,
|
||||
row_count: 11,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
|
@ -867,7 +897,7 @@ mod tests {
|
|||
},
|
||||
columns: vec![ChunkColumnSummary {
|
||||
name: "c1".into(),
|
||||
estimated_bytes: 100,
|
||||
memory_bytes: 100,
|
||||
}],
|
||||
},
|
||||
),
|
||||
|
@ -887,7 +917,8 @@ mod tests {
|
|||
id: 44,
|
||||
storage: ChunkStorage::OpenMutableBuffer,
|
||||
lifecycle_action,
|
||||
estimated_bytes: 23754,
|
||||
memory_bytes: 23754,
|
||||
object_store_bytes: 0,
|
||||
row_count: 11,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
|
@ -895,21 +926,21 @@ mod tests {
|
|||
},
|
||||
columns: vec![ChunkColumnSummary {
|
||||
name: "c3".into(),
|
||||
estimated_bytes: 200,
|
||||
memory_bytes: 200,
|
||||
}],
|
||||
},
|
||||
),
|
||||
];
|
||||
|
||||
let expected = vec![
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
|
||||
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | estimated_bytes |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
|
||||
"| p1 | 42 | t1 | c1 | ReadBuffer | 55 | bar | foo | 11 |",
|
||||
"| p1 | 42 | t1 | c2 | ReadBuffer | 66 | 11 | 43 | 12 |",
|
||||
"| p2 | 43 | t1 | c1 | OpenMutableBuffer | 667 | 110 | 430 | 100 |",
|
||||
"| p2 | 44 | t2 | c3 | OpenMutableBuffer | 4 | -1 | 2 | 200 |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+-----------------+",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
|
||||
"| partition_key | chunk_id | table_name | column_name | storage | row_count | min_value | max_value | memory_bytes |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
|
||||
"| p1 | 42 | t1 | c1 | ReadBuffer | 55 | bar | foo | 11 |",
|
||||
"| p1 | 42 | t1 | c2 | ReadBuffer | 66 | 11 | 43 | 12 |",
|
||||
"| p2 | 43 | t1 | c1 | OpenMutableBuffer | 667 | 110 | 430 | 100 |",
|
||||
"| p2 | 44 | t2 | c3 | OpenMutableBuffer | 4 | -1 | 2 | 200 |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+-----------+-----------+--------------+",
|
||||
];
|
||||
|
||||
let batch = assemble_chunk_columns(chunk_columns_schema(), summaries).unwrap();
|
||||
|
|
|
@ -83,7 +83,7 @@ OBSERVER;
|
|||
Example SQL to show the total estimated storage size by database:
|
||||
|
||||
SELECT database_name, storage, count(*) as num_chunks,
|
||||
sum(estimated_bytes)/1024/1024 as estimated_mb
|
||||
sum(memory_bytes)/1024/1024 as estimated_mb
|
||||
FROM chunks
|
||||
GROUP BY database_name, storage
|
||||
ORDER BY estimated_mb desc;
|
||||
|
|
|
@ -116,7 +116,7 @@ SHOW COLUMNS FROM my_table; ;; Show columns in the table
|
|||
SELECT
|
||||
partition_key, table_name, storage,
|
||||
count(*) as chunk_count,
|
||||
sum(estimated_bytes)/(1024*1024) as size_mb
|
||||
sum(memory_bytes)/(1024*1024) as size_mb
|
||||
FROM
|
||||
system.chunks
|
||||
GROUP BY
|
||||
|
|
|
@ -317,7 +317,8 @@ async fn test_chunk_get() {
|
|||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer.into(),
|
||||
lifecycle_action,
|
||||
estimated_bytes: 100,
|
||||
memory_bytes: 100,
|
||||
object_store_bytes: 0,
|
||||
row_count: 2,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
|
@ -329,7 +330,8 @@ async fn test_chunk_get() {
|
|||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer.into(),
|
||||
lifecycle_action,
|
||||
estimated_bytes: 82,
|
||||
memory_bytes: 82,
|
||||
object_store_bytes: 0,
|
||||
row_count: 1,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
|
@ -498,7 +500,8 @@ async fn test_list_partition_chunks() {
|
|||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer.into(),
|
||||
lifecycle_action: ChunkLifecycleAction::Unspecified.into(),
|
||||
estimated_bytes: 100,
|
||||
memory_bytes: 100,
|
||||
object_store_bytes: 0,
|
||||
row_count: 2,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
|
@ -819,7 +822,8 @@ fn normalize_chunks(chunks: Vec<Chunk>) -> Vec<Chunk> {
|
|||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
row_count,
|
||||
..
|
||||
} = summary;
|
||||
|
@ -829,11 +833,12 @@ fn normalize_chunks(chunks: Vec<Chunk>) -> Vec<Chunk> {
|
|||
id,
|
||||
storage,
|
||||
lifecycle_action,
|
||||
estimated_bytes,
|
||||
row_count,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closed: None,
|
||||
memory_bytes,
|
||||
object_store_bytes,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
|
|
|
@ -197,7 +197,7 @@ async fn test_get_chunks() {
|
|||
.and(predicate::str::contains(
|
||||
r#""storage": "OpenMutableBuffer","#,
|
||||
))
|
||||
.and(predicate::str::contains(r#""estimated_bytes": 100"#))
|
||||
.and(predicate::str::contains(r#""memory_bytes": 100"#))
|
||||
// Check for a non empty timestamp such as
|
||||
// "time_of_first_write": "2021-03-30T17:11:10.723866Z",
|
||||
.and(predicate::str::contains(r#""time_of_first_write": "20"#));
|
||||
|
|
Loading…
Reference in New Issue