Merge branch 'main' into crepererum/issue1313
commit
efe077da8f
|
@ -1,11 +1,11 @@
|
||||||
use std::collections::{BTreeSet, HashMap};
|
use std::collections::BTreeSet;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow::record_batch::RecordBatch;
|
use arrow::record_batch::RecordBatch;
|
||||||
use data_types::timestamp::TimestampRange;
|
use data_types::timestamp::TimestampRange;
|
||||||
use internal_types::schema::{Schema, TIME_COLUMN_NAME};
|
use internal_types::schema::{Schema, TIME_COLUMN_NAME};
|
||||||
use internal_types::selection::Selection;
|
use internal_types::selection::Selection;
|
||||||
use snafu::{OptionExt, ResultExt, Snafu};
|
use snafu::{ensure, ResultExt, Snafu};
|
||||||
|
|
||||||
use super::Chunk;
|
use super::Chunk;
|
||||||
use data_types::{error::ErrorLogger, partition_metadata::Statistics};
|
use data_types::{error::ErrorLogger, partition_metadata::Statistics};
|
||||||
|
@ -26,45 +26,27 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||||
/// A queryable snapshot of a mutable buffer chunk
|
/// A queryable snapshot of a mutable buffer chunk
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ChunkSnapshot {
|
pub struct ChunkSnapshot {
|
||||||
/// Maps table name to `TableSnapshot`
|
|
||||||
records: HashMap<String, TableSnapshot>,
|
|
||||||
// TODO: Memory tracking
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct TableSnapshot {
|
|
||||||
schema: Schema,
|
schema: Schema,
|
||||||
batch: RecordBatch,
|
batch: RecordBatch,
|
||||||
|
table_name: Arc<str>,
|
||||||
timestamp_range: Option<TimestampRange>,
|
timestamp_range: Option<TimestampRange>,
|
||||||
}
|
// TODO: Memory tracking
|
||||||
|
|
||||||
impl TableSnapshot {
|
|
||||||
fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
|
|
||||||
match (self.timestamp_range, timestamp_range) {
|
|
||||||
(Some(a), Some(b)) => !a.disjoint(b),
|
|
||||||
(None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */
|
|
||||||
// the predicate
|
|
||||||
(_, None) => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ChunkSnapshot {
|
impl ChunkSnapshot {
|
||||||
pub fn new(chunk: &Chunk) -> Self {
|
pub fn new(chunk: &Chunk) -> Self {
|
||||||
let mut records: HashMap<String, TableSnapshot> = Default::default();
|
|
||||||
let table = &chunk.table;
|
let table = &chunk.table;
|
||||||
|
|
||||||
let schema = table
|
let schema = table
|
||||||
.schema(&chunk.dictionary, Selection::All)
|
.schema(&chunk.dictionary, Selection::All)
|
||||||
.log_if_error("ChunkSnapshot getting table schema")
|
.log_if_error("ChunkSnapshot getting table schema")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let batch = table
|
let batch = table
|
||||||
.to_arrow(&chunk.dictionary, Selection::All)
|
.to_arrow(&chunk.dictionary, Selection::All)
|
||||||
.log_if_error("ChunkSnapshot converting table to arrow")
|
.log_if_error("ChunkSnapshot converting table to arrow")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let name = chunk.table_name.as_ref();
|
|
||||||
|
|
||||||
let timestamp_range =
|
let timestamp_range =
|
||||||
chunk
|
chunk
|
||||||
.dictionary
|
.dictionary
|
||||||
|
@ -82,73 +64,66 @@ impl ChunkSnapshot {
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
records.insert(
|
Self {
|
||||||
name.to_string(),
|
schema,
|
||||||
TableSnapshot {
|
batch,
|
||||||
schema,
|
table_name: Arc::clone(&chunk.table_name),
|
||||||
batch,
|
timestamp_range,
|
||||||
timestamp_range,
|
}
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
Self { records }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// returns true if there is no data in this snapshot
|
/// returns true if there is no data in this snapshot
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.records.is_empty()
|
self.batch.num_rows() == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return true if this snapshot has the specified table name
|
/// Return true if this snapshot has the specified table name
|
||||||
pub fn has_table(&self, table_name: &str) -> bool {
|
pub fn has_table(&self, table_name: &str) -> bool {
|
||||||
self.records.get(table_name).is_some()
|
self.table_name.as_ref() == table_name
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return Schema for the specified table / columns
|
/// Return Schema for the specified table / columns
|
||||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||||
let table = self
|
// Temporary #1295
|
||||||
.records
|
ensure!(
|
||||||
.get(table_name)
|
self.table_name.as_ref() == table_name,
|
||||||
.context(TableNotFound { table_name })?;
|
TableNotFound { table_name }
|
||||||
|
);
|
||||||
|
|
||||||
Ok(match selection {
|
Ok(match selection {
|
||||||
Selection::All => table.schema.clone(),
|
Selection::All => self.schema.clone(),
|
||||||
Selection::Some(columns) => {
|
Selection::Some(columns) => {
|
||||||
let columns = table.schema.select(columns).context(SelectColumns)?;
|
let columns = self.schema.select(columns).context(SelectColumns)?;
|
||||||
table.schema.project(&columns)
|
self.schema.project(&columns)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a list of tables with writes matching the given timestamp_range
|
/// Returns a list of tables with writes matching the given timestamp_range
|
||||||
pub fn table_names(
|
pub fn table_names(&self, timestamp_range: Option<TimestampRange>) -> BTreeSet<String> {
|
||||||
&self,
|
let mut ret = BTreeSet::new();
|
||||||
timestamp_range: Option<TimestampRange>,
|
if self.matches_predicate(×tamp_range) {
|
||||||
) -> impl Iterator<Item = &String> + '_ {
|
ret.insert(self.table_name.to_string());
|
||||||
self.records
|
}
|
||||||
.iter()
|
ret
|
||||||
.flat_map(move |(table_name, table_snapshot)| {
|
|
||||||
table_snapshot
|
|
||||||
.matches_predicate(×tamp_range)
|
|
||||||
.then(|| table_name)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a RecordBatch with the given selection
|
/// Returns a RecordBatch with the given selection
|
||||||
pub fn read_filter(&self, table_name: &str, selection: Selection<'_>) -> Result<RecordBatch> {
|
pub fn read_filter(&self, table_name: &str, selection: Selection<'_>) -> Result<RecordBatch> {
|
||||||
let table = self
|
// Temporary #1295
|
||||||
.records
|
ensure!(
|
||||||
.get(table_name)
|
self.table_name.as_ref() == table_name,
|
||||||
.context(TableNotFound { table_name })?;
|
TableNotFound { table_name }
|
||||||
|
);
|
||||||
|
|
||||||
Ok(match selection {
|
Ok(match selection {
|
||||||
Selection::All => table.batch.clone(),
|
Selection::All => self.batch.clone(),
|
||||||
Selection::Some(columns) => {
|
Selection::Some(columns) => {
|
||||||
let projection = table.schema.select(columns).context(SelectColumns)?;
|
let projection = self.schema.select(columns).context(SelectColumns)?;
|
||||||
let schema = table.schema.project(&projection).into();
|
let schema = self.schema.project(&projection).into();
|
||||||
let columns = projection
|
let columns = projection
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|x| Arc::clone(table.batch.column(x)))
|
.map(|x| Arc::clone(self.batch.column(x)))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
RecordBatch::try_new(schema, columns).expect("failed to project record batch")
|
RecordBatch::try_new(schema, columns).expect("failed to project record batch")
|
||||||
|
@ -162,8 +137,12 @@ impl ChunkSnapshot {
|
||||||
table_name: &str,
|
table_name: &str,
|
||||||
selection: Selection<'_>,
|
selection: Selection<'_>,
|
||||||
) -> Option<BTreeSet<String>> {
|
) -> Option<BTreeSet<String>> {
|
||||||
let table = self.records.get(table_name)?;
|
// Temporary #1295
|
||||||
let fields = table.schema.inner().fields().iter();
|
if self.table_name.as_ref() != table_name {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let fields = self.schema.inner().fields().iter();
|
||||||
|
|
||||||
Some(match selection {
|
Some(match selection {
|
||||||
Selection::Some(cols) => fields
|
Selection::Some(cols) => fields
|
||||||
|
@ -178,4 +157,13 @@ impl ChunkSnapshot {
|
||||||
Selection::All => fields.map(|x| x.name().clone()).collect(),
|
Selection::All => fields.map(|x| x.name().clone()).collect(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
|
||||||
|
match (self.timestamp_range, timestamp_range) {
|
||||||
|
(Some(a), Some(b)) => !a.disjoint(b),
|
||||||
|
(None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */
|
||||||
|
// the predicate
|
||||||
|
(_, None) => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -164,9 +164,7 @@ impl PartitionChunk for DbChunk {
|
||||||
|
|
||||||
fn all_table_names(&self, known_tables: &mut StringSet) {
|
fn all_table_names(&self, known_tables: &mut StringSet) {
|
||||||
match &self.state {
|
match &self.state {
|
||||||
State::MutableBuffer { chunk, .. } => {
|
State::MutableBuffer { chunk, .. } => known_tables.append(&mut chunk.table_names(None)),
|
||||||
known_tables.extend(chunk.table_names(None).cloned())
|
|
||||||
}
|
|
||||||
State::ReadBuffer { chunk, .. } => {
|
State::ReadBuffer { chunk, .. } => {
|
||||||
// TODO - align APIs so they behave in the same way...
|
// TODO - align APIs so they behave in the same way...
|
||||||
let rb_names = chunk.all_table_names(known_tables);
|
let rb_names = chunk.all_table_names(known_tables);
|
||||||
|
@ -194,7 +192,7 @@ impl PartitionChunk for DbChunk {
|
||||||
// TODO: Support more predicates
|
// TODO: Support more predicates
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
chunk.table_names(predicate.range).cloned().collect()
|
chunk.table_names(predicate.range)
|
||||||
}
|
}
|
||||||
State::ReadBuffer { chunk, .. } => {
|
State::ReadBuffer { chunk, .. } => {
|
||||||
// If not supported, ReadBuffer can't answer with
|
// If not supported, ReadBuffer can't answer with
|
||||||
|
|
Loading…
Reference in New Issue