feat: implement a few more functions as part of supporting queries from parquet files
parent 641ae52cff
commit 4e2d59d9a5
@@ -12,7 +12,7 @@ pub struct TimestampRange {

 impl TimestampRange {
     pub fn new(start: i64, end: i64) -> Self {
-        debug_assert!(end > start);
+        debug_assert!(end >= start);
         Self { start, end }
     }
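The assertion is relaxed from `>` to `>=`, so a zero-length (point-in-time) range no longer trips the debug assertion. A minimal standalone sketch of the effect, using a hypothetical local copy of the struct rather than the real data_types crate:

// Hypothetical local mirror of TimestampRange, only to illustrate the
// relaxed debug_assert; the real type lives in the data_types crate.
#[derive(Debug, Clone, Copy)]
struct TimestampRange {
    start: i64,
    end: i64,
}

impl TimestampRange {
    fn new(start: i64, end: i64) -> Self {
        // `>=` instead of `>`: start == end is now accepted.
        debug_assert!(end >= start);
        Self { start, end }
    }
}

fn main() {
    // With the old `end > start` assertion this would panic in debug builds.
    let point = TimestampRange::new(100, 100);
    println!("{:?}", point);
}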
@@ -128,6 +128,7 @@ impl Chunk {
             .context(NamedTableError { table_name })
     }

+    // Return all tables of this chunk whose timestamp range overlaps with the given one
     pub fn table_names(
         &self,
         timestamp_range: Option<TimestampRange>,
@@ -140,4 +141,26 @@ impl Chunk {
             }
         })
     }
+
+    // Return column names of a given table that belong to the given column
+    // selection
+    pub fn column_names(
+        &self,
+        table_name: &str,
+        selection: Selection<'_>,
+    ) -> Option<BTreeSet<String>> {
+        let table = self
+            .tables
+            .iter()
+            .find(|t| t.has_table(table_name))
+            .context(NamedTableNotFoundInChunk {
+                table_name,
+                chunk_id: self.id(),
+            });
+
+        match table {
+            Ok(table) => table.column_names(selection),
+            Err(_) => None,
+        }
+    }
 }
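A hedged sketch of calling the new Chunk::column_names; the chunk value and the table/column names below are illustrative only, and the code assumes the Chunk type from this crate plus Selection from internal_types:

// Illustrative only: assumes `chunk` is a parquet_file Chunk that already
// has a table named "cpu" registered via add_table.
use internal_types::selection::Selection;

fn show_columns(chunk: &Chunk) {
    // All column names of the table's schema.
    let all = chunk.column_names("cpu", Selection::All);

    // Only the requested columns that actually exist in the schema.
    let some = chunk.column_names("cpu", Selection::Some(&["time", "region"]));

    // An unknown table yields None instead of an error.
    assert!(chunk.column_names("no_such_table", Selection::All).is_none());

    println!("all: {:?}, some: {:?}", all, some);
}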
@@ -1,5 +1,5 @@
 use snafu::{ResultExt, Snafu};
-use std::mem;
+use std::{collections::BTreeSet, mem};

 use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
 use internal_types::{schema::Schema, selection::Selection};
@@ -74,7 +74,7 @@ impl Table {
         self.object_store_path.clone()
     }

-    /// return schema of this table for specified selection columns
+    /// Return schema of this table for specified selection columns
     pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
         Ok(match selection {
             Selection::All => self.table_schema.clone(),
@@ -85,12 +85,31 @@ impl Table {
         })
     }

     // Check if 2 time ranges overlap
     pub fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
         match (self.timestamp_range, timestamp_range) {
             (Some(a), Some(b)) => !a.disjoint(b),
-            (None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */
-            // the predicate
+            (None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match
+                                       * the predicate */
             (_, None) => true,
         }
     }
+
+    // Return column names of this table that belong to the given column selection
+    pub fn column_names(&self, selection: Selection<'_>) -> Option<BTreeSet<String>> {
+        let fields = self.table_schema.inner().fields().iter();
+
+        Some(match selection {
+            Selection::Some(cols) => fields
+                .filter_map(|x| {
+                    if cols.contains(&x.name().as_str()) {
+                        Some(x.name().clone())
+                    } else {
+                        None
+                    }
+                })
+                .collect(),
+            Selection::All => fields.map(|x| x.name().clone()).collect(),
+        })
+    }
 }
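The overlap check in matches_predicate boils down to "not disjoint" when both ranges exist; a self-contained sketch of that logic, assuming half-open [start, end) ranges (the real TimestampRange::disjoint may treat boundaries differently):

// Standalone illustration of the matches_predicate logic above, with a
// simplified range type; the boundary semantics are an assumption.
#[derive(Clone, Copy, Debug)]
struct Range {
    start: i64,
    end: i64,
}

impl Range {
    // Half-open ranges are disjoint when one ends at or before the other starts.
    fn disjoint(&self, other: &Range) -> bool {
        self.end <= other.start || other.end <= self.start
    }
}

fn matches_predicate(table_range: Option<Range>, predicate_range: &Option<Range>) -> bool {
    match (table_range, predicate_range) {
        // Both ranges known: the table matches when they overlap.
        (Some(a), Some(b)) => !a.disjoint(b),
        // The table has no time column, so it cannot match a time predicate.
        (None, Some(_)) => false,
        // No time predicate at all: every table matches.
        (_, None) => true,
    }
}

fn main() {
    let table = Some(Range { start: 0, end: 100 });
    assert!(matches_predicate(table, &Some(Range { start: 50, end: 150 })));
    assert!(!matches_predicate(table, &Some(Range { start: 100, end: 200 })));
    assert!(matches_predicate(None, &None));
    println!("overlap checks hold");
}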
@@ -548,13 +548,7 @@ impl Db {
                 .context(SchemaConversion)?;
             let table_time_range = match time_range {
                 None => None,
-                Some((start, end)) => {
-                    if start < end {
-                        Some(TimestampRange::new(start, end))
-                    } else {
-                        None
-                    }
-                }
+                Some((start, end)) => Some(TimestampRange::new(start, end)),
             };
             parquet_chunk.add_table(stats, path, schema, table_time_range);
         }
@@ -343,8 +343,12 @@ impl PartitionChunk for DBChunk {
                     })?,
                 ))
             }
-            Self::ParquetFile { .. } => {
-                unimplemented!("parquet file not implemented for column_names")
+            Self::ParquetFile { chunk, .. } => {
+                if !predicate.is_empty() {
+                    // TODO: Support predicates when MB supports it
+                    return Ok(None);
+                }
+                Ok(chunk.column_names(table_name, columns))
             }
         }
     }
@@ -399,7 +403,9 @@ impl PartitionChunk for DBChunk {
                 Ok(Some(values))
             }
             Self::ParquetFile { .. } => {
-                unimplemented!("parquet file not implemented for column_values")
+                // Since DataFusion can read Parquet, there is no advantage to
+                // manually implementing this vs just letting DataFusion do its thing
+                Ok(None)
             }
         }
     }
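In both ParquetFile arms the method now returns Ok(None) instead of panicking: None is not an error, it just means the chunk cannot answer from metadata alone and the caller should fall back to scanning the data (which DataFusion can do for Parquet). A minimal sketch of that caller-side convention, with hypothetical names:

// Hypothetical caller-side handling of the Ok(None) convention: Some means
// "answered from metadata", None means "fall back to a full scan".
use std::collections::BTreeSet;

fn resolve_values<E>(
    fast_path: Result<Option<BTreeSet<String>>, E>,
    full_scan: impl FnOnce() -> BTreeSet<String>,
) -> Result<BTreeSet<String>, E> {
    match fast_path? {
        // The chunk produced the values cheaply; use them directly.
        Some(values) => Ok(values),
        // Ok(None): no cheap answer, so read the data itself.
        None => Ok(full_scan()),
    }
}

fn main() {
    let fast: Result<Option<BTreeSet<String>>, ()> = Ok(None);
    let values = resolve_values(fast, || {
        ["region", "host"].iter().map(|s| s.to_string()).collect()
    });
    println!("{:?}", values);
}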