feat: implement a few more functions as part of supporting query from parquet files

parent 641ae52cff
commit 4e2d59d9a5
@@ -12,7 +12,7 @@ pub struct TimestampRange {

 impl TimestampRange {
     pub fn new(start: i64, end: i64) -> Self {
-        debug_assert!(end > start);
+        debug_assert!(end >= start);
         Self { start, end }
     }
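Note: with the assertion relaxed to `end >= start`, a degenerate range whose start and end coincide is now accepted instead of tripping the debug assertion. A minimal standalone sketch of the new behaviour, re-declaring the type locally for illustration (the real one lives in `data_types::timestamp` and has more methods):

// Local stand-in for data_types::timestamp::TimestampRange, illustration only.
#[derive(Debug, Clone, Copy)]
pub struct TimestampRange {
    pub start: i64,
    pub end: i64,
}

impl TimestampRange {
    pub fn new(start: i64, end: i64) -> Self {
        // Relaxed from `end > start`: an empty/point range is now allowed.
        debug_assert!(end >= start);
        Self { start, end }
    }
}

fn main() {
    // Previously this would have hit the debug assertion; now it is valid.
    let r = TimestampRange::new(100, 100);
    println!("{:?}", r);
}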
@@ -128,6 +128,7 @@ impl Chunk {
             .context(NamedTableError { table_name })
     }

+    // Return all tables of this chunk whose timestamp overlaps with the given one
     pub fn table_names(
         &self,
         timestamp_range: Option<TimestampRange>,
@@ -140,4 +141,26 @@ impl Chunk {
                }
            })
    }
+
+    // Return column names of a given table that belong to the given column
+    // selection
+    pub fn column_names(
+        &self,
+        table_name: &str,
+        selection: Selection<'_>,
+    ) -> Option<BTreeSet<String>> {
+        let table = self
+            .tables
+            .iter()
+            .find(|t| t.has_table(table_name))
+            .context(NamedTableNotFoundInChunk {
+                table_name,
+                chunk_id: self.id(),
+            });
+
+        match table {
+            Ok(table) => table.column_names(selection),
+            Err(_) => None,
+        }
+    }
 }
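The lookup-then-delegate shape of the new `Chunk::column_names` (find the table by name, ask it for its columns, flatten a failed lookup into `None`) can be sketched standalone as below. The `Table` and `Chunk` types here are simplified stand-ins, not the real parquet_file ones:

use std::collections::BTreeSet;

// Simplified stand-ins; the real Chunk/Table carry schemas, object store
// paths, and statistics.
struct Table {
    name: String,
    columns: BTreeSet<String>,
}

impl Table {
    fn has_table(&self, table_name: &str) -> bool {
        self.name == table_name
    }

    fn column_names(&self) -> Option<BTreeSet<String>> {
        Some(self.columns.clone())
    }
}

struct Chunk {
    tables: Vec<Table>,
}

impl Chunk {
    // Same shape as the new Chunk::column_names above: look the table up by
    // name and delegate, turning "table not found" into None.
    fn column_names(&self, table_name: &str) -> Option<BTreeSet<String>> {
        self.tables
            .iter()
            .find(|t| t.has_table(table_name))
            .and_then(|t| t.column_names())
    }
}

fn main() {
    let chunk = Chunk {
        tables: vec![Table {
            name: "cpu".to_string(),
            columns: ["host", "time", "usage"].iter().map(|s| s.to_string()).collect(),
        }],
    };
    assert!(chunk.column_names("cpu").is_some());
    assert!(chunk.column_names("mem").is_none());
}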
@@ -1,5 +1,5 @@
 use snafu::{ResultExt, Snafu};
-use std::mem;
+use std::{collections::BTreeSet, mem};

 use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
 use internal_types::{schema::Schema, selection::Selection};
@@ -74,7 +74,7 @@ impl Table {
         self.object_store_path.clone()
     }

-    /// return schema of this table for specified selection columns
+    /// Return schema of this table for specified selection columns
     pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
         Ok(match selection {
             Selection::All => self.table_schema.clone(),
@@ -85,12 +85,31 @@ impl Table {
         })
     }

+    // Check if 2 time ranges overlap
     pub fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
         match (self.timestamp_range, timestamp_range) {
             (Some(a), Some(b)) => !a.disjoint(b),
-            (None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */
-            // the predicate
+            (None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match
+                                       * the predicate */
             (_, None) => true,
         }
     }
+
+    // Return column names of this table that belong to the given column selection
+    pub fn column_names(&self, selection: Selection<'_>) -> Option<BTreeSet<String>> {
+        let fields = self.table_schema.inner().fields().iter();
+
+        Some(match selection {
+            Selection::Some(cols) => fields
+                .filter_map(|x| {
+                    if cols.contains(&x.name().as_str()) {
+                        Some(x.name().clone())
+                    } else {
+                        None
+                    }
+                })
+                .collect(),
+            Selection::All => fields.map(|x| x.name().clone()).collect(),
+        })
+    }
 }
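The `Selection`-driven filtering in `Table::column_names` boils down to either taking every field name or only the requested ones that actually exist. A self-contained approximation over a plain slice of names, with a simplified stand-in for `internal_types::selection::Selection` (the real code walks Arrow schema fields instead):

use std::collections::BTreeSet;

// Simplified stand-in for internal_types::selection::Selection.
enum Selection<'a> {
    All,
    Some(&'a [&'a str]),
}

// Mirrors the shape of Table::column_names over plain field names.
fn column_names(field_names: &[&str], selection: Selection<'_>) -> BTreeSet<String> {
    match selection {
        Selection::Some(cols) => field_names
            .iter()
            .copied()
            .filter(|name| cols.contains(name))
            .map(|name| name.to_string())
            .collect(),
        Selection::All => field_names.iter().map(|name| name.to_string()).collect(),
    }
}

fn main() {
    let fields = ["host", "region", "time", "usage"];
    let wanted: &[&str] = &["time", "usage", "missing"];

    assert_eq!(column_names(&fields, Selection::All).len(), 4);
    // Columns in the selection that the table does not have are silently dropped.
    assert_eq!(column_names(&fields, Selection::Some(wanted)).len(), 2);
}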
@@ -548,13 +548,7 @@ impl Db {
                .context(SchemaConversion)?;
            let table_time_range = match time_range {
                None => None,
-                Some((start, end)) => {
-                    if start < end {
-                        Some(TimestampRange::new(start, end))
-                    } else {
-                        None
-                    }
-                }
+                Some((start, end)) => Some(TimestampRange::new(start, end)),
            };
            parquet_chunk.add_table(stats, path, schema, table_time_range);
        }
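The dropped `if start < end` guard ties directly to the relaxed assertion above: the simplified arm is just an `Option::map`, and a table whose min and max timestamps are equal now keeps a real `TimestampRange` instead of losing its time metadata. A small sketch of that behaviour, again with a local stand-in for the range type:

// Local stand-in for data_types::timestamp::TimestampRange, illustration only.
#[derive(Debug)]
struct TimestampRange {
    start: i64,
    end: i64,
}

impl TimestampRange {
    fn new(start: i64, end: i64) -> Self {
        debug_assert!(end >= start);
        Self { start, end }
    }
}

fn main() {
    // Statistics for a table whose min and max timestamps are equal.
    let time_range: Option<(i64, i64)> = Some((100, 100));

    // The simplified match arm behaves like a plain Option::map ...
    let table_time_range = time_range.map(|(start, end)| TimestampRange::new(start, end));

    // ... and with the relaxed `end >= start` assertion this table keeps its
    // time metadata instead of being mapped to None by the old `start < end` guard.
    println!("{:?}", table_time_range);
}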
@@ -343,8 +343,12 @@ impl PartitionChunk for DBChunk {
                    })?,
                ))
            }
-            Self::ParquetFile { .. } => {
-                unimplemented!("parquet file not implemented for column_names")
+            Self::ParquetFile { chunk, .. } => {
+                if !predicate.is_empty() {
+                    // TODO: Support predicates when MB supports it
+                    return Ok(None);
+                }
+                Ok(chunk.column_names(table_name, columns))
            }
        }
    }
@@ -399,7 +403,9 @@ impl PartitionChunk for DBChunk {
                Ok(Some(values))
            }
            Self::ParquetFile { .. } => {
-                unimplemented!("parquet file not implemented for column_values")
+                // Since DataFusion can read Parquet, there is no advantage to
+                // manually implementing this vs just letting DataFusion do its thing
+                Ok(None)
            }
        }
    }
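In both `ParquetFile` arms the trait now answers `Ok(None)` rather than panicking; presumably `None` signals "not cheaply answerable from this chunk's metadata, fall back to a full query plan", which is also why a non-empty predicate bails out early. A hypothetical caller, with made-up names (the real planner lives in the IOx query crates and looks different), would handle it roughly like this:

use std::collections::BTreeSet;

// Hypothetical, simplified caller showing how an Ok(None) answer is handled.
fn known_columns_or_scan(
    metadata_answer: Option<BTreeSet<String>>,
    full_scan: impl FnOnce() -> BTreeSet<String>,
) -> BTreeSet<String> {
    match metadata_answer {
        // The chunk could answer from its metadata alone.
        Some(names) => names,
        // The chunk declined (e.g. a predicate it can't evaluate yet): run the
        // full scan / DataFusion plan instead.
        None => full_scan(),
    }
}

fn main() {
    let from_metadata: Option<BTreeSet<String>> = None;
    let names = known_columns_or_scan(from_metadata, || {
        ["host", "time"].iter().map(|s| s.to_string()).collect()
    });
    assert_eq!(names.len(), 2);
}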