feat: implement a few more functions as part of supporting query from parquet files

pull/24376/head
Nga Tran 2021-04-14 16:06:47 -04:00
parent 641ae52cff
commit 4e2d59d9a5
5 changed files with 57 additions and 15 deletions

View File

@ -12,7 +12,7 @@ pub struct TimestampRange {
impl TimestampRange { impl TimestampRange {
pub fn new(start: i64, end: i64) -> Self { pub fn new(start: i64, end: i64) -> Self {
debug_assert!(end > start); debug_assert!(end >= start);
Self { start, end } Self { start, end }
} }

View File

@ -128,6 +128,7 @@ impl Chunk {
.context(NamedTableError { table_name }) .context(NamedTableError { table_name })
} }
// Return all tables of this chunk whose timestamp range overlaps with the given one
pub fn table_names( pub fn table_names(
&self, &self,
timestamp_range: Option<TimestampRange>, timestamp_range: Option<TimestampRange>,
@ -140,4 +141,26 @@ impl Chunk {
} }
}) })
} }
// Return columns names of a given table that belong to the given column
// selection
pub fn column_names(
&self,
table_name: &str,
selection: Selection<'_>,
) -> Option<BTreeSet<String>> {
let table = self
.tables
.iter()
.find(|t| t.has_table(table_name))
.context(NamedTableNotFoundInChunk {
table_name,
chunk_id: self.id(),
});
match table {
Ok(table) => table.column_names(selection),
Err(_) => None,
}
}
} }

View File

@ -1,5 +1,5 @@
use snafu::{ResultExt, Snafu}; use snafu::{ResultExt, Snafu};
use std::mem; use std::{collections::BTreeSet, mem};
use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange}; use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
use internal_types::{schema::Schema, selection::Selection}; use internal_types::{schema::Schema, selection::Selection};
@ -74,7 +74,7 @@ impl Table {
self.object_store_path.clone() self.object_store_path.clone()
} }
/// return schema of this table for specified selection columns /// Return schema of this table for specified selection columns
pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> { pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
Ok(match selection { Ok(match selection {
Selection::All => self.table_schema.clone(), Selection::All => self.table_schema.clone(),
@ -85,12 +85,31 @@ impl Table {
}) })
} }
// Check if 2 time ranges overlap
pub fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool { pub fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
match (self.timestamp_range, timestamp_range) { match (self.timestamp_range, timestamp_range) {
(Some(a), Some(b)) => !a.disjoint(b), (Some(a), Some(b)) => !a.disjoint(b),
(None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */ (None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match
// the predicate * the predicate */
(_, None) => true, (_, None) => true,
} }
} }
// Return the names of this table's columns that fall within the given
// column selection
pub fn column_names(&self, selection: Selection<'_>) -> Option<BTreeSet<String>> {
    let schema_fields = self.table_schema.inner().fields().iter();
    let names: BTreeSet<String> = match selection {
        // Keep only the schema columns the caller explicitly asked for
        Selection::Some(cols) => schema_fields
            .filter(|field| cols.contains(&field.name().as_str()))
            .map(|field| field.name().clone())
            .collect(),
        // No restriction: every column in the schema qualifies
        Selection::All => schema_fields.map(|field| field.name().clone()).collect(),
    };
    Some(names)
}
} }

View File

@ -548,13 +548,7 @@ impl Db {
.context(SchemaConversion)?; .context(SchemaConversion)?;
let table_time_range = match time_range { let table_time_range = match time_range {
None => None, None => None,
Some((start, end)) => { Some((start, end)) => Some(TimestampRange::new(start, end)),
if start < end {
Some(TimestampRange::new(start, end))
} else {
None
}
}
}; };
parquet_chunk.add_table(stats, path, schema, table_time_range); parquet_chunk.add_table(stats, path, schema, table_time_range);
} }

View File

@ -343,8 +343,12 @@ impl PartitionChunk for DBChunk {
})?, })?,
)) ))
} }
Self::ParquetFile { .. } => { Self::ParquetFile { chunk, .. } => {
unimplemented!("parquet file not implemented for column_names") if !predicate.is_empty() {
// TODO: Support predicates when MB supports it
return Ok(None);
}
Ok(chunk.column_names(table_name, columns))
} }
} }
} }
@ -399,7 +403,9 @@ impl PartitionChunk for DBChunk {
Ok(Some(values)) Ok(Some(values))
} }
Self::ParquetFile { .. } => { Self::ParquetFile { .. } => {
unimplemented!("parquet file not implemented for column_values") // Since DataFusion can read Parquet, there is no advantage to
// manually implementing this vs just letting DataFusion do its thing
Ok(None)
} }
} }
} }