parent 09aaf18fb4
commit 016dd93d9c

@@ -33,6 +33,11 @@ impl ParquetChunk {
         }
     }
 
+    /// Return raw parquet file metadata.
+    pub fn parquet_file(&self) -> &Arc<ParquetFile> {
+        &self.parquet_file
+    }
+
     /// Return object store id
     pub fn object_store_id(&self) -> Uuid {
         self.parquet_file.object_store_id

@@ -46,15 +46,11 @@ impl CachedParquetFiles {
     }
 
     /// return the underying files as a new Vec
-    pub fn vec(&self) -> Vec<Arc<ParquetFile>> {
+    #[cfg(test)]
+    fn vec(&self) -> Vec<Arc<ParquetFile>> {
         self.files.as_ref().clone()
     }
 
-    /// return the number of cached files
-    pub fn len(&self) -> usize {
-        self.files.len()
-    }
-
     /// Estimate the memory consumption of this object and its contents
     fn size(&self) -> usize {
         // simplify accounting by ensuring len and capacity of vector are the same

@@ -208,6 +208,11 @@ impl QuerierParquetChunk {
         }
     }
 
+    /// Return raw parquet file metadata.
+    pub fn parquet_file(&self) -> &Arc<ParquetFile> {
+        self.parquet_chunk.parquet_file()
+    }
+
     /// Set delete predicates of the given chunk.
     pub fn with_delete_predicates(self, delete_predicates: Vec<Arc<DeletePredicate>>) -> Self {
         Self {

@@ -6,10 +6,10 @@ use crate::{
     IngesterConnection,
 };
 use data_types::{KafkaPartition, PartitionId, TableId};
-use futures::join;
+use futures::{join, StreamExt, TryStreamExt};
 use iox_query::{provider::ChunkPruner, QueryChunk};
 use observability_deps::tracing::debug;
-use predicate::Predicate;
+use predicate::{Predicate, PredicateMatch};
 use schema::Schema;
 use snafu::{ResultExt, Snafu};
 use std::{

@@ -163,6 +163,41 @@ impl QuerierTable {
             catalog_cache.tombstone().get(self.id())
         );
 
+        // filter out parquet files early
+        let n_parquet_files_pre_filter = parquet_files.files.len();
+        let parquet_files: Vec<_> = futures::stream::iter(parquet_files.files.iter())
+            .filter_map(|cached_parquet_file| {
+                let chunk_adapter = Arc::clone(&self.chunk_adapter);
+                async move {
+                    chunk_adapter
+                        .new_parquet_chunk(
+                            Arc::clone(&self.namespace_name),
+                            Arc::clone(cached_parquet_file),
+                        )
+                        .await
+                }
+            })
+            .filter_map(|chunk| {
+                let res = chunk
+                    .apply_predicate_to_metadata(predicate)
+                    .map(|pmatch| {
+                        let keep = !matches!(pmatch, PredicateMatch::Zero);
+                        keep.then(|| chunk)
+                    })
+                    .transpose();
+                async move { res }
+            })
+            .try_collect()
+            .await
+            .unwrap();
+        debug!(
+            namespace=%self.namespace_name,
+            table_name=%self.table_name(),
+            n_parquet_files_pre_filter,
+            n_parquet_files_post_filter=parquet_files.len(),
+            "Applied predicate-based filter to parquet file"
+        );
+
         self.reconciler
             .reconcile(partitions, tombstones.to_vec(), parquet_files)
             .await

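The hunk above is the core of the change: cached parquet files are turned into chunks and pruned against the query predicate before reconciliation, using metadata only. Below is a self-contained sketch of that `filter_map`/`try_collect` pruning pattern; `Chunk`, `PredicateMatch`, and the plain `i64` time bound are simplified stand-ins for the real querier and `predicate` types, not the actual API.

```rust
use futures::{stream, StreamExt, TryStreamExt};

/// Simplified stand-in for `predicate::PredicateMatch`.
#[derive(Debug, Clone, Copy)]
enum PredicateMatch {
    /// The predicate provably matches no rows in the chunk.
    Zero,
    /// The chunk may contain matching rows.
    Unknown,
}

/// Simplified stand-in for a parquet-backed chunk; only metadata is needed.
#[derive(Debug)]
struct Chunk {
    id: u64,
    max_time: i64,
}

impl Chunk {
    /// Stand-in for `apply_predicate_to_metadata`: prune on metadata alone,
    /// here a single lower bound on the time column.
    fn apply_predicate_to_metadata(&self, min_time: i64) -> Result<PredicateMatch, String> {
        Ok(if self.max_time < min_time {
            PredicateMatch::Zero
        } else {
            PredicateMatch::Unknown
        })
    }
}

fn main() {
    let chunks = vec![
        Chunk { id: 1, max_time: 100 },
        Chunk { id: 2, max_time: 300 },
    ];
    let min_time = 250;

    let kept: Vec<Chunk> = futures::executor::block_on(async {
        stream::iter(chunks)
            // Same shape as the new querier code: drop chunks whose metadata
            // proves they cannot match, keep errors flowing via `try_collect`.
            .filter_map(|chunk| {
                let res = chunk
                    .apply_predicate_to_metadata(min_time)
                    .map(|pmatch| {
                        let keep = !matches!(pmatch, PredicateMatch::Zero);
                        keep.then(|| chunk)
                    })
                    .transpose();
                async move { res }
            })
            .try_collect()
            .await
    })
    .unwrap();

    assert_eq!(kept.len(), 1);
    assert_eq!(kept[0].id, 2);
}
```

Collecting with `try_collect` keeps any metadata-check error inside the stream and aborts the whole pass on the first failure, which matches how the new code surfaces errors (via `unwrap()` for now).
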
@@ -336,25 +371,28 @@ mod tests {
         assert!(querier_table.chunks().await.unwrap().is_empty());
 
         let file111 = partition11
-            .create_parquet_file_with_min_max("table1 foo=1 11", 1, 2, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table1 foo=1 11", 1, 2, 11, 11)
             .await;
         let file112 = partition11
-            .create_parquet_file_with_min_max("table1 foo=2 22", 3, 4, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table1 foo=2 22", 3, 4, 22, 22)
             .await;
         let file113 = partition11
-            .create_parquet_file_with_min_max("table1 foo=3 33", 5, 6, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table1 foo=3 33", 5, 6, 33, 33)
             .await;
         let file114 = partition11
-            .create_parquet_file_with_min_max("table1 foo=4 44", 7, 8, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table1 foo=4 44", 7, 8, 44, 44)
             .await;
         let file115 = partition11
-            .create_parquet_file_with_min_max("table1 foo=5 55", 9, 10, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table1 foo=5 55", 9, 10, 55, 55)
             .await;
         let file121 = partition12
-            .create_parquet_file_with_min_max("table1 foo=5 55", 1, 2, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table1 foo=5 55", 1, 2, 55, 55)
             .await;
+        let _file122 = partition12
+            .create_parquet_file_with_min_max("table1 foo=10 100", 1, 2, 100, 100)
+            .await;
         let _file211 = partition21
-            .create_parquet_file_with_min_max("table2 foo=6 66", 1, 2, now_nanos(), now_nanos())
+            .create_parquet_file_with_min_max("table2 foo=6 66", 1, 2, 66, 66)
             .await;
 
         file111.flag_for_delete().await;

@@ -377,8 +415,10 @@ mod tests {
         // now we have some files
         // this contains all files except for:
         // - file111: marked for delete
+        // - file122: filtered by predicate
         // - file221: wrong table
-        let mut chunks = querier_table.chunks().await.unwrap();
+        let pred = Predicate::new().with_range(0, 100);
+        let mut chunks = querier_table.chunks_with_predicate(&pred).await.unwrap();
         chunks.sort_by_key(|c| c.id());
         assert_eq!(chunks.len(), 5);
 

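The new fixture `file122` carries min/max timestamps of 100, and the test prunes it with `Predicate::new().with_range(0, 100)`; that only works out if the range is start-inclusive and end-exclusive. A tiny stand-alone illustration of that half-open convention (illustrative type, not the real `Predicate`/timestamp-range types):

```rust
/// Illustration only: a half-open time range [start, end).
#[derive(Debug, Clone, Copy)]
struct TimeRange {
    start: i64, // inclusive
    end: i64,   // exclusive
}

impl TimeRange {
    fn contains(&self, t: i64) -> bool {
        self.start <= t && t < self.end
    }
}

fn main() {
    let range = TimeRange { start: 0, end: 100 };
    assert!(range.contains(55)); // data stamped 55 (e.g. file115/file121) is kept
    assert!(!range.contains(100)); // data stamped 100 (file122) is pruned
}
```
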
@@ -821,7 +861,14 @@ mod tests {
         /// Invokes querier_table.chunks modeling the ingester sending the partitions in this table
         async fn chunks(&self) -> Result<Vec<Arc<dyn QueryChunk>>> {
             let pred = Predicate::default();
+            self.chunks_with_predicate(&pred).await
+        }
+
+        /// Invokes querier_table.chunks modeling the ingester sending the partitions in this table
+        async fn chunks_with_predicate(
+            &self,
+            pred: &Predicate,
+        ) -> Result<Vec<Arc<dyn QueryChunk>>> {
             self.querier_table
                 .ingester_connection
                 .as_ref()

@@ -831,7 +878,7 @@ mod tests {
                 .unwrap()
                 .next_response(Ok(self.ingester_partitions.clone()));
 
-            self.querier_table.chunks(&pred).await
+            self.querier_table.chunks(pred).await
         }
     }
 

@@ -14,7 +14,6 @@ use std::{
 };
 
 use crate::{
-    cache::parquet_file::CachedParquetFiles,
     chunk::{ChunkAdapter, QuerierParquetChunk, QuerierRBChunk},
     ingester::IngesterChunk,
     tombstone::QuerierTombstone,

@@ -56,7 +55,7 @@ impl Reconciler {
         &self,
         ingester_partitions: Vec<IngesterPartition>,
         tombstones: Vec<Arc<Tombstone>>,
-        parquet_files: Arc<CachedParquetFiles>,
+        parquet_files: Vec<QuerierParquetChunk>,
     ) -> Result<Vec<Arc<dyn QueryChunk>>, ReconcileError> {
         let mut chunks = self
             .build_chunks_from_parquet(&ingester_partitions, tombstones, parquet_files)

@@ -78,7 +77,7 @@ impl Reconciler {
         &self,
         ingester_partitions: &[IngesterPartition],
         tombstones: Vec<Arc<Tombstone>>,
-        parquet_files: Arc<CachedParquetFiles>,
+        parquet_files: Vec<QuerierParquetChunk>,
     ) -> Result<Vec<Box<dyn UpdatableQuerierChunk>>, ReconcileError> {
         debug!(
             namespace=%self.namespace_name(),

@@ -104,10 +103,10 @@ impl Reconciler {
                 .push(tombstone);
         }
 
-        let parquet_files = filter_parquet_files(ingester_partitions, parquet_files.vec())?;
+        let parquet_files = filter_parquet_files(ingester_partitions, parquet_files)?;
 
         debug!(
-            parquet_ids=?parquet_files.iter().map(|f| f.id).collect::<Vec<_>>(),
+            parquet_ids=?parquet_files.iter().map(|f| f.parquet_file().id).collect::<Vec<_>>(),
             namespace=%self.namespace_name(),
             table_name=%self.table_name(),
             "Parquet files after filtering"

@@ -117,10 +116,13 @@ impl Reconciler {
         let chunks_from_parquet: Vec<_> = futures::stream::iter(parquet_files)
             // use `.map` instead of `then` or `filter_map` because the next step is `buffered_unordered` and that
             // requires a stream of futures
-            .map(|cached_parquet_file| async {
-                self.chunk_adapter
-                    .new_rb_chunk(Arc::clone(&self.namespace_name), cached_parquet_file)
-                    .await
+            .map(|chunk| {
+                let parquet_file = Arc::clone(chunk.parquet_file());
+                async {
+                    self.chunk_adapter
+                        .new_rb_chunk(Arc::clone(&self.namespace_name), parquet_file)
+                        .await
+                }
             })
             // fetch multiple RB chunks in parallel to hide latency
             // TODO(marco): expose this as a config

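The comments in this hunk explain the shape of the pipeline: `.map` must yield futures (rather than awaited values) so the following `buffer_unordered` stage can keep several RB-chunk loads in flight at once. A minimal sketch of that combination, with a fake `fetch` standing in for `new_rb_chunk`:

```rust
use futures::{stream, StreamExt};

/// Stand-in for an expensive per-file load such as building an RB chunk.
async fn fetch(id: u64) -> String {
    format!("chunk-{id}")
}

fn main() {
    let loaded: Vec<String> = futures::executor::block_on(
        stream::iter(1..=8u64)
            // `map` keeps each item as an unresolved future...
            .map(fetch)
            // ...so `buffer_unordered` can poll up to 4 of them concurrently,
            // hiding per-item latency behind the others.
            .buffer_unordered(4)
            .collect(),
    );
    assert_eq!(loaded.len(), 8);
}
```

Note that `buffer_unordered` yields results in completion order, which is acceptable when the collected list does not depend on arrival order.
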
@@ -1,6 +1,6 @@
 //! Interface for reconciling Ingester and catalog state
 
-use crate::ingester::IngesterPartition;
+use crate::{chunk::QuerierParquetChunk, ingester::IngesterPartition};
 use data_types::{ParquetFile, PartitionId, SequenceNumber, SequencerId, Tombstone, TombstoneId};
 use std::{ops::Deref, sync::Arc};
 

@@ -76,6 +76,20 @@ impl ParquetFileInfo for Arc<ParquetFile> {
     }
 }
 
+impl ParquetFileInfo for QuerierParquetChunk {
+    fn partition_id(&self) -> PartitionId {
+        self.parquet_file().partition_id
+    }
+
+    fn min_sequence_number(&self) -> SequenceNumber {
+        self.parquet_file().min_sequence_number
+    }
+
+    fn max_sequence_number(&self) -> SequenceNumber {
+        self.parquet_file().max_sequence_number
+    }
+}
+
 /// Information about a tombstone.
 ///
 /// This is mostly the same as [`Tombstone`] but allows easier mocking.

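This impl lets `QuerierParquetChunk` be handed straight to `filter_parquet_files`, which appears to be written against the thin `ParquetFileInfo` view so the same logic can be exercised with cheap fakes in tests (the neighbouring tombstone trait states that goal explicitly). A minimal sketch of that pattern with made-up stand-in types, not the real trait, IDs, or filtering logic:

```rust
/// Illustrative newtype IDs, not the real data_types definitions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct SequenceNumber(i64);

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct PartitionId(i64);

/// Thin metadata view: anything that can answer these questions can be filtered.
trait ParquetFileInfo {
    fn partition_id(&self) -> PartitionId;
    fn max_sequence_number(&self) -> SequenceNumber;
}

/// A test fake: cheap to build, no catalog, object store, or chunk needed.
struct FakeFile {
    partition_id: PartitionId,
    max: SequenceNumber,
}

impl ParquetFileInfo for FakeFile {
    fn partition_id(&self) -> PartitionId {
        self.partition_id
    }
    fn max_sequence_number(&self) -> SequenceNumber {
        self.max
    }
}

/// Generic logic works identically for fakes in tests and real chunks in production.
fn max_sequence_for_partition<F: ParquetFileInfo>(
    files: &[F],
    partition: PartitionId,
) -> Option<SequenceNumber> {
    files
        .iter()
        .filter(|f| f.partition_id() == partition)
        .map(|f| f.max_sequence_number())
        .max()
}

fn main() {
    let files = vec![
        FakeFile { partition_id: PartitionId(1), max: SequenceNumber(4) },
        FakeFile { partition_id: PartitionId(1), max: SequenceNumber(9) },
        FakeFile { partition_id: PartitionId(2), max: SequenceNumber(7) },
    ];
    assert_eq!(
        max_sequence_for_partition(&files, PartitionId(1)),
        Some(SequenceNumber(9))
    );
}
```
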
@@ -61,3 +61,23 @@
 | | IOxReadFilterNode: table_name=h2o, chunks=2 predicate=Predicate |
 | | |
 +---------------+---------------------------------------------------------------------------------------+
+-- SQL: EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00');
++---------------+------------------------------------------------------------------------------------------------------------------------------------+
+| plan_type | plan |
++---------------+------------------------------------------------------------------------------------------------------------------------------------+
+| logical_plan | Projection: #h2o.city, #h2o.other_temp, #h2o.state, #h2o.temp, #h2o.time |
+| | Filter: #h2o.time >= TimestampNanosecond(250, None) |
+| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[#h2o.time >= TimestampNanosecond(250, None)] |
+| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
+| | CoalesceBatchesExec: target_batch_size=500 |
+| | FilterExec: time@4 >= 250 |
+| | RepartitionExec: partitioning=RoundRobinBatch(4) |
+| | UnionExec |
+| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
+| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
+| | UnionExec |
+| | IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate exprs: [#time >= TimestampNanosecond(250, None)] |
+| | IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate exprs: [#time >= TimestampNanosecond(250, None)] |
+| | IOxReadFilterNode: table_name=h2o, chunks=2 predicate=Predicate exprs: [#time >= TimestampNanosecond(250, None)] |
+| | |
++---------------+------------------------------------------------------------------------------------------------------------------------------------+

@@ -13,3 +13,6 @@ EXPLAIN SELECT * from h2o;
 -- IOX_COMPARE: sorted
 select temp, other_temp, time from h2o;
 EXPLAIN select temp, other_temp, time from h2o;
+
+-- early pruning
+EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00');