Revert "chore: Update datafusion again (#6108)" (#6159)

This reverts commit fbe9f27f10.
pull/24376/head
Andrew Lamb 2022-11-16 16:14:55 -05:00 committed by GitHub
parent 80e91a644b
commit 67712b595c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 460 additions and 641 deletions

49
Cargo.lock generated
View File

@ -1135,19 +1135,6 @@ dependencies = [
"syn",
]
[[package]]
name = "dashmap"
version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
dependencies = [
"cfg-if",
"hashbrown 0.12.3",
"lock_api",
"once_cell",
"parking_lot_core 0.9.4",
]
[[package]]
name = "data_types"
version = "0.1.0"
@ -1168,8 +1155,8 @@ dependencies = [
[[package]]
name = "datafusion"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1178,7 +1165,6 @@ dependencies = [
"bytes",
"bzip2",
"chrono",
"dashmap",
"datafusion-common",
"datafusion-expr",
"datafusion-optimizer",
@ -1194,6 +1180,7 @@ dependencies = [
"log",
"num_cpus",
"object_store",
"ordered-float 3.4.0",
"parking_lot 0.12.1",
"parquet",
"paste",
@ -1212,20 +1199,21 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"arrow",
"chrono",
"object_store",
"ordered-float 3.4.0",
"parquet",
"sqlparser 0.26.0",
]
[[package]]
name = "datafusion-expr"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1236,8 +1224,8 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"arrow",
"async-trait",
@ -1251,8 +1239,8 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1270,6 +1258,7 @@ dependencies = [
"lazy_static",
"md-5",
"num-traits",
"ordered-float 3.4.0",
"paste",
"rand",
"regex",
@ -1280,8 +1269,8 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"arrow",
"datafusion",
@ -1294,8 +1283,8 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"arrow",
"datafusion-common",
@ -1305,8 +1294,8 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48"
dependencies = [
"arrow",
"datafusion-common",

View File

@ -111,8 +111,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "26.0.0" }
arrow-flight = { version = "26.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="d2814c960168b45c4a0f5d7bbb72d9f412cb08bd", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="dd081d64a2fba8574e63bdd0662c14aec5852b48", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="dd081d64a2fba8574e63bdd0662c14aec5852b48" }
hashbrown = { version = "0.13.1" }
parquet = { version = "26.0.0" }

View File

@ -218,8 +218,8 @@ impl ExecutionPlan for NonNullCheckerExec {
None
}
fn required_input_distribution(&self) -> Vec<Distribution> {
vec![Distribution::UnspecifiedDistribution]
fn required_child_distribution(&self) -> Distribution {
Distribution::UnspecifiedDistribution
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {

View File

@ -189,8 +189,8 @@ impl ExecutionPlan for SchemaPivotExec {
None
}
fn required_input_distribution(&self) -> Vec<Distribution> {
vec![Distribution::UnspecifiedDistribution]
fn required_child_distribution(&self) -> Distribution {
Distribution::UnspecifiedDistribution
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {

View File

@ -207,8 +207,8 @@ impl ExecutionPlan for StreamSplitExec {
/// Always require a single input (eventually we might imagine
/// running this on multiple partitions concurrently to compute
/// the splits in parallel, but not now)
fn required_input_distribution(&self) -> Vec<Distribution> {
vec![Distribution::SinglePartition]
fn required_child_distribution(&self) -> Distribution {
Distribution::SinglePartition
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {

View File

@ -219,11 +219,8 @@ impl ExecutionPlan for DeduplicateExec {
Ok(AdapterStream::adapt(self.schema(), rx, handle))
}
fn required_input_distribution(&self) -> Vec<Distribution> {
// For now use a single input -- it might be helpful
// eventually to deduplicate in parallel by hash partitioning
// the inputs (based on sort keys)
vec![Distribution::SinglePartition]
fn required_child_distribution(&self) -> Distribution {
Distribution::SinglePartition
}
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {

View File

@ -1,9 +1,6 @@
//! Implementation of a DataFusion PhysicalPlan node across partition chunks
use crate::{
provider::record_batch_exec::RecordBatchesExec, util::arrow_sort_key_exprs, QueryChunk,
QueryChunkData,
};
use crate::{provider::record_batch_exec::RecordBatchesExec, QueryChunk, QueryChunkData};
use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
use data_types::TableSummary;
use datafusion::{
@ -18,65 +15,12 @@ use datafusion::{
};
use object_store::ObjectMeta;
use predicate::Predicate;
use schema::{sort::SortKey, Schema};
use schema::Schema;
use std::{
collections::{hash_map::Entry, HashMap},
sync::Arc,
};
/// Holds a list of chunks that all have the same "URL"
#[derive(Debug)]
struct ParquetChunkList {
object_store_url: ObjectStoreUrl,
object_metas: Vec<ObjectMeta>,
sort_key: Option<SortKey>,
}
impl ParquetChunkList {
fn new(object_store_url: ObjectStoreUrl, chunk: &dyn QueryChunk, meta: ObjectMeta) -> Self {
Self {
object_store_url,
object_metas: vec![meta],
sort_key: chunk.sort_key().cloned(),
}
}
/// Add the chunk to the list of files
fn add_parquet_file(&mut self, chunk: &dyn QueryChunk, meta: ObjectMeta) {
self.object_metas.push(meta);
self.sort_key = combine_sort_key(self.sort_key.take(), chunk.sort_key());
}
}
/// Combines the existing sort key with the sort key of the chunk,
/// returning the new combined compatible sort key that describes both
/// chunks.
///
/// If it is not possible to find a compatible sort key, None is
/// returned signifying "unknown sort order"
fn combine_sort_key(
existing_sort_key: Option<SortKey>,
chunk_sort_key: Option<&SortKey>,
) -> Option<SortKey> {
if let (Some(existing_sort_key), Some(chunk_sort_key)) = (existing_sort_key, chunk_sort_key) {
let combined_sort_key = SortKey::try_merge_key(&existing_sort_key, chunk_sort_key);
// Avoid cloning the sort key when possible, as the sort key
// is likely to commonly be the same
match combined_sort_key {
Some(combined_sort_key) if combined_sort_key == &existing_sort_key => {
Some(existing_sort_key)
}
Some(combined_sort_key) => Some(combined_sort_key.clone()),
None => None,
}
} else {
// no existing sort key means the data wasn't consistently sorted so leave it alone
None
}
}
/// Place [chunk](QueryChunk)s into physical nodes.
///
/// This will group chunks into [record batch](QueryChunkData::RecordBatches) and [parquet
@ -108,7 +52,7 @@ pub fn chunks_to_physical_nodes(
}
let mut record_batch_chunks: Vec<(SchemaRef, Vec<RecordBatch>, Arc<TableSummary>)> = vec![];
let mut parquet_chunks: HashMap<String, ParquetChunkList> = HashMap::new();
let mut parquet_chunks: HashMap<String, (ObjectStoreUrl, Vec<ObjectMeta>)> = HashMap::new();
for chunk in &chunks {
match chunk.data() {
@ -119,14 +63,12 @@ pub fn chunks_to_physical_nodes(
let url_str = parquet_input.object_store_url.as_str().to_owned();
match parquet_chunks.entry(url_str) {
Entry::Occupied(mut o) => {
o.get_mut()
.add_parquet_file(chunk.as_ref(), parquet_input.object_meta);
o.get_mut().1.push(parquet_input.object_meta);
}
Entry::Vacant(v) => {
v.insert(ParquetChunkList::new(
v.insert((
parquet_input.object_store_url,
chunk.as_ref(),
parquet_input.object_meta,
vec![parquet_input.object_meta],
));
}
}
@ -144,15 +86,9 @@ pub fn chunks_to_physical_nodes(
let mut parquet_chunks: Vec<_> = parquet_chunks.into_iter().collect();
parquet_chunks.sort_by_key(|(url_str, _)| url_str.clone());
let target_partitions = context.session_config().target_partitions;
for (_url_str, chunk_list) in parquet_chunks {
let ParquetChunkList {
object_store_url,
object_metas,
sort_key,
} = chunk_list;
for (_url_str, (url, chunks)) in parquet_chunks {
let file_groups = distribute(
object_metas.into_iter().map(|object_meta| PartitionedFile {
chunks.into_iter().map(|object_meta| PartitionedFile {
object_meta,
partition_values: vec![],
range: None,
@ -160,26 +96,21 @@ pub fn chunks_to_physical_nodes(
}),
target_partitions,
);
// Tell datafusion about the sort key, if any
let file_schema = iox_schema.as_arrow();
let output_ordering =
sort_key.map(|sort_key| arrow_sort_key_exprs(&sort_key, &file_schema));
let base_config = FileScanConfig {
object_store_url,
file_schema,
object_store_url: url,
file_schema: iox_schema.as_arrow(),
file_groups,
statistics: Statistics::default(),
projection: None,
limit: None,
table_partition_cols: vec![],
config_options: context.session_config().config_options(),
output_ordering,
};
let meta_size_hint = None;
let parquet_exec = ParquetExec::new(base_config, predicate.filter_expr(), meta_size_hint);
output_nodes.push(Arc::new(parquet_exec));
output_nodes.push(Arc::new(ParquetExec::new(
base_config,
predicate.filter_expr(),
None,
)));
}
assert!(!output_nodes.is_empty());
@ -213,8 +144,6 @@ where
#[cfg(test)]
mod tests {
use schema::sort::SortKeyBuilder;
use super::*;
#[test]
@ -227,50 +156,4 @@ mod tests {
assert_eq!(distribute(0..3u8, 10), vec![vec![0], vec![1], vec![2]],);
}
#[test]
fn test_combine_sort_key() {
let skey_t1 = SortKeyBuilder::new()
.with_col("t1")
.with_col("time")
.build();
let skey_t1_t2 = SortKeyBuilder::new()
.with_col("t1")
.with_col("t2")
.with_col("time")
.build();
let skey_t2_t1 = SortKeyBuilder::new()
.with_col("t2")
.with_col("t1")
.with_col("time")
.build();
assert_eq!(combine_sort_key(None, None), None);
assert_eq!(combine_sort_key(Some(skey_t1.clone()), None), None);
assert_eq!(combine_sort_key(None, Some(&skey_t1)), None);
assert_eq!(
combine_sort_key(Some(skey_t1.clone()), Some(&skey_t1)),
Some(skey_t1.clone())
);
assert_eq!(
combine_sort_key(Some(skey_t1.clone()), Some(&skey_t1_t2)),
Some(skey_t1_t2.clone())
);
assert_eq!(
combine_sort_key(Some(skey_t1_t2.clone()), Some(&skey_t1)),
Some(skey_t1_t2.clone())
);
assert_eq!(
combine_sort_key(Some(skey_t2_t1.clone()), Some(&skey_t1)),
Some(skey_t2_t1.clone())
);
assert_eq!(combine_sort_key(Some(skey_t2_t1), Some(&skey_t1_t2)), None);
}
}

View File

@ -123,8 +123,6 @@ impl ParquetExecInput {
table_partition_cols: vec![],
// TODO avoid this `copied_config` when config_options are directly available on context
config_options: session_ctx.copied_config().config_options(),
// Parquet files ARE actually sorted but we don't care here since we just construct a `collect` plan.
output_ordering: None,
};
let exec = ParquetExec::new(base_config, None, None);
let exec_schema = exec.schema();

View File

@ -213,7 +213,6 @@ impl ParquetFileReader {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: None,
config_options: ConfigOptions::new().into_shareable(),
};

View File

@ -217,32 +217,11 @@ impl CachedConnection {
#[cfg(test)]
mod tests {
use data_types::{NamespaceId, TableId};
use datafusion::prelude::{col, lit, when, Expr};
use datafusion::prelude::{col, lit};
use predicate::Predicate;
use super::*;
#[test]
fn serialize_deeply_nested_and() {
// we need more stack space so this doesn't overflow in dev builds
std::thread::Builder::new()
.stack_size(10_000_000)
.spawn(|| {
let n = 100;
println!("testing: {n}");
// build a deeply nested (a < 5) AND (a < 5) AND .... tree
let expr_base = col("a").lt(lit(5i32));
let expr = (0..n).fold(expr_base.clone(), |expr, _| expr.and(expr_base.clone()));
let (request, request2) = serialize_roundtrip(expr);
assert_eq!(request, request2);
})
.expect("spawning thread")
.join()
.expect("joining thread");
}
#[test]
fn serialize_deeply_nested_predicate() {
// see https://github.com/influxdata/influxdb_iox/issues/5974
@ -255,54 +234,28 @@ mod tests {
for n in [1, 2, n_max] {
println!("testing: {n}");
let expr_base = col("a").lt(lit(5i32));
let expr = (0..n).fold(expr_base.clone(), |expr, _| expr.and(expr_base.clone()));
// build a deeply recursive nested expression:
//
// CASE
// WHEN TRUE
// THEN (WHEN ...)
// ELSE FALSE
//
let expr = (0..n).fold(lit(false), |expr, _|{
when(lit(true), expr)
.end()
.unwrap()
});
let predicate = Predicate {exprs: vec![expr], ..Default::default()};
let (request1, request2) = serialize_roundtrip(expr);
let request = IngesterQueryRequest {
namespace_id: NamespaceId::new(42),
table_id: TableId::new(1337),
columns: vec![String::from("col1"), String::from("col2")],
predicate: Some(predicate),
};
let proto = serialize_ingester_query_request(request.clone()).expect("serialization");
let request2 = IngesterQueryRequest::try_from(proto).expect("deserialization");
// expect that the self preservation mechanism has
// kicked in and the predicate has been ignored.
if request2.predicate.is_none() {
assert!(n > 2, "not really deeply nested");
return;
} else {
assert_eq!(request1, request2);
}
}
panic!("did not find a 'too deeply nested' expression, tested up to a depth of {n_max}")
}).expect("spawning thread").join().expect("joining thread");
}
/// Creates a [`IngesterQueryRequest`] and round trips it through
/// serialization, returning both the original and the serialized
/// request
fn serialize_roundtrip(expr: Expr) -> (IngesterQueryRequest, IngesterQueryRequest) {
let predicate = Predicate {
exprs: vec![expr],
..Default::default()
};
let request = IngesterQueryRequest {
namespace_id: NamespaceId::new(42),
table_id: TableId::new(1337),
columns: vec![String::from("col1"), String::from("col2")],
predicate: Some(predicate),
};
let proto = serialize_ingester_query_request(request.clone()).expect("serialization");
let request2 = IngesterQueryRequest::try_from(proto).expect("deserialization");
(request, request2)
}
}

View File

@ -508,24 +508,24 @@ mod tests {
&querier_namespace,
"EXPLAIN SELECT * FROM mem ORDER BY host,time",
&[
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |",
"| | Projection: mem.host, mem.perc, mem.time |",
"| | TableScan: mem projection=[host, perc, time] |",
"| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |",
"| | CoalescePartitionsExec |",
"| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |",
"| | UnionExec |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions=[1/2/1/4/<uuid>.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, perc, time] |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions=[1/2/1/4/<uuid>.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, perc, time] |",
"| | |",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |",
"| | Projection: mem.host, mem.perc, mem.time |",
"| | TableScan: mem projection=[host, perc, time] |",
"| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |",
"| | CoalescePartitionsExec |",
"| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |",
"| | UnionExec |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions=[1/2/1/4/<uuid>.parquet], projection=[host, perc, time] |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions=[1/2/1/4/<uuid>.parquet], projection=[host, perc, time] |",
"| | |",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------+",
],
)
.await;
@ -569,20 +569,20 @@ mod tests {
"EXPLAIN SELECT * FROM cpu",
&[
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |",
"| | TableScan: cpu projection=[foo, host, load, time] |",
"| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |",
"| | UnionExec |",
"| | DeduplicateExec: [host@1 ASC,time@3 ASC] |",
"| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |",
"| | UnionExec |",
"| | ParquetExec: limit=None, partitions=[1/1/2/2/<uuid>.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions=[1/1/2/2/<uuid>.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions=[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/3/<uuid>.parquet], projection=[foo, host, load, time] |",
"| | |",
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |",
"| | TableScan: cpu projection=[foo, host, load, time] |",
"| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |",
"| | UnionExec |",
"| | DeduplicateExec: [host@1 ASC,time@3 ASC] |",
"| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |",
"| | UnionExec |",
"| | ParquetExec: limit=None, partitions=[1/1/2/2/<uuid>.parquet], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions=[1/1/2/2/<uuid>.parquet], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions=[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/3/<uuid>.parquet], projection=[foo, host, load, time] |",
"| | |",
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
],
)
.await;

View File

@ -1,91 +1,91 @@
-- Test Setup: OneMeasurementFourChunksWithDuplicatesWithIngester
-- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[state@0 ASC], projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC], projection=[city] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select count(*) from h2o;
+-----------------+
| COUNT(UInt8(1)) |

View File

@ -1,75 +1,75 @@
-- Test Setup: OneMeasurementFourChunksWithDuplicatesParquetOnly
-- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[state@0 ASC], projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[city@0 ASC], projection=[city] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select count(*) from h2o;
+-----------------+
| COUNT(UInt8(1)) |
@ -91,8 +91,8 @@
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, num_dupes=2, output_rows=5, spill_count=0, spilled_bytes=0] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] |
| | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=591, bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=628, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=2, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=591, bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=628, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=2, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | |
----------

View File

@ -23,16 +23,16 @@
| | DeduplicateExec: [tag@1 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [tag@1 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000013.parquet], projection=[f] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -14,15 +14,15 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -37,49 +37,49 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200.0;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Float64) > Float64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Float64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Float64) > Float64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Float64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 4.0;
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury';
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -93,19 +93,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury';
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -128,7 +128,7 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000;
@ -154,7 +154,7 @@
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND town@4 != tewsbury AND system@2 = 5 OR town@4 = lawrence AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and count < 40000;
@ -170,21 +170,21 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000;
-- Results After Normalizing UUIDs
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) |
| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] |
| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) |
| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] |
| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -199,19 +199,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 4.0 and system < 7.0;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4 AND system_min@1 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 5.0 and system < 7.0;
-- Results After Sorting
+-------+--------+--------------------------------+----------+
@ -223,19 +223,19 @@
+-------+--------+--------------------------------+----------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and system < 7.0;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND system_min@1 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system;
-- Results After Sorting
+-------+--------+--------------------------------+----------+
@ -246,19 +246,19 @@
+-------+--------+--------------------------------+----------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading');
-- Results After Sorting
+-------+--------+--------------------------------+---------+
@ -278,7 +278,7 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND CAST(count@0 AS Int64) = 632 OR town@3 = reading |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00');

View File

@ -14,25 +14,25 @@
+---------+------------+-------+------+--------------------------------+
-- SQL: EXPLAIN SELECT * from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city, other_temp, state, temp, time] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select temp, other_temp, time from h2o;
-- Results After Sorting
+------+------------+--------------------------------+
@ -48,50 +48,50 @@
+------+------------+--------------------------------+
-- SQL: EXPLAIN select temp, other_temp, time from h2o;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | UnionExec |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[time@2 ASC], projection=[other_temp, temp, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | UnionExec |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city, other_temp, state, temp, time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[other_temp, temp, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00');
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | Filter: h2o.time >= TimestampNanosecond(250, None) |
| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@4 >= 250 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | Filter: h2o.time >= TimestampNanosecond(250, None) |
| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@4 >= 250 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -10,20 +10,20 @@
+--------+------------+-------+------+--------------------------------+
-- SQL: EXPLAIN SELECT * from h2o;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select temp, other_temp, time from h2o;
+------+------------+--------------------------------+
| temp | other_temp | time |
@ -34,18 +34,18 @@
+------+------------+--------------------------------+
-- SQL: EXPLAIN select temp, other_temp, time from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -26,7 +26,7 @@ bytes = { version = "1", features = ["std"] }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] }
crossbeam-utils = { version = "0.8", features = ["std"] }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "d2814c960168b45c4a0f5d7bbb72d9f412cb08bd", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd081d64a2fba8574e63bdd0662c14aec5852b48", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] }
either = { version = "1", features = ["use_std"] }
fixedbitset = { version = "0.4", features = ["std"] }