Merge branch 'main' into cn/ingester2-querier

pull/24376/head
kodiakhq[bot] 2022-12-08 14:00:49 +00:00 committed by GitHub
commit 6f7cb5ccf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 1541 additions and 851 deletions

78
Cargo.lock generated
View File

@ -1237,8 +1237,8 @@ dependencies = [
[[package]]
name = "datafusion"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1270,6 +1270,7 @@ dependencies = [
"pin-project-lite",
"rand",
"smallvec",
"sqllogictest",
"sqlparser 0.27.0",
"tempfile",
"tokio",
@ -1282,8 +1283,8 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"arrow",
"chrono",
@ -1294,8 +1295,8 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1306,8 +1307,8 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"arrow",
"async-trait",
@ -1321,8 +1322,8 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1350,8 +1351,8 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"arrow",
"chrono",
@ -1367,8 +1368,8 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"arrow",
"datafusion-common",
@ -1378,10 +1379,10 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "14.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
dependencies = [
"arrow",
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"sqlparser 0.27.0",
@ -1417,6 +1418,12 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
[[package]]
name = "difflib"
version = "0.4.0"
@ -1575,9 +1582,9 @@ dependencies = [
[[package]]
name = "filetime"
version = "0.2.18"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3"
checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9"
dependencies = [
"cfg-if",
"libc",
@ -3017,6 +3024,17 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
[[package]]
name = "libtest-mimic"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa"
dependencies = [
"clap 4.0.29",
"termcolor",
"threadpool",
]
[[package]]
name = "link-cplusplus"
version = "1.0.7"
@ -3707,6 +3725,7 @@ name = "parquet_to_line_protocol"
version = "0.1.0"
dependencies = [
"datafusion",
"datafusion_util",
"futures",
"influxdb_line_protocol",
"mutable_batch",
@ -5075,6 +5094,25 @@ dependencies = [
"unicode_categories",
]
[[package]]
name = "sqllogictest"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba41e01d229d7725401de371e323851f82d839d68732a06162405362b60852fe"
dependencies = [
"async-trait",
"difference",
"futures",
"glob",
"humantime",
"itertools",
"libtest-mimic",
"regex",
"tempfile",
"thiserror",
"tracing",
]
[[package]]
name = "sqlparser"
version = "0.27.0"
@ -6327,6 +6365,7 @@ dependencies = [
"bytes",
"cc",
"chrono",
"clap 4.0.29",
"crossbeam-utils",
"crypto-common",
"datafusion",
@ -6346,7 +6385,6 @@ dependencies = [
"hashbrown 0.13.1",
"heck",
"indexmap",
"io-lifetimes",
"libc",
"lock_api",
"log",

View File

@ -114,8 +114,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "28.0.0" }
arrow-flight = { version = "28.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="799dd747152f6574638a844986b8ea8470d3f4d6", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="799dd747152f6574638a844986b8ea8470d3f4d6" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="fbadebb894672f61327a30f77cda2ee88a343b2a", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="fbadebb894672f61327a30f77cda2ee88a343b2a" }
hashbrown = { version = "0.13.1" }
parquet = { version = "28.0.0" }

View File

@ -33,10 +33,21 @@ pub struct RouterConfig {
/// Retention period to use when auto-creating namespaces.
/// For infinite retention, leave this unset and it will default to `None`.
/// Setting it to zero will not make it infinite.
/// Ignored if namespace-autocreation-enabled is set to false.
#[clap(
long = "new-namespace-retention-hours",
env = "INFLUXDB_IOX_NEW_NAMESPACE_RETENTION_HOURS",
action
)]
pub new_namespace_retention_hours: Option<u64>,
/// When writing data to a non-existent namespace, should the router auto-create the namespace
/// or reject the write? Set to false to disable namespace autocreation.
#[clap(
long = "namespace-autocreation-enabled",
env = "INFLUXDB_IOX_NAMESPACE_AUTOCREATION_ENABLED",
default_value = "true",
action
)]
pub namespace_autocreation_enabled: bool,
}

View File

@ -57,14 +57,4 @@ pub struct RouterRpcWriteConfig {
action
)]
pub query_pool_name: String,
/// Retention period to use when auto-creating namespaces.
/// For infinite retention, leave this unset and it will default to `None`.
/// Setting it to zero will not make it infinite.
#[clap(
long = "new-namespace-retention-hours",
env = "INFLUXDB_IOX_NEW_NAMESPACE_RETENTION_HOURS",
action
)]
pub new_namespace_retention_hours: Option<u64>,
}

View File

@ -26,7 +26,7 @@ pub fn iox_session_config() -> SessionConfig {
// Enable parquet predicate pushdown optimization
.set_bool(OPT_PARQUET_PUSHDOWN_FILTERS, true)
.set_bool(OPT_PARQUET_REORDER_FILTERS, true)
.create_default_catalog_and_schema(true)
.with_create_default_catalog_and_schema(true)
.with_information_schema(true)
.with_default_catalog_and_schema(DEFAULT_CATALOG, DEFAULT_SCHEMA)
}

View File

@ -413,6 +413,7 @@ impl Config {
query_pool_name: QUERY_POOL_NAME.to_string(),
http_request_limit: 1_000,
new_namespace_retention_hours: None, // infinite retention
namespace_autocreation_enabled: true,
};
// create a CompactorConfig for the all in one server based on

View File

@ -2,7 +2,7 @@
use super::main;
use crate::process_info::setup_metric_registry;
use clap_blocks::{
catalog_dsn::CatalogDsnConfig, object_store::make_object_store,
catalog_dsn::CatalogDsnConfig, object_store::make_object_store, router::RouterConfig,
router_rpc_write::RouterRpcWriteConfig, run_config::RunConfig,
};
use iox_time::{SystemProvider, TimeProvider};
@ -60,7 +60,10 @@ pub struct Config {
pub(crate) catalog_dsn: CatalogDsnConfig,
#[clap(flatten)]
pub(crate) router_config: RouterRpcWriteConfig,
pub(crate) router_config: RouterConfig,
#[clap(flatten)]
pub(crate) router_rpc_write_config: RouterRpcWriteConfig,
}
pub async fn command(config: Config) -> Result<()> {
@ -88,6 +91,7 @@ pub async fn command(config: Config) -> Result<()> {
catalog,
object_store,
&config.router_config,
&config.router_rpc_write_config,
)
.await?;

View File

@ -32,6 +32,7 @@ use datafusion::{
execution::{
context::{QueryPlanner, SessionState, TaskContext},
runtime_env::RuntimeEnv,
MemoryManager,
},
logical_expr::{LogicalPlan, UserDefinedLogicalNode},
physical_plan::{
@ -411,6 +412,7 @@ impl IOxSessionContext {
pub async fn to_series_and_groups(
&self,
series_set_plans: SeriesSetPlans,
memory_manager: Arc<MemoryManager>,
) -> Result<impl Stream<Item = Result<Either>>> {
let SeriesSetPlans {
mut plans,
@ -471,7 +473,7 @@ impl IOxSessionContext {
// If we have group columns, sort the results, and create the
// appropriate groups
if let Some(group_columns) = group_columns {
let grouper = GroupGenerator::new(group_columns);
let grouper = GroupGenerator::new(group_columns, memory_manager);
Ok(grouper.group(data).await?.boxed())
} else {
Ok(data.map_ok(|series| series.into()).boxed())

View File

@ -9,13 +9,21 @@ use arrow::{
datatypes::{DataType, Int32Type, SchemaRef},
record_batch::RecordBatch,
};
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
use datafusion::{
error::DataFusionError,
execution::{
memory_manager::proxy::{MemoryConsumerProxy, VecAllocExt},
MemoryConsumerId, MemoryManager,
},
physical_plan::SendableRecordBatchStream,
};
use futures::{ready, Stream, StreamExt, TryStreamExt};
use futures::{future::BoxFuture, ready, FutureExt, Stream, StreamExt};
use predicate::rpc_predicate::{GROUP_KEY_SPECIAL_START, GROUP_KEY_SPECIAL_STOP};
use snafu::{OptionExt, Snafu};
use std::{
collections::VecDeque,
future::Future,
pin::Pin,
sync::Arc,
task::{Context, Poll},
@ -199,7 +207,7 @@ impl SeriesSetConverter {
) -> Vec<(Arc<str>, Arc<str>)> {
assert_eq!(tag_column_names.len(), tag_indexes.len());
tag_column_names
let mut out = tag_column_names
.iter()
.zip(tag_indexes)
.filter_map(|(column_name, column_index)| {
@ -246,7 +254,10 @@ impl SeriesSetConverter {
tag_value.map(|tag_value| (Arc::clone(column_name), Arc::from(tag_value.as_str())))
})
.collect()
.collect::<Vec<_>>();
out.shrink_to_fit();
out
}
}
@ -491,32 +502,49 @@ impl Stream for SeriesSetConverterStream {
#[derive(Debug)]
pub struct GroupGenerator {
group_columns: Vec<Arc<str>>,
memory_manager: Arc<MemoryManager>,
collector_buffered_size_max: usize,
}
impl GroupGenerator {
pub fn new(group_columns: Vec<Arc<str>>) -> Self {
Self { group_columns }
pub fn new(group_columns: Vec<Arc<str>>, memory_manager: Arc<MemoryManager>) -> Self {
Self::new_with_buffered_size_max(
group_columns,
memory_manager,
Collector::<()>::DEFAULT_ALLOCATION_BUFFER_SIZE,
)
}
fn new_with_buffered_size_max(
group_columns: Vec<Arc<str>>,
memory_manager: Arc<MemoryManager>,
collector_buffered_size_max: usize,
) -> Self {
Self {
group_columns,
memory_manager,
collector_buffered_size_max,
}
}
/// groups the set of `series` into SeriesOrGroups
///
/// TODO: make this truly stream-based
/// TODO: make this truly stream-based, see <https://github.com/influxdata/influxdb_iox/issues/6347>.
pub async fn group<S>(
&self,
self,
series: S,
) -> Result<impl Stream<Item = Result<Either, DataFusionError>>, DataFusionError>
where
S: Stream<Item = Result<Series, DataFusionError>> + Send,
{
let mut series = series
.map(|res| {
res.and_then(|series| {
SortableSeries::try_new(series, &self.group_columns)
.map_err(|e| DataFusionError::External(Box::new(e)))
})
})
.try_collect::<Vec<_>>()
.await?;
let series = Box::pin(series);
let mut series = Collector::new(
series,
self.group_columns,
self.memory_manager,
self.collector_buffered_size_max,
)
.await?;
// Potential optimization is to skip this sort if we are
// grouping by a prefix of the tags for a single measurement
@ -658,12 +686,184 @@ impl SortableSeries {
use_tag.then(|| Arc::clone(&tag.value))
}));
// safe memory
tag_vals.shrink_to_fit();
Ok(Self {
series,
tag_vals,
num_partition_keys: group_columns.len(),
})
}
/// Memory usage in bytes, including `self`.
fn size(&self) -> usize {
std::mem::size_of_val(self) + self.series.size() - std::mem::size_of_val(&self.series)
+ (std::mem::size_of::<Arc<str>>() * self.tag_vals.capacity())
+ self.tag_vals.iter().map(|s| s.len()).sum::<usize>()
}
}
/// [`Future`] that collects [`Series`] objects into a [`SortableSeries`] vector while registering/checking memory
/// allocations with a [`MemoryManager`].
///
/// This avoids unbounded memory growth when merging multiple `Series` in memory
struct Collector<S> {
/// The inner stream was fully drained.
inner_done: bool,
/// This very future finished.
outer_done: bool,
/// Inner stream.
inner: S,
/// Group columns.
///
/// These are required for [`SortableSeries::try_new`].
group_columns: Vec<Arc<str>>,
/// Already collected objects.
collected: Vec<SortableSeries>,
/// Buffered but not-yet-registered allocated size.
///
/// We use an additional buffer here because in contrast to the normal DataFusion processing, the input stream is
NOT batched and we want to avoid costly memory allocation checks with the [`MemoryManager`] for every single element.
buffered_size: usize,
/// Maximum [buffered size](Self::buffered_size).
buffered_size_max: usize,
/// Our memory consumer.
///
/// This is optional because for [`MemoryConsumerProxy::alloc`], we need to move this into
/// [`mem_proxy_alloc_fut`](Self::mem_proxy_alloc_fut) to avoid self-borrowing.
mem_proxy: Option<MemoryConsumerProxy>,
/// A potential running [`MemoryConsumerProxy::alloc`].
///
/// This owns [`mem_proxy`](Self::mem_proxy) to avoid self-borrowing.
mem_proxy_alloc_fut:
Option<BoxFuture<'static, (MemoryConsumerProxy, Result<(), DataFusionError>)>>,
}
impl<S> Collector<S> {
/// Maximum [buffered size](Self::buffered_size).
const DEFAULT_ALLOCATION_BUFFER_SIZE: usize = 1024 * 1024;
}
impl<S> Collector<S>
where
S: Stream<Item = Result<Series, DataFusionError>> + Send + Unpin,
{
fn new(
inner: S,
group_columns: Vec<Arc<str>>,
memory_manager: Arc<MemoryManager>,
buffered_size_max: usize,
) -> Self {
let mem_proxy =
MemoryConsumerProxy::new("Collector stream", MemoryConsumerId::new(0), memory_manager);
Self {
inner_done: false,
outer_done: false,
inner,
group_columns,
collected: Vec::with_capacity(0),
buffered_size: 0,
buffered_size_max,
mem_proxy: Some(mem_proxy),
mem_proxy_alloc_fut: None,
}
}
/// Start a [`MemoryConsumerProxy::alloc`] future.
///
/// # Panic
/// Panics if a future is already running.
fn alloc(&mut self) {
assert!(self.mem_proxy_alloc_fut.is_none());
let mut mem_proxy =
std::mem::take(&mut self.mem_proxy).expect("no mem proxy future running");
let bytes = std::mem::take(&mut self.buffered_size);
self.mem_proxy_alloc_fut = Some(
async move {
let res = mem_proxy.alloc(bytes).await;
(mem_proxy, res)
}
.boxed(),
);
}
}
impl<S> Future for Collector<S>
where
S: Stream<Item = Result<Series, DataFusionError>> + Send + Unpin,
{
type Output = Result<Vec<SortableSeries>, DataFusionError>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let this = &mut *self;
loop {
assert!(!this.outer_done);
// Drive `MemoryConsumerProxy::alloc` to completion.
if let Some(fut) = this.mem_proxy_alloc_fut.as_mut() {
let (mem_proxy, res) = ready!(fut.poll_unpin(cx));
assert!(this.mem_proxy.is_none());
this.mem_proxy = Some(mem_proxy);
this.mem_proxy_alloc_fut = None;
if let Err(e) = res {
// poison this future
this.outer_done = true;
return Poll::Ready(Err(e));
}
}
// if the underlying stream is drained and the allocation future is ready (see above), we can finalize this future
if this.inner_done {
this.outer_done = true;
return Poll::Ready(Ok(std::mem::take(&mut this.collected)));
}
match ready!(this.inner.poll_next_unpin(cx)) {
Some(Ok(series)) => match SortableSeries::try_new(series, &this.group_columns) {
Ok(series) => {
// Note: the size of `SortableSeries` itself is already included in the vector allocation
this.buffered_size += series.size() - std::mem::size_of_val(&series);
this.collected
.push_accounted(series, &mut this.buffered_size);
// should we clear our allocation buffer?
if this.buffered_size > this.buffered_size_max {
this.alloc();
continue;
}
}
Err(e) => {
// poison this future
this.outer_done = true;
return Poll::Ready(Err(DataFusionError::External(Box::new(e))));
}
},
Some(Err(e)) => {
// poison this future
this.outer_done = true;
return Poll::Ready(Err(e));
}
None => {
// underlying stream drained. now register the final allocation and then we're done
this.inner_done = true;
if this.buffered_size > 0 {
this.alloc();
}
continue;
}
}
}
}
}
#[cfg(test)]
@ -677,10 +877,15 @@ mod tests {
record_batch::RecordBatch,
};
use arrow_util::assert_batches_eq;
use assert_matches::assert_matches;
use datafusion::execution::memory_manager::MemoryManagerConfig;
use datafusion_util::{stream_from_batch, stream_from_batches, stream_from_schema};
use futures::TryStreamExt;
use itertools::Itertools;
use test_helpers::str_vec_to_arc_vec;
use crate::exec::seriesset::series::{Data, Tag};
use super::*;
#[tokio::test]
@ -1431,6 +1636,137 @@ mod tests {
);
}
#[tokio::test]
async fn test_group_generator_mem_limit() {
let memory_manager =
MemoryManager::new(MemoryManagerConfig::try_new_limit(1, 1.0).unwrap());
let ggen = GroupGenerator::new(vec![Arc::from("g")], memory_manager);
let input = futures::stream::iter([Ok(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::FloatPoints {
timestamps: vec![],
values: vec![],
},
})]);
let err = match ggen.group(input).await {
Ok(stream) => stream.try_collect::<Vec<_>>().await.unwrap_err(),
Err(e) => e,
};
assert_matches!(err, DataFusionError::ResourcesExhausted(_));
}
#[tokio::test]
async fn test_group_generator_no_mem_limit() {
let memory_manager =
MemoryManager::new(MemoryManagerConfig::try_new_limit(usize::MAX, 1.0).unwrap());
// use a generator w/ a low buffered allocation to force multiple `alloc` calls
let ggen =
GroupGenerator::new_with_buffered_size_max(vec![Arc::from("g")], memory_manager, 1);
let input = futures::stream::iter([
Ok(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::IntegerPoints {
timestamps: vec![1],
values: vec![1],
},
}),
Ok(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("y"),
}],
data: Data::IntegerPoints {
timestamps: vec![2],
values: vec![2],
},
}),
Ok(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::IntegerPoints {
timestamps: vec![3],
values: vec![3],
},
}),
Ok(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::IntegerPoints {
timestamps: vec![4],
values: vec![4],
},
}),
]);
let actual = ggen
.group(input)
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap();
let expected = vec![
Either::Group(Group {
tag_keys: vec![Arc::from("g")],
partition_key_vals: vec![Arc::from("x")],
}),
Either::Series(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::IntegerPoints {
timestamps: vec![1],
values: vec![1],
},
}),
Either::Series(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::IntegerPoints {
timestamps: vec![3],
values: vec![3],
},
}),
Either::Series(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("x"),
}],
data: Data::IntegerPoints {
timestamps: vec![4],
values: vec![4],
},
}),
Either::Group(Group {
tag_keys: vec![Arc::from("g")],
partition_key_vals: vec![Arc::from("y")],
}),
Either::Series(Series {
tags: vec![Tag {
key: Arc::from("g"),
value: Arc::from("y"),
}],
data: Data::IntegerPoints {
timestamps: vec![2],
values: vec![2],
},
}),
];
assert_eq!(actual, expected);
}
fn assert_series_set<const N: usize, const M: usize>(
set: &SeriesSet,
table_name: &'static str,

View File

@ -28,12 +28,19 @@ pub enum Error {
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// A name=value pair used to represent a series's tag
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Tag {
pub key: Arc<str>,
pub value: Arc<str>,
}
impl Tag {
/// Memory usage in bytes, including `self`.
pub fn size(&self) -> usize {
std::mem::size_of_val(self) + self.key.len() + self.value.len()
}
}
impl fmt::Display for Tag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}={}", self.key, self.value)
@ -41,7 +48,7 @@ impl fmt::Display for Tag {
}
/// Represents a single logical TimeSeries
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Series {
/// key = value pairs that define this series
/// (including the _measurement and _field that correspond to table name and column name)
@ -51,6 +58,21 @@ pub struct Series {
pub data: Data,
}
impl Series {
/// Memory usage in bytes, including `self`.
pub fn size(&self) -> usize {
std::mem::size_of_val(self)
+ (std::mem::size_of::<Tag>() * self.tags.capacity())
+ self
.tags
.iter()
.map(|tag| tag.size() - std::mem::size_of_val(tag))
.sum::<usize>()
+ self.data.size()
- std::mem::size_of_val(&self.data)
}
}
impl fmt::Display for Series {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Series tags={{")?;
@ -97,6 +119,95 @@ pub enum Data {
},
}
impl Data {
/// Memory usage in bytes, including `self`.
pub fn size(&self) -> usize {
std::mem::size_of_val(self)
+ match self {
Self::FloatPoints { timestamps, values } => {
primitive_vec_size(timestamps) + primitive_vec_size(values)
}
Self::IntegerPoints { timestamps, values } => {
primitive_vec_size(timestamps) + primitive_vec_size(values)
}
Self::UnsignedPoints { timestamps, values } => {
primitive_vec_size(timestamps) + primitive_vec_size(values)
}
Self::BooleanPoints { timestamps, values } => {
primitive_vec_size(timestamps) + primitive_vec_size(values)
}
Self::StringPoints { timestamps, values } => {
primitive_vec_size(timestamps) + primitive_vec_size(values)
}
}
}
}
impl PartialEq for Data {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(
Self::FloatPoints {
timestamps: l_timestamps,
values: l_values,
},
Self::FloatPoints {
timestamps: r_timestamps,
values: r_values,
},
) => l_timestamps == r_timestamps && l_values == r_values,
(
Self::IntegerPoints {
timestamps: l_timestamps,
values: l_values,
},
Self::IntegerPoints {
timestamps: r_timestamps,
values: r_values,
},
) => l_timestamps == r_timestamps && l_values == r_values,
(
Self::UnsignedPoints {
timestamps: l_timestamps,
values: l_values,
},
Self::UnsignedPoints {
timestamps: r_timestamps,
values: r_values,
},
) => l_timestamps == r_timestamps && l_values == r_values,
(
Self::BooleanPoints {
timestamps: l_timestamps,
values: l_values,
},
Self::BooleanPoints {
timestamps: r_timestamps,
values: r_values,
},
) => l_timestamps == r_timestamps && l_values == r_values,
(
Self::StringPoints {
timestamps: l_timestamps,
values: l_values,
},
Self::StringPoints {
timestamps: r_timestamps,
values: r_values,
},
) => l_timestamps == r_timestamps && l_values == r_values,
_ => false,
}
}
}
impl Eq for Data {}
/// Returns size of given vector of primitive types in bytes, EXCLUDING `vec` itself.
fn primitive_vec_size<T>(vec: &Vec<T>) -> usize {
std::mem::size_of::<T>() * vec.capacity()
}
impl fmt::Display for Data {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
@ -174,7 +285,7 @@ impl SeriesSet {
let tags = self.create_frame_tags(schema.field(index.value_index).name());
let timestamps = compute::nullif(
let mut timestamps = compute::nullif(
batch.column(index.timestamp_index),
&compute::is_null(array).expect("is_null"),
)
@ -183,47 +294,57 @@ impl SeriesSet {
.downcast_ref::<TimestampNanosecondArray>()
.unwrap()
.extract_values();
timestamps.shrink_to_fit();
let data = match array.data_type() {
ArrowDataType::Utf8 => {
let values = array
let mut values = array
.as_any()
.downcast_ref::<StringArray>()
.unwrap()
.extract_values();
values.shrink_to_fit();
Data::StringPoints { timestamps, values }
}
ArrowDataType::Float64 => {
let values = array
let mut values = array
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.extract_values();
values.shrink_to_fit();
Data::FloatPoints { timestamps, values }
}
ArrowDataType::Int64 => {
let values = array
let mut values = array
.as_any()
.downcast_ref::<Int64Array>()
.unwrap()
.extract_values();
values.shrink_to_fit();
Data::IntegerPoints { timestamps, values }
}
ArrowDataType::UInt64 => {
let values = array
let mut values = array
.as_any()
.downcast_ref::<UInt64Array>()
.unwrap()
.extract_values();
values.shrink_to_fit();
Data::UnsignedPoints { timestamps, values }
}
ArrowDataType::Boolean => {
let values = array
let mut values = array
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap()
.extract_values();
values.shrink_to_fit();
Data::BooleanPoints { timestamps, values }
}
_ => {
@ -270,7 +391,7 @@ impl SeriesSet {
}
/// Represents a group of `Series`
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Group {
/// Contains *ALL* tag keys (not just those used for grouping)
pub tag_keys: Vec<Arc<str>>,
@ -297,7 +418,7 @@ impl fmt::Display for Group {
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Either {
Series(Series),
Group(Group),

View File

@ -1445,7 +1445,7 @@ fn table_chunk_stream<'a>(
.chunks(
table_name,
predicate,
&projection,
projection.as_ref(),
ctx.child_ctx("table chunks"),
)
.await

View File

@ -154,7 +154,7 @@ pub trait QueryNamespace: QueryNamespaceMeta + Debug + Send + Sync {
&self,
table_name: &str,
predicate: &Predicate,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
ctx: IOxSessionContext,
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError>;

View File

@ -221,7 +221,7 @@ impl TableProvider for ChunkTableProvider {
async fn scan(
&self,
_ctx: &SessionState,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
filters: &[Expr],
_limit: Option<usize>,
) -> std::result::Result<Arc<dyn ExecutionPlan>, DataFusionError> {

View File

@ -166,7 +166,7 @@ pub fn chunks_to_physical_nodes(
}
let mut parquet_chunks: Vec<_> = parquet_chunks.into_iter().collect();
parquet_chunks.sort_by_key(|(url_str, _)| url_str.clone());
let target_partitions = context.session_config().target_partitions;
let target_partitions = context.session_config().target_partitions();
for (_url_str, chunk_list) in parquet_chunks {
let ParquetChunkList {
object_store_url,

View File

@ -104,7 +104,7 @@ impl QueryNamespace for TestDatabase {
&self,
table_name: &str,
predicate: &Predicate,
_projection: &Option<Vec<usize>>,
_projection: Option<&Vec<usize>>,
_ctx: IOxSessionContext,
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError> {
// save last predicate

View File

@ -27,7 +27,9 @@ use router::{
namespace_cache::{
metrics::InstrumentedCache, MemoryNamespaceCache, NamespaceCache, ShardedCache,
},
namespace_resolver::{NamespaceAutocreation, NamespaceResolver, NamespaceSchemaResolver},
namespace_resolver::{
MissingNamespaceAction, NamespaceAutocreation, NamespaceResolver, NamespaceSchemaResolver,
},
server::{
grpc::{sharder::ShardService, GrpcDelegate, RpcWriteGrpcDelegate},
http::HttpDelegate,
@ -251,12 +253,13 @@ pub async fn create_router_grpc_write_server_type(
metrics: Arc<metric::Registry>,
catalog: Arc<dyn Catalog>,
object_store: Arc<DynObjectStore>,
router_config: &RouterRpcWriteConfig,
router_config: &RouterConfig,
router_rpc_write_config: &RouterRpcWriteConfig,
) -> Result<Arc<dyn ServerType>> {
// 1. START: Different Setup Per Router Path: this part is only relevant to using RPC write
// path and should not be added to `create_router_server_type`.
let mut ingester_clients = Vec::with_capacity(router_config.ingester_addresses.len());
for ingester_addr in &router_config.ingester_addresses {
let mut ingester_clients = Vec::with_capacity(router_rpc_write_config.ingester_addresses.len());
for ingester_addr in &router_rpc_write_config.ingester_addresses {
ingester_clients.push(write_service_client(ingester_addr).await);
}
@ -333,19 +336,24 @@ pub async fn create_router_grpc_write_server_type(
let mut txn = catalog.start_transaction().await?;
let topic_id = txn
.topics()
.get_by_name(&router_config.topic)
.get_by_name(&router_rpc_write_config.topic)
.await?
.map(|v| v.id)
.unwrap_or_else(|| panic!("no topic named {} in catalog", router_config.topic));
.unwrap_or_else(|| {
panic!(
"no topic named {} in catalog",
router_rpc_write_config.topic
)
});
let query_id = txn
.query_pools()
.create_or_get(&router_config.query_pool_name)
.create_or_get(&router_rpc_write_config.query_pool_name)
.await
.map(|v| v.id)
.unwrap_or_else(|e| {
panic!(
"failed to upsert query pool {} in catalog: {}",
router_config.topic, e
router_rpc_write_config.topic, e
)
});
txn.commit().await?;
@ -356,9 +364,17 @@ pub async fn create_router_grpc_write_server_type(
Arc::clone(&catalog),
topic_id,
query_id,
router_config
.new_namespace_retention_hours
.map(|hours| hours as i64 * 60 * 60 * 1_000_000_000),
{
if router_config.namespace_autocreation_enabled {
MissingNamespaceAction::AutoCreate(
router_config
.new_namespace_retention_hours
.map(|hours| hours as i64 * 60 * 60 * 1_000_000_000),
)
} else {
MissingNamespaceAction::Reject
}
},
);
//
////////////////////////////////////////////////////////////////////////////
@ -530,9 +546,17 @@ pub async fn create_router_server_type(
Arc::clone(&catalog),
topic_id,
query_id,
router_config
.new_namespace_retention_hours
.map(|hours| hours as i64 * 60 * 60 * 1_000_000_000),
{
if router_config.namespace_autocreation_enabled {
MissingNamespaceAction::AutoCreate(
router_config
.new_namespace_retention_hours
.map(|hours| hours as i64 * 60 * 60 * 1_000_000_000),
)
} else {
MissingNamespaceAction::Reject
}
},
);
//
////////////////////////////////////////////////////////////////////////////

View File

@ -7,6 +7,7 @@ license.workspace = true
[dependencies]
datafusion = { workspace = true }
datafusion_util = { path = "../datafusion_util" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
futures = {version = "0.3"}
num_cpus = "1.13.1"

View File

@ -2,7 +2,6 @@
use datafusion::{
arrow::datatypes::SchemaRef as ArrowSchemaRef,
config::ConfigOptions,
datasource::{
file_format::{parquet::ParquetFormat, FileFormat},
listing::PartitionedFile,
@ -16,6 +15,7 @@ use datafusion::{
},
prelude::{SessionConfig, SessionContext},
};
use datafusion_util::config::iox_session_config;
use futures::StreamExt;
use object_store::{
local::LocalFileSystem, path::Path as ObjectStorePath, ObjectMeta, ObjectStore,
@ -162,9 +162,8 @@ pub struct ParquetFileReader {
/// Parquet file metadata
schema: ArrowSchemaRef,
/// number of rows to read in each batch (can pick small to
/// increase parallelism). Defaults to 1000
batch_size: usize,
/// DataFusion configuration, such as the target batchsize, etc
session_config: SessionConfig,
}
impl ParquetFileReader {
@ -174,8 +173,11 @@ impl ParquetFileReader {
object_store_url: ObjectStoreUrl,
object_meta: ObjectMeta,
) -> Result<Self, Error> {
let session_config = iox_session_config();
// Keep metadata so we can find the measurement name
let format = ParquetFormat::default().with_skip_metadata(false);
let format =
ParquetFormat::new(session_config.config_options()).with_skip_metadata(Some(false));
// Use datafusion parquet reader to read the metadata from the
// file.
@ -189,7 +191,7 @@ impl ParquetFileReader {
object_store_url,
object_meta,
schema,
batch_size: 1000,
session_config,
})
}
@ -214,15 +216,14 @@ impl ParquetFileReader {
limit: None,
table_partition_cols: vec![],
output_ordering: None,
config_options: ConfigOptions::new().into_shareable(),
config_options: self.session_config.config_options(),
};
// set up enough datafusion context to do the real read session
let predicate = None;
let metadata_size_hint = None;
let exec = ParquetExec::new(base_config, predicate, metadata_size_hint);
let session_config = SessionConfig::new().with_batch_size(self.batch_size);
let session_ctx = SessionContext::with_config(session_config);
let session_ctx = SessionContext::with_config(self.session_config.clone());
let object_store = Arc::clone(&self.object_store);
let task_ctx = Arc::new(TaskContext::from(&session_ctx));

View File

@ -42,7 +42,7 @@ impl QueryNamespace for QuerierNamespace {
&self,
table_name: &str,
predicate: &Predicate,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
ctx: IOxSessionContext,
) -> Result<Vec<Arc<dyn QueryChunk>>, DataFusionError> {
debug!(%table_name, %predicate, "Finding chunks for table");
@ -487,15 +487,15 @@ mod tests {
&querier_namespace,
"EXPLAIN SELECT * FROM cpu",
&[
"+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |",
"| | TableScan: cpu projection=[foo, host, load, time] |",
"| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |",
"| | ParquetExec: limit=None, partitions=[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/2/2/<uuid>.parquet, 1/1/1/3/<uuid>.parquet], projection=[foo, host, load, time] |",
"| | |",
"+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |",
"| | TableScan: cpu projection=[foo, host, load, time] |",
"| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |",
"| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/2/2/<uuid>.parquet, 1/1/1/3/<uuid>.parquet]]}, projection=[foo, host, load, time] |",
"| | |",
"+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
],
)
.await;
@ -507,24 +507,24 @@ mod tests {
&querier_namespace,
"EXPLAIN SELECT * FROM mem ORDER BY host,time",
&[
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |",
"| | Projection: mem.host, mem.perc, mem.time |",
"| | TableScan: mem projection=[host, perc, time] |",
"| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |",
"| | CoalescePartitionsExec |",
"| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |",
"| | UnionExec |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions=[1/2/1/4/<uuid>.parquet], projection=[host, perc, time] |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions=[1/2/1/4/<uuid>.parquet], projection=[host, perc, time] |",
"| | |",
"+---------------+---------------------------------------------------------------------------------------------------------------------------------------+",
"+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |",
"| | Projection: mem.host, mem.perc, mem.time |",
"| | TableScan: mem projection=[host, perc, time] |",
"| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |",
"| | CoalescePartitionsExec |",
"| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |",
"| | UnionExec |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/<uuid>.parquet]]}, projection=[host, perc, time] |",
"| | CoalesceBatchesExec: target_batch_size=4096 |",
"| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |",
"| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/<uuid>.parquet]]}, projection=[host, perc, time] |",
"| | |",
"+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+",
],
)
.await;
@ -567,21 +567,21 @@ mod tests {
&querier_namespace,
"EXPLAIN SELECT * FROM cpu",
&[
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |",
"| | TableScan: cpu projection=[foo, host, load, time] |",
"| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |",
"| | UnionExec |",
"| | DeduplicateExec: [host@1 ASC,time@3 ASC] |",
"| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |",
"| | UnionExec |",
"| | ParquetExec: limit=None, partitions=[1/1/2/2/<uuid>.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions=[1/1/2/2/<uuid>.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions=[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/3/<uuid>.parquet], projection=[foo, host, load, time] |",
"| | |",
"+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| plan_type | plan |",
"+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
"| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |",
"| | TableScan: cpu projection=[foo, host, load, time] |",
"| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |",
"| | UnionExec |",
"| | DeduplicateExec: [host@1 ASC,time@3 ASC] |",
"| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |",
"| | UnionExec |",
"| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/<uuid>.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/<uuid>.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |",
"| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/3/<uuid>.parquet]]}, projection=[foo, host, load, time] |",
"| | |",
"+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+",
],
)
.await;

View File

@ -101,7 +101,7 @@ where
async fn scan(
&self,
_ctx: &SessionState,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
// It would be cool to push projection and limit down
_filters: &[Expr],
_limit: Option<usize>,
@ -114,7 +114,7 @@ where
Ok(Arc::new(SystemTableExecutionPlan {
table: Arc::clone(&self.table),
projection: projection.clone(),
projection: projection.cloned(),
projected_schema,
}))
}

View File

@ -194,7 +194,7 @@ impl QuerierTable {
&self,
predicate: &Predicate,
span: Option<Span>,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
) -> Result<Vec<Arc<dyn QueryChunk>>> {
let mut span_recorder = SpanRecorder::new(span);
match self
@ -216,7 +216,7 @@ impl QuerierTable {
&self,
predicate: &Predicate,
span_recorder: &SpanRecorder,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
) -> Result<Vec<Arc<dyn QueryChunk>>> {
debug!(
?predicate,
@ -331,6 +331,10 @@ impl QuerierTable {
// create parquet files
let parquet_files: Vec<_> = match cached_table {
Some(cached_table) => {
// use nested scope because we span many child scopes here and it's easier to
// aggregate / collapse in the UI
let span_recorder = span_recorder.child("parquet chunks");
let basic_summaries: Vec<_> = parquet_files
.files
.iter()
@ -377,11 +381,14 @@ impl QuerierTable {
let keep = *keep;
async move { keep }
})
.map(|(cached_parquet_file, _keep)| async move {
let span = span_recorder.child_span("new_chunk");
self.chunk_adapter
.new_chunk(Arc::clone(cached_table), cached_parquet_file, span)
.await
.map(|(cached_parquet_file, _keep)| {
let span_recorder = &span_recorder;
async move {
let span = span_recorder.child_span("new_chunk");
self.chunk_adapter
.new_chunk(Arc::clone(cached_table), cached_parquet_file, span)
.await
}
})
.buffer_unordered(CONCURRENT_CHUNK_CREATION_JOBS)
.filter_map(|x| async { x })
@ -430,7 +437,7 @@ impl QuerierTable {
&self,
predicate: &Predicate,
span: Option<Span>,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
) -> Result<Vec<IngesterPartition>> {
let mut span_recorder = SpanRecorder::new(span);
@ -465,7 +472,7 @@ impl QuerierTable {
ingester_connection: Arc<dyn IngesterConnection>,
predicate: &Predicate,
span_recorder: &SpanRecorder,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
) -> Result<Vec<IngesterPartition>> {
// If the projection is provided, use it. Otherwise, use all columns of the table
// The provided projection should include all columns needed by the query
@ -828,7 +835,7 @@ mod tests {
// Expect one chunk from the ingester
let pred = Predicate::new().with_range(0, 100);
let chunks = querier_table
.chunks_with_predicate_and_projection(&pred, &Some(vec![1])) // only select `foo` column
.chunks_with_predicate_and_projection(&pred, Some(&vec![1])) // only select `foo` column
.await
.unwrap();
assert_eq!(chunks.len(), 1);
@ -1371,14 +1378,14 @@ mod tests {
&self,
pred: &Predicate,
) -> Result<Vec<Arc<dyn QueryChunk>>> {
self.chunks_with_predicate_and_projection(pred, &None).await
self.chunks_with_predicate_and_projection(pred, None).await
}
/// Invokes querier_table.chunks modeling the ingester sending the partitions in this table
async fn chunks_with_predicate_and_projection(
&self,
pred: &Predicate,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
) -> Result<Vec<Arc<dyn QueryChunk>>> {
self.querier_table
.ingester_connection

View File

@ -44,7 +44,7 @@ impl TableProvider for QuerierTable {
async fn scan(
&self,
ctx: &SessionState,
projection: &Option<Vec<usize>>,
projection: Option<&Vec<usize>>,
filters: &[Expr],
limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {

View File

@ -8,23 +8,23 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" ORDER BY tag;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | TableScan: table projection=[bar, foo, tag, time] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | TableScan: table projection=[bar, foo, tag, time] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE tag='A';
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -33,23 +33,23 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A';
-- Results After Normalizing UUIDs
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE foo=1 AND bar=2;
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -58,23 +58,23 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE foo=1 AND bar=2;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: foo@1 = 1 AND bar@0 = 2 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: foo@1 = 1 AND bar@0 = 2 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag;
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -84,26 +84,26 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00');
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -112,20 +112,20 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00');
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -8,24 +8,24 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" ORDER BY tag;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | TableScan: table projection=[bar, foo, tag, time] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | TableScan: table projection=[bar, foo, tag, time] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE tag='A';
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -34,24 +34,24 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A';
-- Results After Normalizing UUIDs
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE foo=1 AND bar=2;
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -60,24 +60,24 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE foo=1 AND bar=2;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: foo@1 = 1 AND bar@0 = 2 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: foo@1 = 1 AND bar@0 = 2 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag;
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -87,27 +87,27 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: table.tag ASC NULLS LAST |
| | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] |
| physical_plan | SortExec: [tag@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00');
+-----+-----+-----+----------------------+
| bar | foo | tag | time |
@ -116,21 +116,21 @@
+-----+-----+-----+----------------------+
-- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00');
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: table.bar, table.foo, table.tag, table.time |
| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) |
| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] |
| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@2 ASC,time@3 ASC] |
| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] |
| | SortExec: [tag@2 ASC,time@3 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -1,91 +1,91 @@
-- Test Setup: OneMeasurementFourChunksWithDuplicatesWithIngester
-- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[city] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select count(*) from h2o;
+-----------------+
| COUNT(UInt8(1)) |

View File

@ -1,75 +1,75 @@
-- Test Setup: OneMeasurementFourChunksWithDuplicatesParquetOnly
-- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST |
| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area |
| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] |
| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] |
| | UnionExec |
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] |
| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Union |
| | Projection: h2o.state AS name |
| | TableScan: h2o projection=[state] |
| | Projection: h2o.city AS name |
| | TableScan: h2o projection=[city] |
| physical_plan | UnionExec |
| | ProjectionExec: expr=[state@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[state@1 as state] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[state] |
| | ProjectionExec: expr=[city@0 as name] |
| | UnionExec |
| | ProjectionExec: expr=[city@0 as city] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] |
| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[city] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select count(*) from h2o;
+-----------------+
| COUNT(UInt8(1)) |
@ -91,8 +91,8 @@
| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, num_dupes=2, output_rows=5, spill_count=0, spilled_bytes=0] |
| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] |
| | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=0, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=1219, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=0, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=1219, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] |
| | |
----------

View File

@ -7,32 +7,32 @@
+-----------------+----------+
-- SQL: EXPLAIN select count(*), sum(f) from m;
-- Results After Normalizing UUIDs
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: COUNT(UInt8(1)), SUM(m.f) |
| | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)), SUM(m.f)]] |
| | TableScan: m projection=[f] |
| physical_plan | ProjectionExec: expr=[COUNT(UInt8(1))@0 as COUNT(UInt8(1)), SUM(m.f)@1 as SUM(m.f)] |
| | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| | UnionExec |
| | ProjectionExec: expr=[f@0 as f] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@1 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [tag@1 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000013.parquet], projection=[f] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: COUNT(UInt8(1)), SUM(m.f) |
| | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)), SUM(m.f)]] |
| | TableScan: m projection=[f] |
| physical_plan | ProjectionExec: expr=[COUNT(UInt8(1))@0 as COUNT(UInt8(1)), SUM(m.f)@1 as SUM(m.f)] |
| | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| | UnionExec |
| | ProjectionExec: expr=[f@0 as f] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [tag@1 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [tag@1 ASC,time@2 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] |
| | ParquetExec: limit=None, partitions={4 groups: [[1/1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000f.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000011.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000013.parquet]]}, projection=[f] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -0,0 +1,21 @@
-- Test Setup: PeriodsInNames
-- SQL: SELECT * from "measurement.one";
+-----------+-----------+---------+---------+--------------------------------+
| field.one | field.two | tag.one | tag.two | time |
+-----------+-----------+---------+---------+--------------------------------+
| 1 | true | value | other | 2021-01-01T00:00:01.000000001Z |
| 1 | false | value2 | other2 | 2021-01-01T00:00:01.000000002Z |
+-----------+-----------+---------+---------+--------------------------------+
-- SQL: SELECT "tag.one" from "measurement.one";
+---------+
| tag.one |
+---------+
| value |
| value2 |
+---------+
-- SQL: SELECT "tag.one" from "measurement.one" where "field.two" is TRUE;
+---------+
| tag.one |
+---------+
| value |
+---------+

View File

@ -0,0 +1,13 @@
-- Basic query tests for measurement names that have periods in their names
-- IOX_SETUP: PeriodsInNames
-- query data
SELECT * from "measurement.one";
-- projection
SELECT "tag.one" from "measurement.one";
-- predicate
SELECT "tag.one" from "measurement.one" where "field.two" is TRUE;

View File

@ -14,15 +14,15 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------+
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -37,49 +37,49 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200;
-- Results After Normalizing UUIDs
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200), pruning_predicate=count_max@0 > 200, projection=[count, system, time, town] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200), pruning_predicate=count_max@0 > 200, projection=[count, system, time, town] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200.0;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Float64) > Float64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Float64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=CAST(count AS Float64) > Float64(200), projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Float64) > Float64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Float64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=CAST(count AS Float64) > Float64(200), projection=[count, system, time, town] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 4.0;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(4), pruning_predicate=system_max@0 > 4, projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(4), pruning_predicate=system_max@0 > 4, projection=[count, system, time, town] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury';
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -93,19 +93,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury';
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND town@3 != tewsbury |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND town@3 != tewsbury |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -118,19 +118,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
-- Results After Normalizing UUIDs
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -142,19 +142,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000;
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and count < 40000;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -168,19 +168,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000;
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND count@0 < 40000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: count@0 > 200 AND count@0 < 40000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -195,19 +195,19 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 4.0 and system < 7.0;
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(4) AND system < Float64(7), pruning_predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 4 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(4) AND system < Float64(7), pruning_predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 5.0 and system < 7.0;
-- Results After Sorting
+-------+--------+--------------------------------+----------+
@ -219,19 +219,19 @@
+-------+--------+--------------------------------+----------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and system < 7.0;
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND system < Float64(7), pruning_predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND system@1 < 7 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(5) AND system < Float64(7), pruning_predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system;
-- Results After Sorting
+-------+--------+--------------------------------+----------+
@ -242,19 +242,19 @@
+-------+--------+--------------------------------+----------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > system, pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(5) AND town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > system, pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading');
-- Results After Sorting
+-------+--------+--------------------------------+---------+
@ -264,19 +264,19 @@
+-------+--------+--------------------------------+---------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading');
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != town AND system < Float64(7) AND (count = UInt64(632) OR town = Dictionary(Int32, Utf8("reading"))), pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != town AND system < Float64(7) AND (count = UInt64(632) OR town = Dictionary(Int32, Utf8("reading"))), pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00');
-- Results After Sorting
++

View File

@ -9,30 +9,30 @@
+------+------+----------------------+
-- SQL: EXPLAIN SELECT * FROM cpu order by host, load, time;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.load ASC NULLS LAST, cpu.time ASC NULLS LAST |
| | Projection: cpu.host, cpu.load, cpu.time |
| | TableScan: cpu projection=[host, load, time] |
| physical_plan | SortExec: [host@0 ASC NULLS LAST,load@1 ASC NULLS LAST,time@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [host@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] |
| | SortExec: [host@0 ASC,time@2 ASC] |
| | UnionExec |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.load ASC NULLS LAST, cpu.time ASC NULLS LAST |
| | Projection: cpu.host, cpu.load, cpu.time |
| | TableScan: cpu projection=[host, load, time] |
| physical_plan | SortExec: [host@0 ASC NULLS LAST,load@1 ASC NULLS LAST,time@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [host@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] |
| | SortExec: [host@0 ASC,time@2 ASC] |
| | UnionExec |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * FROM cpu WHERE host != 'b' ORDER BY host,time;
+------+------+----------------------+
| host | load | time |
@ -42,30 +42,30 @@
+------+------+----------------------+
-- SQL: EXPLAIN SELECT * FROM cpu WHERE host != 'b' ORDER BY host,time;
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.time ASC NULLS LAST |
| | Projection: cpu.host, cpu.load, cpu.time |
| | Filter: cpu.host != Dictionary(Int32, Utf8("b")) |
| | TableScan: cpu projection=[host, load, time], partial_filters=[cpu.host != Dictionary(Int32, Utf8("b"))] |
| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: host@0 != b |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [host@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] |
| | SortExec: [host@0 ASC,time@2 ASC] |
| | UnionExec |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.time ASC NULLS LAST |
| | Projection: cpu.host, cpu.load, cpu.time |
| | Filter: cpu.host != Dictionary(Int32, Utf8("b")) |
| | TableScan: cpu projection=[host, load, time], partial_filters=[cpu.host != Dictionary(Int32, Utf8("b"))] |
| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |
| | CoalescePartitionsExec |
| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: host@0 != b |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [host@0 ASC,time@2 ASC] |
| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] |
| | SortExec: [host@0 ASC,time@2 ASC] |
| | UnionExec |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -14,25 +14,25 @@
+---------+------------+-------+------+--------------------------------+
-- SQL: EXPLAIN SELECT * from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city, other_temp, state, temp, time] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[city, other_temp, state, temp, time] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select temp, other_temp, time from h2o;
-- Results After Sorting
+------+------------+--------------------------------+
@ -48,50 +48,50 @@
+------+------------+--------------------------------+
-- SQL: EXPLAIN select temp, other_temp, time from h2o;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | UnionExec |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[other_temp, temp, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | UnionExec |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[other_temp, temp, time] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00');
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | Filter: h2o.time >= TimestampNanosecond(250, None) |
| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@4 >= 250 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | Filter: h2o.time >= TimestampNanosecond(250, None) |
| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: time@4 >= 250 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | UnionExec |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -10,20 +10,20 @@
+--------+------------+-------+------+--------------------------------+
-- SQL: EXPLAIN SELECT * from h2o;
-- Results After Normalizing UUIDs
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time |
| | TableScan: h2o projection=[city, other_temp, state, temp, time] |
| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: select temp, other_temp, time from h2o;
+------+------------+--------------------------------+
| temp | other_temp | time |
@ -34,18 +34,18 @@
+------+------------+--------------------------------+
-- SQL: EXPLAIN select temp, other_temp, time from h2o;
-- Results After Normalizing UUIDs
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time |
| | TableScan: h2o projection=[other_temp, temp, time] |
| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] |
| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] |
| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | UnionExec |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] |
| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] |
| | RecordBatchesExec: batches_groups=1 batches=1 |
| | |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -116,6 +116,22 @@ async fn test_cases_new_sql_system_tables_sql() {
.expect("flush worked");
}
#[tokio::test]
// Tests from "periods.sql",
async fn test_cases_periods_sql() {
test_helpers::maybe_start_logging();
let input_path = Path::new("cases").join("in").join("periods.sql");
let mut runner = Runner::new();
runner
.run(input_path)
.await
.expect("test failed");
runner
.flush()
.expect("flush worked");
}
#[tokio::test]
// Tests from "pushdown.sql",
async fn test_cases_pushdown_sql() {

View File

@ -22,9 +22,11 @@ pub async fn run_series_set_plan_maybe_error(
ctx: &IOxSessionContext,
plans: SeriesSetPlans,
) -> Result<Vec<String>, DataFusionError> {
use std::sync::Arc;
use futures::TryStreamExt;
ctx.to_series_and_groups(plans)
ctx.to_series_and_groups(plans, Arc::clone(&ctx.inner().runtime_env().memory_manager))
.await?
.map_ok(|series_or_group| series_or_group.to_string())
.try_collect()

View File

@ -66,6 +66,7 @@ pub fn get_all_setups() -> &'static HashMap<String, Arc<dyn DbSetup>> {
register_setup!(ManyFieldsSeveralChunks),
register_setup!(TwoChunksMissingColumns),
register_setup!(AllTypes),
register_setup!(PeriodsInNames),
register_setup!(TwoChunksDedupWeirdnessParquet),
register_setup!(TwoChunksDedupWeirdnessParquetIngester),
register_setup!(ThreeChunksWithRetention),

View File

@ -35,7 +35,7 @@ async fn run_table_schema_test_case<D>(
let ctx = db.new_query_context(None);
let chunks = db
.chunks(table_name, &Default::default(), &None, ctx)
.chunks(table_name, &Default::default(), None, ctx)
.await
.expect("error getting chunks");
for chunk in chunks {

View File

@ -12,7 +12,7 @@ use thiserror::Error;
use crate::namespace_cache::NamespaceCache;
pub mod mock;
mod ns_autocreation;
pub(crate) mod ns_autocreation;
pub use ns_autocreation::*;
/// Error states encountered during [`NamespaceId`] lookup.

View File

@ -9,12 +9,27 @@ use thiserror::Error;
use super::NamespaceResolver;
use crate::namespace_cache::NamespaceCache;
/// What to do when the namespace doesn't exist
#[derive(Debug)]
pub enum MissingNamespaceAction {
/// Automatically create the namespace using the given retention period.
AutoCreate(Option<i64>),
/// Reject the write.
Reject,
}
/// An error auto-creating the request namespace.
#[derive(Debug, Error)]
pub enum NamespaceCreationError {
/// An error returned from a namespace creation request.
#[error("failed to create namespace: {0}")]
Create(iox_catalog::interface::Error),
/// The write is to be rejected because the namespace doesn't exist and auto-creation is
/// disabled.
#[error("rejecting write due to non-existing namespace: {0}")]
Reject(String),
}
/// A layer to populate the [`Catalog`] with all the namespaces the router
@ -30,7 +45,7 @@ pub struct NamespaceAutocreation<C, T> {
topic_id: TopicId,
query_id: QueryPoolId,
retention_period_ns: Option<i64>,
action: MissingNamespaceAction,
}
impl<C, T> NamespaceAutocreation<C, T> {
@ -48,7 +63,7 @@ impl<C, T> NamespaceAutocreation<C, T> {
catalog: Arc<dyn Catalog>,
topic_id: TopicId,
query_id: QueryPoolId,
retention_period_ns: Option<i64>,
action: MissingNamespaceAction,
) -> Self {
Self {
inner,
@ -56,7 +71,7 @@ impl<C, T> NamespaceAutocreation<C, T> {
catalog,
topic_id,
query_id,
retention_period_ns,
action,
}
}
}
@ -74,32 +89,40 @@ where
namespace: &NamespaceName<'static>,
) -> Result<NamespaceId, super::Error> {
if self.cache.get_schema(namespace).is_none() {
trace!(%namespace, "namespace auto-create cache miss");
trace!(%namespace, "namespace not found in cache");
let mut repos = self.catalog.repositories().await;
match repos
.namespaces()
.create(
namespace.as_str(),
self.retention_period_ns,
self.topic_id,
self.query_id,
)
.await
{
Ok(_) => {
debug!(%namespace, "created namespace");
match self.action {
MissingNamespaceAction::Reject => {
debug!(%namespace, "namespace not in catalog and autocreation disabled");
return Err(NamespaceCreationError::Reject(namespace.into()).into());
}
Err(iox_catalog::interface::Error::NameExists { .. }) => {
// Either the cache has not yet converged to include this
// namespace, or another thread raced populating the catalog
// and beat this thread to it.
debug!(%namespace, "spurious namespace create failed");
}
Err(e) => {
error!(error=%e, %namespace, "failed to auto-create namespace");
return Err(NamespaceCreationError::Create(e).into());
MissingNamespaceAction::AutoCreate(retention_period_ns) => {
match repos
.namespaces()
.create(
namespace.as_str(),
retention_period_ns,
self.topic_id,
self.query_id,
)
.await
{
Ok(_) => {
debug!(%namespace, "created namespace");
}
Err(iox_catalog::interface::Error::NameExists { .. }) => {
// Either the cache has not yet converged to include this
// namespace, or another thread raced populating the catalog
// and beat this thread to it.
debug!(%namespace, "spurious namespace create failed");
}
Err(e) => {
error!(error=%e, %namespace, "failed to auto-create namespace");
return Err(NamespaceCreationError::Create(e).into());
}
}
}
}
}
@ -112,6 +135,7 @@ where
mod tests {
use std::sync::Arc;
use assert_matches::assert_matches;
use data_types::{Namespace, NamespaceId, NamespaceSchema};
use iox_catalog::mem::MemCatalog;
@ -152,7 +176,7 @@ mod tests {
Arc::clone(&catalog),
TopicId::new(42),
QueryPoolId::new(42),
TEST_RETENTION_PERIOD_NS,
MissingNamespaceAction::AutoCreate(TEST_RETENTION_PERIOD_NS),
);
// Drive the code under test
@ -190,7 +214,7 @@ mod tests {
Arc::clone(&catalog),
TopicId::new(42),
QueryPoolId::new(42),
TEST_RETENTION_PERIOD_NS,
MissingNamespaceAction::AutoCreate(TEST_RETENTION_PERIOD_NS),
);
let created_id = creator
@ -222,4 +246,34 @@ mod tests {
}
);
}
#[tokio::test]
async fn test_reject() {
let ns = NamespaceName::try_from("bananas").unwrap();
let cache = Arc::new(MemoryNamespaceCache::default());
let metrics = Arc::new(metric::Registry::new());
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(metrics));
let creator = NamespaceAutocreation::new(
MockNamespaceResolver::default().with_mapping(ns.clone(), NamespaceId::new(1)),
cache,
Arc::clone(&catalog),
TopicId::new(42),
QueryPoolId::new(42),
MissingNamespaceAction::Reject,
);
// It should not autocreate because we specified "rejection" behaviour, above
assert_matches!(
creator.get_namespace_id(&ns).await,
Err(crate::namespace_resolver::Error::Create(
NamespaceCreationError::Reject(_ns)
))
);
// Make double-sure it wasn't created in the catalog
let mut repos = catalog.repositories().await;
assert_matches!(repos.namespaces().get_by_name(ns.as_str()).await, Ok(None));
}
}

View File

@ -114,6 +114,10 @@ impl Error {
StatusCode::UNSUPPORTED_MEDIA_TYPE
}
Error::DmlHandler(err) => StatusCode::from(err),
// Error from the namespace resolver is 4xx if autocreation is disabled, 5xx otherwise
Error::NamespaceResolver(crate::namespace_resolver::Error::Create(
crate::namespace_resolver::ns_autocreation::NamespaceCreationError::Reject(_),
)) => StatusCode::BAD_REQUEST,
Error::NamespaceResolver(_) => StatusCode::INTERNAL_SERVER_ERROR,
Error::RequestLimit => StatusCode::SERVICE_UNAVAILABLE,
}

View File

@ -19,7 +19,7 @@ use router::{
ShardedWriteBuffer, WriteSummaryAdapter,
},
namespace_cache::{MemoryNamespaceCache, ShardedCache},
namespace_resolver::{NamespaceAutocreation, NamespaceSchemaResolver},
namespace_resolver::{MissingNamespaceAction, NamespaceAutocreation, NamespaceSchemaResolver},
server::http::HttpDelegate,
shard::Shard,
};
@ -78,7 +78,7 @@ type HttpDelegateStack = HttpDelegate<
/// A [`router`] stack configured with the various DML handlers using mock
/// catalog / write buffer backends.
impl TestContext {
pub fn new(ns_autocreate_retention_period_ns: Option<i64>) -> Self {
pub fn new(autocreate_ns: bool, ns_autocreate_retention_period_ns: Option<i64>) -> Self {
let metrics = Arc::new(metric::Registry::default());
let time = iox_time::MockProvider::new(
iox_time::Time::from_timestamp_millis(668563200000).unwrap(),
@ -131,7 +131,13 @@ impl TestContext {
Arc::clone(&catalog),
TopicId::new(TEST_TOPIC_ID),
QueryPoolId::new(TEST_QUERY_POOL_ID),
ns_autocreate_retention_period_ns,
{
if autocreate_ns {
MissingNamespaceAction::AutoCreate(ns_autocreate_retention_period_ns)
} else {
MissingNamespaceAction::Reject
}
},
);
let delegate = HttpDelegate::new(1024, 100, namespace_resolver, handler_stack, &metrics);
@ -187,13 +193,13 @@ impl TestContext {
impl Default for TestContext {
fn default() -> Self {
Self::new(None)
Self::new(true, None)
}
}
#[tokio::test]
async fn test_write_ok() {
let ctx = TestContext::new(None);
let ctx = TestContext::new(true, None);
// Write data inside retention period
let now = SystemProvider::default()
@ -279,7 +285,7 @@ async fn test_write_ok() {
#[tokio::test]
async fn test_write_outside_retention_period() {
let ctx = TestContext::new(TEST_RETENTION_PERIOD_NS);
let ctx = TestContext::new(true, TEST_RETENTION_PERIOD_NS);
// Write data outside retention period into a new table
let two_hours_ago =
@ -312,7 +318,7 @@ async fn test_write_outside_retention_period() {
#[tokio::test]
async fn test_schema_conflict() {
let ctx = TestContext::new(None);
let ctx = TestContext::new(true, None);
// data inside the retention period
let now = SystemProvider::default()
@ -376,9 +382,42 @@ async fn test_schema_conflict() {
assert_eq!(err.as_status_code(), StatusCode::BAD_REQUEST);
}
#[tokio::test]
async fn test_rejected_ns() {
let ctx = TestContext::new(false, None);
let now = SystemProvider::default()
.now()
.timestamp_nanos()
.to_string();
let lp = "platanos,tag1=A,tag2=B val=42i ".to_string() + &now;
let request = Request::builder()
.uri("https://bananas.example/api/v2/write?org=bananas&bucket=test")
.method("POST")
.body(Body::from(lp))
.expect("failed to construct HTTP request");
let err = ctx
.delegate()
.route(request)
.await
.expect_err("should error");
assert_matches!(
err,
router::server::http::Error::NamespaceResolver(
// can't check the type of the create error without making ns_autocreation public, but
// not worth it just for this test, as the correct error is asserted in unit tests in
// that module. here it's just important that the write fails.
router::namespace_resolver::Error::Create(_)
)
);
assert_eq!(err.as_status_code(), StatusCode::BAD_REQUEST);
}
#[tokio::test]
async fn test_schema_limit() {
let ctx = TestContext::new(None);
let ctx = TestContext::new(true, None);
let now = SystemProvider::default()
.now()
@ -444,7 +483,7 @@ async fn test_schema_limit() {
#[tokio::test]
async fn test_write_propagate_ids() {
let ctx = TestContext::new(None);
let ctx = TestContext::new(true, None);
// Create the namespace and a set of tables.
let ns = ctx
@ -525,7 +564,7 @@ async fn test_write_propagate_ids() {
#[tokio::test]
async fn test_delete_propagate_ids() {
let ctx = TestContext::new(None);
let ctx = TestContext::new(true, None);
// Create the namespace and a set of tables.
let ns = ctx

View File

@ -375,7 +375,7 @@ impl Schema {
/// Return names of the columns of given indexes with all PK columns (tags and time)
/// If the columns are not provided, return all columns
pub fn select_given_and_pk_columns(&self, cols: &Option<Vec<usize>>) -> Vec<String> {
pub fn select_given_and_pk_columns(&self, cols: Option<&Vec<usize>>) -> Vec<String> {
match cols {
Some(cols) => {
let mut columns = cols

View File

@ -1348,7 +1348,10 @@ where
// Execute the plans.
let db_name = db_name.to_owned();
let series_or_groups = ctx
.to_series_and_groups(series_plan)
.to_series_and_groups(
series_plan,
Arc::clone(&ctx.inner().runtime_env().memory_manager),
)
.await
.context(FilteringSeriesSnafu {
db_name: db_name.clone(),
@ -1413,7 +1416,10 @@ where
// Execute the plans
let db_name = db_name.to_owned();
let series_or_groups = ctx
.to_series_and_groups(grouped_series_set_plan)
.to_series_and_groups(
grouped_series_set_plan,
Arc::clone(&ctx.inner().runtime_env().memory_manager),
)
.await
.context(GroupingSeriesSnafu {
db_name: db_name.clone(),

View File

@ -23,9 +23,10 @@ bitflags = { version = "1" }
byteorder = { version = "1", features = ["std"] }
bytes = { version = "1", features = ["std"] }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] }
clap = { version = "4", features = ["color", "derive", "env", "error-context", "help", "std", "suggestions", "usage"] }
crossbeam-utils = { version = "0.8", features = ["std"] }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "799dd747152f6574638a844986b8ea8470d3f4d6", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fbadebb894672f61327a30f77cda2ee88a343b2a", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] }
digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] }
either = { version = "1", features = ["use_std"] }
fixedbitset = { version = "0.4", features = ["std"] }
@ -136,7 +137,6 @@ url = { version = "2" }
uuid = { version = "1", features = ["getrandom", "rng", "std", "v4"] }
[target.x86_64-unknown-linux-gnu.dependencies]
io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }
@ -144,7 +144,6 @@ rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "t
once_cell = { version = "1", default-features = false, features = ["unstable"] }
[target.x86_64-apple-darwin.dependencies]
io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }
@ -152,7 +151,6 @@ rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "t
once_cell = { version = "1", default-features = false, features = ["unstable"] }
[target.aarch64-apple-darwin.dependencies]
io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }